# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient

str_conn='mongodb://localhost:27017'
cursor = MongoClient(str_conn)

cursor.list_database_names() 

db=cursor.companes.companies


### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [2]:

filtro={'name': True, '_id':False}

list(db.find({'name': 'Babelgum'},filtro))


[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [3]:
list(db.find().limit(1))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
  'name': 'Wetpaint',
  'permalink': 'abc2',
  'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
  'homepage_url': 'http://wetpaint-inc.com',
  'blog_url': 'http://digitalquarters.net/',
  'blog_feed_url': 'http://digitalquarters.net/feed/',
  'twitter_username': 'BachelrWetpaint',
  'category_code': 'web',
  'number_of_employees': 47,
  'founded_year': 2005,
  'founded_month': 10,
  'founded_day': 17,
  'deadpooled_year': 1,
  'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
  'alias_list': '',
  'email_address': 'info@wetpaint.com',
  'phone_number': '206.859.6300',
  'description': 'Technology Platform Company',
  'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
  'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
  'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and mon

In [4]:
list(db.find({'number_of_employees': {'$gt':5000}}).sort('number_of_employees').limit(1))

[{'_id': ObjectId('52cdef7c4bab8bd675297e49'),
  'name': 'Nintendo',
  'permalink': 'nintendo',
  'crunchbase_url': 'http://www.crunchbase.com/company/nintendo',
  'homepage_url': 'http://nintendo.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'NintendoAmerica',
  'category_code': 'games_video',
  'number_of_employees': 5080,
  'founded_year': 1889,
  'founded_month': 9,
  'founded_day': 23,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'games, console',
  'alias_list': '',
  'email_address': 'noalegal@noa.nintendo.com',
  'phone_number': '1-800-255-3700 ',
  'description': '',
  'created_at': 'Sat Dec 22 13:27:00 UTC 2007',
  'updated_at': 'Thu Sep 05 12:41:16 UTC 2013',
  'overview': '<p>Nintendo, a technology company widely known for its line of game consoles, was actually founded in 1889 by Fusajiro Yamauchi. It began as a card game company and evolved into one of the largest Japanese c

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [5]:
query={'$and':[{'founded_year': {'$lte': 2005}}, 
                         {'founded_year': {'$gte': 2000}}]}

filtro={'name': True, '_id':False,'founded_year':True}

list(db.find(query,filtro).limit(1))

[{'name': 'Wetpaint', 'founded_year': 2005}]

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [6]:
query={'$and': [{'ipo.valuation_amount' : {'$gt':100000000}},
                {'founded_year': {'$lte': 2010 }}]}

filtro={'name': True, '_id':False,'founded_year':True}

list(db.find(query,filtro).limit(1))

[{'name': 'Facebook', 'founded_year': 2004}]

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [7]:
query={'$and':[{'number_of_employees':{'$lt':1000}},{'founded_year':{'$lt':2005}}]}

list(db.find(query).sort('number_of_employees').limit(1))

[{'_id': ObjectId('52cdef7c4bab8bd675297d93'),
  'name': 'Fox Interactive Media',
  'permalink': 'fox-interactive-media',
  'crunchbase_url': 'http://www.crunchbase.com/company/fox-interactive-media',
  'homepage_url': 'http://www.newscorp.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'twitterapi',
  'category_code': 'web',
  'number_of_employees': 0,
  'founded_year': 1979,
  'founded_month': 6,
  'founded_day': 1,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': '',
  'tag_list': '',
  'alias_list': None,
  'email_address': '',
  'phone_number': '',
  'description': '',
  'created_at': 'Thu May 31 21:46:57 UTC 2007',
  'updated_at': 'Mon Aug 19 17:13:27 UTC 2013',
  'overview': '<p>Fox Interactive Media (FIM) oversees <a href="http://www.crunchbase.com/company/newscorporation" title="News Corporation">News Corporation</a>&#8217;s Internet business operations.</p>',
  'image': {'available_sizes': [[[150, 71],
 

### 6. All the companies that don't include the `partners` field.

In [8]:
query={'companies': {'$ne': 'partners'}}

list(db.find(query).limit(1))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
  'name': 'Wetpaint',
  'permalink': 'abc2',
  'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
  'homepage_url': 'http://wetpaint-inc.com',
  'blog_url': 'http://digitalquarters.net/',
  'blog_feed_url': 'http://digitalquarters.net/feed/',
  'twitter_username': 'BachelrWetpaint',
  'category_code': 'web',
  'number_of_employees': 47,
  'founded_year': 2005,
  'founded_month': 10,
  'founded_day': 17,
  'deadpooled_year': 1,
  'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
  'alias_list': '',
  'email_address': 'info@wetpaint.com',
  'phone_number': '206.859.6300',
  'description': 'Technology Platform Company',
  'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
  'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
  'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and mon

### 7. All the companies that have a null type of value on the `category_code` field.

In [9]:
query={'category_code':{'$type':'null'}}

filtro={'name': True, '_id':False,'category_code':True}

list(db.find(query,filtro).limit(1))

[{'name': 'Collective', 'category_code': None}]

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [10]:
query={'$and':[{'number_of_employees':{'$gte':100}}, {'number_of_employees':{'$lt':1000}}]}

filtro={'name': True, '_id':False,'number_of_employees':True}

list(db.find(query,filtro).limit(1))



[{'name': 'AdventNet', 'number_of_employees': 600}]

### 9. Order all the companies by their IPO price in a descending order.

In [11]:


list(db.find().sort('ipo.valuation_amount',-1).limit(1))

[{'_id': ObjectId('52cdef7e4bab8bd67529a8b4'),
  'name': 'GREE',
  'permalink': 'gree',
  'crunchbase_url': 'http://www.crunchbase.com/company/gree',
  'homepage_url': 'http://www.gree-corp.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'gree_corp',
  'category_code': 'games_video',
  'number_of_employees': 700,
  'founded_year': 2004,
  'founded_month': 12,
  'founded_day': 7,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'mobile-web, japan, tokyo, social-network, mobile-social-network, mobile-games',
  'alias_list': None,
  'email_address': 'inquiry@gree-corp.com',
  'phone_number': '',
  'description': 'Internet media business,SNS,  free game',
  'created_at': 'Sat Dec 20 16:42:57 UTC 2008',
  'updated_at': 'Tue Jan 01 21:37:04 UTC 2013',
  'overview': '<p>GREE provides Japan&#8217;s leading mobile social network, and is at the forefront of mobile technology. GREE was ranked as Japan&#82

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [12]:
list(db.find().sort('number_of_employees',-1).limit(10))

[{'_id': ObjectId('52cdef7d4bab8bd67529941a'),
  'name': 'Siemens',
  'permalink': 'siemens',
  'crunchbase_url': 'http://www.crunchbase.com/company/siemens',
  'homepage_url': 'http://www.siemens.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'Siemens',
  'category_code': 'hardware',
  'number_of_employees': 405000,
  'founded_year': 1847,
  'founded_month': None,
  'founded_day': None,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'automation, building-technologies, drive-technology, energy',
  'alias_list': '',
  'email_address': 'contact@siemens.com',
  'phone_number': '49 89 636 34134',
  'description': 'Electronics and Electrical Engineering',
  'created_at': 'Thu Jul 31 09:29:43 UTC 2008',
  'updated_at': 'Thu Nov 28 20:32:55 UTC 2013',
  'overview': '<p>Siemens AG, an electronics and electrical engineering company, operates in the industry, energy, and healthcare sectors worldwide. 

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [13]:
query={'founded_month':{'$gte':7}}
#Son 1000 pero lo limito a 1
list(db.find(query).limit(1))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
  'name': 'Wetpaint',
  'permalink': 'abc2',
  'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
  'homepage_url': 'http://wetpaint-inc.com',
  'blog_url': 'http://digitalquarters.net/',
  'blog_feed_url': 'http://digitalquarters.net/feed/',
  'twitter_username': 'BachelrWetpaint',
  'category_code': 'web',
  'number_of_employees': 47,
  'founded_year': 2005,
  'founded_month': 10,
  'founded_day': 17,
  'deadpooled_year': 1,
  'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
  'alias_list': '',
  'email_address': 'info@wetpaint.com',
  'phone_number': '206.859.6300',
  'description': 'Technology Platform Company',
  'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
  'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
  'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and mon

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [14]:
query={'$and':[{'founded_year':{'$lt':2000}},{'acquisition.price_amount':{'$gt':10000}}]}

query2={'acquisition.price_amount':{'$gt':10000}}

list(db.find(query).limit(1))

[{'_id': ObjectId('52cdef7c4bab8bd675297d90'),
  'name': 'Postini',
  'permalink': 'postini',
  'crunchbase_url': 'http://www.crunchbase.com/company/postini',
  'homepage_url': 'http://postini.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': None,
  'category_code': 'web',
  'number_of_employees': None,
  'founded_year': 1999,
  'founded_month': 6,
  'founded_day': 2,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': '',
  'alias_list': None,
  'email_address': '',
  'phone_number': '888.584.3150',
  'description': None,
  'created_at': 'Fri Jun 08 12:19:51 UTC 2007',
  'updated_at': 'Sat Aug 13 18:02:34 UTC 2011',
  'overview': '<p>Postini focuses on two main issues: security and compliance. Postini states that it handles more than 1 billion messages everyday and protects more than 35,000 businesses worldwide.</p>\n\n<p>Postini offers solutions that protect your company from malicious internet a

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [15]:
query={'acquisition.acquired_year':{'$gt':2010}}

filter={'name':True, 'acquisition':True, '_id':False}

list(db.find(query,filter).limit(1))

[{'name': 'Wetpaint',
  'acquisition': {'price_amount': 30000000,
   'price_currency_code': 'USD',
   'term_code': 'cash_and_stock',
   'source_url': 'http://allthingsd.com/20131216/viggle-tries-to-bulk-up-its-social-tv-business-by-buying-wetpaint/?mod=atdtweet',
   'source_description': ' Viggle Tries to Bulk Up Its Social TV Business by Buying Wetpaint',
   'acquired_year': 2013,
   'acquired_month': 12,
   'acquired_day': 16,
   'acquiring_company': {'name': 'Viggle', 'permalink': 'viggle'}}}]

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [21]:
filter1 = {'_id': False, 'name' : True, 'founded_year' : True}


In [26]:
list(db.find(None ,filter1).sort('founded_year',-1).limit(1))

[{'name': 'Fixya', 'founded_year': 2013}]

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [25]:
query={'founded_day':{'$lte':7}}

filter={'name':True,'acquisition.price_amount':True }

list(db.find(query,filter).sort('acquisition.price_amount',-1).limit(1))

[{'_id': ObjectId('52cdef7d4bab8bd6752989a1'),
  'name': 'Netscape',
  'acquisition': {'price_amount': 4200000000}}]

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [28]:
query={'$and':[{'category_code':'web'},{'number_of_employees':{'$gt':4000}}]}

filter={'name':True,'number_of_employees':True, '_id':False }

list(db.find(query,filter).sort('number_of_employees'))

[{'name': 'Expedia', 'number_of_employees': 4400},
 {'name': 'AOL', 'number_of_employees': 8000},
 {'name': 'Webkinz', 'number_of_employees': 8657},
 {'name': 'Rakuten', 'number_of_employees': 10000},
 {'name': 'Los Angeles Times Media Group', 'number_of_employees': 10000},
 {'name': 'Groupon', 'number_of_employees': 10000},
 {'name': 'Yahoo!', 'number_of_employees': 13600},
 {'name': 'eBay', 'number_of_employees': 15000},
 {'name': 'Experian', 'number_of_employees': 15500}]

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [30]:
query={'$and':[{'acquisition.price_amount':{'$gt':1e7}},{'acquisition.price_currency_code':'EUR'}]}

filter={'name':True,'acquisition.price_amount':True, '_id':False, 'acquisition.price_currency_code':True }

list(db.find(query,filter))

[{'name': 'ZYB',
  'acquisition': {'price_amount': 31500000, 'price_currency_code': 'EUR'}},
 {'name': 'Apertio',
  'acquisition': {'price_amount': 140000000, 'price_currency_code': 'EUR'}},
 {'name': 'Greenfield Online',
  'acquisition': {'price_amount': 40000000, 'price_currency_code': 'EUR'}},
 {'name': 'Webedia',
  'acquisition': {'price_amount': 70000000, 'price_currency_code': 'EUR'}},
 {'name': 'Wayfinder',
  'acquisition': {'price_amount': 24000000, 'price_currency_code': 'EUR'}},
 {'name': 'Tuenti Technologies',
  'acquisition': {'price_amount': 70000000, 'price_currency_code': 'EUR'}},
 {'name': 'BioMed Central',
  'acquisition': {'price_amount': 43400000, 'price_currency_code': 'EUR'}}]

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [33]:
query={'acquisition.acquired_month':{'$lt':4}}

filter={'name':True,'acquisition':True, '_id':False} 

list(db.find(query,filter).limit(1))






[{'name': 'Kyte',
  'acquisition': {'price_amount': None,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/01/31/exclusive-kit-digital-acquires-kickapps-kewego-and-kyte-for-77-2-million/',
   'source_description': 'KIT digital Acquires KickApps, Kewego AND Kyte For $77.2 Million',
   'acquired_year': 2011,
   'acquired_month': 1,
   'acquired_day': 31,
   'acquiring_company': {'name': 'KIT digital', 'permalink': 'kit-digital'}}}]

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [37]:
query={'$and':[{'founded_year':{'$gte':2000}},
               {'founded_year':{'$lte':2010}},
               {'acquisition.acquired_year':{'$gte':2011}}]}

filter={'name':True,'acquisition.acquired_year':True, '_id':False} 

list(db.find(query,filter).sort('name').limit(1))

[{'name': '1000 Markets', 'acquisition': {'acquired_year': 2012}}]

### 20. All the companies that have been 'deadpooled' after the third year.

In [68]:
query={'$subtract': [ "$deadpooles_year", "$founded_year" ] }

list(db.aggregate(query))

TypeError: pipeline must be a list

In [74]:
import pandas as pd
query={'$and': [{'deadpooled_year':{'$gt':3}},
                {'founded_year':{'$gte':1000}}]}

df=pd.DataFrame(list(db.find(query)))




In [62]:
df.columns
#'deadpooled_year'  ,  'founded_year'

df.shape

pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)


In [71]:
df[df.deadpooled_year>df.founded_year+3].head()

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,founded_year,founded_month,founded_day,deadpooled_year,deadpooled_month,deadpooled_day,deadpooled_url,tag_list,alias_list,email_address,phone_number,description,created_at,updated_at,overview,image,products,relationships,competitions,providerships,total_money_raised,funding_rounds,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
1,52cdef7c4bab8bd675297da0,Babelgum,babelgum,http://www.crunchbase.com/company/babelgum,http://babelgum.com,http://babelgum.com/blog,http://feeds.feedburner.com/Babelgum,Babelgum,games_video,,2007,3.0,1.0,2013,1.0,9.0,,"iptv, web2ireland",,info@babelgum.com,,,Sat Jun 09 08:15:21 UTC 2007,Wed Oct 16 06:30:25 UTC 2013,<p>Babelgum is an integrated web and mobile vi...,"{'available_sizes': [[[135, 102], 'assets/imag...","[{'name': 'Babelgum', 'permalink': 'babelgum'}]","[{'is_past': False, 'title': 'CEO', 'person': ...","[{'competitor': {'name': 'Joost', 'permalink':...",[],$13.2M,"[{'id': 17, 'round_code': 'a', 'source_url': '...",[],,[],"[{'description': '', 'address1': '', 'address2...",[],,[],[],[],[]
3,52cdef7c4bab8bd675297dae,Thoof,thoof,http://www.crunchbase.com/company/thoof,http://thoof.com,,,,web,,2006,12.0,1.0,2013,8.0,4.0,,social-news,,,,,Sat Jun 16 05:05:50 UTC 2007,Wed Dec 18 13:06:44 UTC 2013,<p>Thoof is a late entry to the personalized/s...,"{'available_sizes': [[[150, 74], 'assets/image...","[{'name': 'Thoof', 'permalink': 'thoof'}]","[{'is_past': True, 'title': 'Founder and CEO',...","[{'competitor': {'name': 'Last.fm', 'permalink...",[],$1M,"[{'id': 35, 'round_code': 'seed', 'source_url'...",[],,[],"[{'description': None, 'address1': None, 'addr...",[],,[],[],[{'external_url': 'http://www.duraslide.com.sg...,[]
4,52cdef7c4bab8bd675297db6,Wesabe,wesabe,http://www.crunchbase.com/company/wesabe,http://wesabe.com,http://blog.wesabe.com/,http://blog.wesabe.com/feed/,jasonlong,web,,2005,12.0,1.0,2010,6.0,30.0,http://techcrunch.com/2010/06/30/wesabe-shuts-...,"moneymanagement, finance, money",,contact@wesabe.com,,,Wed Jun 20 10:59:01 UTC 2007,Fri Apr 15 01:40:13 UTC 2011,<p>JF11V8 You&#8217;ve hit the ball out the pa...,"{'available_sizes': [[[129, 133], 'assets/imag...","[{'name': 'Wesabe', 'permalink': 'wesabe'}]","[{'is_past': False, 'title': 'Co-Founder and C...","[{'competitor': {'name': 'Revolution Money', '...",[],$4.7M,"[{'id': 43, 'round_code': 'seed', 'source_url'...",[],,[],"[{'description': None, 'address1': '400 Montgo...",[],,"[{'embed_code': '<script src=""http://flash.rev...",[],[],[]
6,52cdef7c4bab8bd675297dbe,Stickam,stickam,http://www.crunchbase.com/company/stickam,http://www.stickam.com,http://webmaster.stickam.com,http://blog.stickam.com/index.php/feed/,Stickam,games_video,35.0,2006,2.0,,2013,1.0,31.0,http://blog.stickam.com/post/41909003713/stick...,"video, livestream, webcam, live, livechat, cha...",,support@stickam.com,,,Thu Dec 20 10:07:40 UTC 2007,Sat Mar 16 09:55:43 UTC 2013,<p>Stickam.com is a social networking site tha...,"{'available_sizes': [[[150, 27], 'assets/image...","[{'name': 'Stickam', 'permalink': 'stickam'}, ...","[{'is_past': False, 'title': 'Chairman', 'pers...","[{'competitor': {'name': 'Ustream', 'permalink...",[],$0,[],[],,[],"[{'description': '', 'address1': '1975 pads rd...",[],,[],[],[],[]
7,52cdef7c4bab8bd675297dcc,AllPeers,allpeers,http://www.crunchbase.com/company/allpeers,http://www.allpeers.com,http://www.allpeers.com/blog/,http://www.allpeers.com/blog/feed/,,web,,2004,8.0,1.0,2008,3.0,3.0,http://www.techcrunch.com/2008/03/03/much-hype...,allpeers,,,,,Thu Jun 28 07:47:43 UTC 2007,Sat Mar 06 00:28:43 UTC 2010,"<p>AllPeers is a simple, persistent buddy list...","{'available_sizes': [[[150, 58], 'assets/image...","[{'name': 'AllPeers', 'permalink': 'allpeers'}]","[{'is_past': None, 'title': 'Chief Software Ar...",[],[],$0,"[{'id': 64, 'round_code': 'a', 'source_url': '...",[],,[],"[{'description': None, 'address1': None, 'addr...",[],,[],[],[],[]
