# Advanced Querying Mongo

Importing libraries and setting up connection

In [33]:
%pip install pymongo

Note: you may need to restart the kernel to use updated packages.


In [75]:
from pymongo import MongoClient
import pandas as pd
client = MongoClient()

In [35]:
db= client.Ironhacks

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [38]:
db.list_collection_names()
colec=db.collection

In [153]:

list(colec.find({'name':'Babelgum'}).limit(5))

[{'_id': ObjectId('52cdef7c4bab8bd675297da0'),
  'name': 'Babelgum',
  'permalink': 'babelgum',
  'crunchbase_url': 'http://www.crunchbase.com/company/babelgum',
  'homepage_url': 'http://babelgum.com',
  'blog_url': 'http://babelgum.com/blog',
  'blog_feed_url': 'http://feeds.feedburner.com/Babelgum',
  'twitter_username': 'Babelgum',
  'category_code': 'games_video',
  'number_of_employees': None,
  'founded_year': 2007,
  'founded_month': 3,
  'founded_day': 1,
  'deadpooled_year': 2013,
  'deadpooled_month': 1,
  'deadpooled_day': 9,
  'deadpooled_url': '',
  'tag_list': 'iptv, web2ireland',
  'alias_list': '',
  'email_address': 'info@babelgum.com',
  'phone_number': '',
  'description': '',
  'created_at': 'Sat Jun 09 08:15:21 UTC 2007',
  'updated_at': 'Wed Oct 16 06:30:25 UTC 2013',
  'overview': '<p>Babelgum is an integrated web and mobile video content platform, free for users and supported by advertising, available on-demand to a global audience.</p>\n\n<p>On March 20th 2009

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [59]:
list(colec.find({'number_of_employees' : {'$gte':5000}} ,{'_id': 0, 
                                    'name': 1}).limit(20))

[{'name': 'Facebook'},
 {'name': 'eBay'},
 {'name': 'Cisco'},
 {'name': 'Yahoo!'},
 {'name': 'Google'},
 {'name': 'Intel'},
 {'name': 'Nintendo'},
 {'name': 'Adobe Systems'},
 {'name': 'Sony'},
 {'name': 'PayPal'},
 {'name': 'The Walt Disney Company'},
 {'name': 'AOL'},
 {'name': 'Webkinz'},
 {'name': 'Sun Microsystems'},
 {'name': 'Nokia'},
 {'name': 'Rakuten'},
 {'name': 'Microsoft'},
 {'name': 'Apple'},
 {'name': 'NetApp'},
 {'name': 'Motorola Solutions'}]

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [86]:
list(colec.find({'$and': [{'founded_year':{'$gte':2000}}, {'founded_year':{'$lte':2005}}]},{'_id': 0, 
                                    'name': 1}).limit(5))

[{'name': 'Wetpaint'},
 {'name': 'Zoho'},
 {'name': 'Digg'},
 {'name': 'Facebook'},
 {'name': 'Omnidrive'}]

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [85]:
list(colec.find({'$and': [{'ipo.valuation_amount':{'$gte':100000000}}, {'founded_year':{'$lte':2010}}]},{'_id': 0, 
                                    'name': 1, 'ipo.valuation_amount': 1}).limit(5))

[{'name': 'Facebook', 'ipo': {'valuation_amount': 104000000000}},
 {'name': 'Twitter', 'ipo': {'valuation_amount': 18100000000}},
 {'name': 'Yelp', 'ipo': {'valuation_amount': 1300000000}},
 {'name': 'LinkedIn', 'ipo': {'valuation_amount': 9310000000}},
 {'name': 'Amazon', 'ipo': {'valuation_amount': 100000000000}}]

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [101]:
list(colec.find({'$and':[{'number_of_employees':{'$lt':1000}},{'founded_year':{'$lt':2005}}]},{'_id':0,'number_of_employees': 1, 
'founded_year':1}).limit(10).sort('number_of_employees', -1))

[{'number_of_employees': 974, 'founded_year': 2000},
 {'number_of_employees': 948, 'founded_year': 1997},
 {'number_of_employees': 931, 'founded_year': 1997},
 {'number_of_employees': 903, 'founded_year': 1983},
 {'number_of_employees': 900, 'founded_year': 1991},
 {'number_of_employees': 900, 'founded_year': 1999},
 {'number_of_employees': 881, 'founded_year': 1999},
 {'number_of_employees': 800, 'founded_year': 1998},
 {'number_of_employees': 800, 'founded_year': 1999},
 {'number_of_employees': 800, 'founded_year': 2004}]

### 6. All the companies that don't include the `partners` field.

In [109]:
list(colec.find({'partners': {'$exists': 'null'}},{'_id': 0, 
                                    'name': 1}).limit(5))

[{'name': 'Wetpaint'},
 {'name': 'AdventNet'},
 {'name': 'Zoho'},
 {'name': 'Digg'},
 {'name': 'Facebook'}]

### 7. All the companies that have a null type of value on the `category_code` field.

In [110]:
list(colec.find({'category_code': {'$type': 'null'}},{'_id': 0, 
                                    'name': 1}).limit(5))

[{'name': 'Collective'},
 {'name': 'Snimmer'},
 {'name': 'KoolIM'},
 {'name': 'Level9 Media'},
 {'name': 'VidKing'}]

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [115]:
list(colec.find({'$and':[{'number_of_employees':{'$gte':100}},{'number_of_employees':{'$lt':1000}}]},{'_id':0, 'name': 1, 
'number of employees':1}).limit(10))

[{'name': 'AdventNet'},
 {'name': 'AddThis'},
 {'name': 'OpenX'},
 {'name': 'LifeLock'},
 {'name': 'Jajah'},
 {'name': 'Livestream'},
 {'name': 'Ustream'},
 {'name': 'iContact'},
 {'name': 'Yelp'},
 {'name': 'Dailymotion'}]

### 9. Order all the companies by their IPO price in a descending order.

In [118]:
list(colec.find({'ipo.valuation_amount': {'$gte':1}},{'_id': 0, 
                                    'name': 1, 'ipo.valuation_amount': 1}).limit(5).sort('ipo.valuation_amount', -1))

[{'name': 'GREE', 'ipo': {'valuation_amount': 108960000000}},
 {'name': 'Facebook', 'ipo': {'valuation_amount': 104000000000}},
 {'name': 'Amazon', 'ipo': {'valuation_amount': 100000000000}},
 {'name': 'Twitter', 'ipo': {'valuation_amount': 18100000000}},
 {'name': 'Groupon', 'ipo': {'valuation_amount': 12800000000}}]

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [121]:
list(colec.find({'number_of_employees':{'$gte':1}},{'_id': 0, 
                                    'name': 1}).limit(10).sort('number_of_employees'))

[{'name': 'OurStage'},
 {'name': 'eGenerations'},
 {'name': 'Localeze'},
 {'name': 'Swivel'},
 {'name': 'Mixercast'},
 {'name': 'Entrecard'},
 {'name': 'Pixel Qi'},
 {'name': 'eBurl'},
 {'name': 'Contrastream'},
 {'name': 'FeVote'}]

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [123]:
list(colec.find({'founded_month':{'$gte':6}},{'_id': 0, 'name':1}).limit(1000))

[{'name': 'Wetpaint'},
 {'name': 'Zoho'},
 {'name': 'Digg'},
 {'name': 'Omnidrive'},
 {'name': 'Postini'},
 {'name': 'Geni'},
 {'name': 'Fox Interactive Media'},
 {'name': 'eBay'},
 {'name': 'Joost'},
 {'name': 'Plaxo'},
 {'name': 'Powerset'},
 {'name': 'Technorati'},
 {'name': 'Sparter'},
 {'name': 'Kyte'},
 {'name': 'Thoof'},
 {'name': 'Jingle Networks'},
 {'name': 'LifeLock'},
 {'name': 'Wesabe'},
 {'name': 'SmugMug'},
 {'name': 'Google'},
 {'name': 'Skype'},
 {'name': 'Pando Networks'},
 {'name': 'Ikan'},
 {'name': 'delicious'},
 {'name': 'Topix'},
 {'name': 'Pownce'},
 {'name': 'AllPeers'},
 {'name': 'Wize'},
 {'name': 'AllofMP3'},
 {'name': 'SellABand'},
 {'name': 'iContact'},
 {'name': 'MeeVee'},
 {'name': 'blinkx'},
 {'name': 'Zlio'},
 {'name': 'Yelp'},
 {'name': 'Jaiku'},
 {'name': 'Yapta'},
 {'name': 'Fleck'},
 {'name': 'SideStep'},
 {'name': 'RockYou'},
 {'name': 'Instructables'},
 {'name': 'Netvibes'},
 {'name': 'Swivel'},
 {'name': 'Slide'},
 {'name': 'TripHub'},
 {'name':

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [128]:
list(colec.find({'$and':[{'founded_year':{'$lt':2000}},{'ipo.valuation_amount':{'$gt':1000000}}]},{'_id':0, 'name':1}))

[{'name': 'Amazon'},
 {'name': 'Nielsen'},
 {'name': 'OpenTable'},
 {'name': 'Shutterfly'},
 {'name': 'Salesforce'},
 {'name': 'QuinStreet'},
 {'name': 'Rackspace'},
 {'name': 'BMC Software'},
 {'name': 'Infoblox'},
 {'name': 'Tencent'},
 {'name': 'Baidu'},
 {'name': 'Geeknet'},
 {'name': 'Opsware'},
 {'name': 'Telenav'},
 {'name': 'Mixi'},
 {'name': 'QlikTech'},
 {'name': 'Pironet'},
 {'name': 'Pironet'},
 {'name': 'SolarWinds'},
 {'name': 'DemandTec'},
 {'name': 'Cornerstone OnDemand'},
 {'name': 'Western Digital'},
 {'name': 'Real Goods Solar'},
 {'name': 'GT Solar'}]

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [131]:
list(colec.find({'acquisition.acquired_year':{'$gt':2010}},{'_id':0, 'name':1, 'acquisition.acquired_year':1}).limit(5).sort('acquisition.price_amount'))

[{'name': 'blogTV', 'acquisition': {'acquired_year': 2013}},
 {'name': 'Kyte', 'acquisition': {'acquired_year': 2011}},
 {'name': 'Mashery', 'acquisition': {'acquired_year': 2013}},
 {'name': 'delicious', 'acquisition': {'acquired_year': 2011}},
 {'name': 'Geni', 'acquisition': {'acquired_year': 2012}}]

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [135]:
list(colec.find({'founded_year':{'$gt':1}},{'_id':0, 'name':1, 'founded_year':1}).limit(5))

[{'name': 'Wetpaint', 'founded_year': 2005},
 {'name': 'AdventNet', 'founded_year': 1996},
 {'name': 'Zoho', 'founded_year': 2005},
 {'name': 'Digg', 'founded_year': 2004},
 {'name': 'Facebook', 'founded_year': 2004}]

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [136]:
list(colec.find({'founded_day':{'$gte':7}},{'_id':0, 'name':1}).limit(10).sort('acquisition.price_amount',-1))

[{'name': 'Siebel Systems'},
 {'name': 'Yammer'},
 {'name': 'Yammer'},
 {'name': 'ngmoco'},
 {'name': 'Audible'},
 {'name': 'Adify'},
 {'name': 'Zong'},
 {'name': 'Topsy Labs'},
 {'name': 'BreakingPoint Systems'},
 {'name': 'Goodreads'}]

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [141]:
list(colec.find({'$and':[{'category_code':'web'},{'number_of_employees':{'$gt':4000}}]},{'_id':0, 'name':1}).limit(10).sort('number_of_employees'))

[{'name': 'Expedia'},
 {'name': 'AOL'},
 {'name': 'Webkinz'},
 {'name': 'Rakuten'},
 {'name': 'Los Angeles Times Media Group'},
 {'name': 'Groupon'},
 {'name': 'Yahoo!'},
 {'name': 'eBay'},
 {'name': 'Experian'}]

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [152]:
list(colec.find({'$and':[{'acquisition.price_amount':{'$gt':10000000}}, {'acquisition.price_currency_code': 'EUR'}]},{'_id':0, 'name':1}).limit(5))

[{'name': 'ZYB'},
 {'name': 'Apertio'},
 {'name': 'Greenfield Online'},
 {'name': 'Webedia'},
 {'name': 'Wayfinder'}]

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [147]:
list(colec.find({'acquisition.acquired_month':{'$lt':4}},{'_id':0, 'name':1, 'acquisition':1}).limit(10))

[{'name': 'Kyte',
  'acquisition': {'price_amount': None,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/01/31/exclusive-kit-digital-acquires-kickapps-kewego-and-kyte-for-77-2-million/',
   'source_description': 'KIT digital Acquires KickApps, Kewego AND Kyte For $77.2 Million',
   'acquired_year': 2011,
   'acquired_month': 1,
   'acquired_day': 31,
   'acquiring_company': {'name': 'KIT digital', 'permalink': 'kit-digital'}}},
 {'name': 'NetRatings',
  'acquisition': {'price_amount': 327000000,
   'price_currency_code': 'USD',
   'term_code': 'cash',
   'source_url': 'http://login.vnuemedia.com/hr/login/login_subscribe.jsp?id=0oqDem1gYIfIclz9i2%2Ffqj5NxCp2AC5DPbVnyT2da8GyV2mXjasabE128n69OrmcAh52%2FGE3pSG%2F%0AEKRYD9vh9EhrJrxukmUzh532fSMTZXL42gwPB80UWVtF1NwJ5UZSM%2BCkLU1mpYBoHFgiH%2Fi0f6Ax%0A9yMIVxt47t%2BHamhEQ0nkOEK24L',
   'source_description': 'Nielsen buys rest of NetRatings',
   'acquired_year': 2007,
   'acquired_month': 2,
   

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [151]:
list(colec.find({'$and':[{'founded_year':{'$gte':2000}},{'founded_year':{'$lte':2010}},{'acquisition.acquired_year':{'$gte':2010}}]},{'_id':0, 'name':1}).limit(5))

[{'name': 'Wetpaint'},
 {'name': 'Digg'},
 {'name': 'Geni'},
 {'name': 'Kyte'},
 {'name': 'Veoh'}]

### 20. All the companies that have been 'deadpooled' after the third year.

In [150]:
list(colec.find({'deadpooled_year':{'$gt':3}},{'_id':0, 'name':1}).limit(5))

[{'name': 'Omnidrive'},
 {'name': 'Babelgum'},
 {'name': 'Sparter'},
 {'name': 'Thoof'},
 {'name': 'Mercora'}]