# Advanced Querying Mongo

Importing libraries and setting up connection

In [6]:
%pip install pymongo
from pymongo import MongoClient
cursor = MongoClient("mongodb://localhost:27017")

Note: you may need to restart the kernel to use updated packages.


In [9]:
db = cursor.companies
colec = db.companies

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [10]:
query = {'name': 'Babelgum'}

filtro = {'name': True, '_id': False}

list(colec.find(query, filtro))

[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [19]:
query = {'number_of_employees': {'$gt' : 5000}}

filtro = {'name': True, 'number_of_employees': True, '_id': False}

list(colec.find(query, filtro).sort('number_of_employees', 1).limit(20))

[{'name': 'Nintendo', 'number_of_employees': 5080},
 {'name': 'Hexaware Technologies', 'number_of_employees': 5200},
 {'name': 'Facebook', 'number_of_employees': 5299},
 {'name': 'OpenText', 'number_of_employees': 5300},
 {'name': 'CPM Braxis', 'number_of_employees': 5400},
 {'name': 'LSI', 'number_of_employees': 5400},
 {'name': 'Microchip Technologies', 'number_of_employees': 5500},
 {'name': 'Mediaset', 'number_of_employees': 5729},
 {'name': 'Mindray Medical International', 'number_of_employees': 5763},
 {'name': 'Dentsu', 'number_of_employees': 6000},
 {'name': 'Atmel', 'number_of_employees': 6000},
 {'name': 'Tata Communications', 'number_of_employees': 6000},
 {'name': 'Baidu', 'number_of_employees': 6000},
 {'name': 'Acxiom', 'number_of_employees': 6200},
 {'name': 'Acxiom', 'number_of_employees': 6200},
 {'name': 'SRA International', 'number_of_employees': 6400},
 {'name': "Moody's", 'number_of_employees': 6800},
 {'name': 'ManTech', 'number_of_employees': 7000},
 {'name': 'Ad

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [52]:
query = ({'$and':[{'founded_year': {'$gt' : 2000}},
                  {'founded_year': {'$lt' : 2005}}]})

filtro = {'name': True, 'founded_year': True, '_id': False}

list(colec.find(query, filtro).limit(10))

[{'name': 'Digg', 'ipo': None},
 {'name': 'Facebook',
  'ipo': {'valuation_amount': 104000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 5,
   'pub_day': 18,
   'stock_symbol': 'NASDAQ:FB'}},
 {'name': 'StumbleUpon', 'ipo': None},
 {'name': 'Gizmoz', 'ipo': None},
 {'name': 'Plaxo', 'ipo': None},
 {'name': 'Technorati', 'ipo': None},
 {'name': 'AddThis', 'ipo': None},
 {'name': 'Veoh', 'ipo': None},
 {'name': 'Meetup', 'ipo': None},
 {'name': 'SmugMug', 'ipo': None}]

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [45]:
query = ({'$and':[{'ipo.valuation_amount': {'$gt' : 100000000}},
                  {'founded_year': {'$lt' : 2010}}]})

filtro = {'name': True, 'ipo': True, '_id': False}

list(colec.find(query, filtro).sort('ipo',1).limit(5))

[{'name': 'Salesforce',
  'ipo': {'valuation_amount': 110000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2004,
   'pub_month': 7,
   'pub_day': 2,
   'stock_symbol': 'NYSE:CRM'}},
 {'name': 'Geeknet',
  'ipo': {'valuation_amount': 134000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2010,
   'pub_month': 11,
   'pub_day': 2,
   'stock_symbol': 'GKNT'}},
 {'name': 'QuinStreet',
  'ipo': {'valuation_amount': 140000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2010,
   'pub_month': 1,
   'pub_day': 11,
   'stock_symbol': 'NASDAQ:QNST'}},
 {'name': 'Tudou',
  'ipo': {'valuation_amount': 174000000,
   'valuation_currency_code': 'USD',
   'pub_year': None,
   'pub_month': None,
   'pub_day': None,
   'stock_symbol': 'NASDAQ:TUDO'}},
 {'name': 'KIT digital',
  'ipo': {'valuation_amount': 235000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2010,
   'pub_month': 7,
   'pub_day': 27,
   'stock_symbol': 'KITD'}}]

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [47]:
query = ({'$and':[{'number_of_employees': {'$lt' : 1000}},
                  {'founded_year': {'$lt' : 2005}}]})

filtro = {'name': True, 'number_of_employees': True, '_id': False}

list(colec.find(query, filtro).sort('number_of_employees', -1).limit(10))

[{'name': 'Infinera Corporation', 'number_of_employees': 974},
 {'name': 'NorthPoint Communications Group', 'number_of_employees': 948},
 {'name': '888 Holdings', 'number_of_employees': 931},
 {'name': 'Forrester Research', 'number_of_employees': 903},
 {'name': 'SonicWALL', 'number_of_employees': 900},
 {'name': 'Webmetrics', 'number_of_employees': 900},
 {'name': 'Cornerstone OnDemand', 'number_of_employees': 881},
 {'name': 'Mozilla', 'number_of_employees': 800},
 {'name': 'Buongiorno', 'number_of_employees': 800},
 {'name': 'Yelp', 'number_of_employees': 800}]

### 6. All the companies that don't include the `partners` field.

In [98]:
list(colec.find({'partners': {'$size':0}}, {'name':True, 'partners': True}).limit(10))


[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
  'name': 'Wetpaint',
  'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8b'),
  'name': 'AdventNet',
  'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8c'), 'name': 'Zoho', 'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8d'), 'name': 'Digg', 'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8e'),
  'name': 'Facebook',
  'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8f'),
  'name': 'Omnidrive',
  'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d90'),
  'name': 'Postini',
  'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d91'), 'name': 'Geni', 'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d92'),
  'name': 'Flektor',
  'partners': []},
 {'_id': ObjectId('52cdef7c4bab8bd675297d93'),
  'name': 'Fox Interactive Media',
  'partners': []}]

### 7. All the companies that have a null type of value on the `category_code` field.

In [57]:
list(colec.find({'category_code': None}, {'name':True, 'category_code': True}).limit(10))

[{'_id': ObjectId('52cdef7c4bab8bd6752980f6'),
  'name': 'Collective',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd675298225'),
  'name': 'Snimmer',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd675298226'),
  'name': 'KoolIM',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd675298261'),
  'name': 'Level9 Media',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd675298262'),
  'name': 'VidKing',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd67529826e'),
  'name': 'Drigg',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd675298276'),
  'name': 'SpaceTime',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd675298279'),
  'name': 'Touch Clarity',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd67529827a'),
  'name': 'MMDAYS',
  'category_code': None},
 {'_id': ObjectId('52cdef7c4bab8bd67529827b'),
  'name': 'Inside Group',
  'category_code': None}]

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [58]:
query = ({'$and':[{'number_of_employees': {'$gte' : 100}},
                  {'number_of_employees': {'$lt' : 1000}}]})

filtro = {'name': True, 'number_of_employees': True, '_id': False}

list(colec.find(query, filtro).sort('number_of_employees', -1).limit(10))

[{'name': 'Datamonitor', 'number_of_employees': 984},
 {'name': 'Infinera Corporation', 'number_of_employees': 974},
 {'name': 'Box', 'number_of_employees': 950},
 {'name': 'NorthPoint Communications Group', 'number_of_employees': 948},
 {'name': '888 Holdings', 'number_of_employees': 931},
 {'name': 'Forrester Research', 'number_of_employees': 903},
 {'name': 'SonicWALL', 'number_of_employees': 900},
 {'name': 'Relax Solutions Pvt Ltd', 'number_of_employees': 900},
 {'name': 'Webmetrics', 'number_of_employees': 900},
 {'name': 'InMobi', 'number_of_employees': 900}]

### 9. Order all the companies by their IPO price in a descending order.

In [62]:
query = {}

filtro = {'name': True, 'ipo': True, '_id': False}

list(colec.find(query, filtro).sort('ipo',-1).limit(5))

[{'name': 'GREE',
  'ipo': {'valuation_amount': 108960000000,
   'valuation_currency_code': 'JPY',
   'pub_year': 2008,
   'pub_month': 12,
   'pub_day': 17,
   'stock_symbol': '3632'}},
 {'name': 'Facebook',
  'ipo': {'valuation_amount': 104000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 5,
   'pub_day': 18,
   'stock_symbol': 'NASDAQ:FB'}},
 {'name': 'Amazon',
  'ipo': {'valuation_amount': 100000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 1997,
   'pub_month': 5,
   'pub_day': None,
   'stock_symbol': 'NASDAQ:AMZN'}},
 {'name': 'Twitter',
  'ipo': {'valuation_amount': 18100000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2013,
   'pub_month': 11,
   'pub_day': 7,
   'stock_symbol': 'NYSE:TWTR'}},
 {'name': 'Groupon',
  'ipo': {'valuation_amount': 12800000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2011,
   'pub_month': 11,
   'pub_day': 7,
   'stock_symbol': 'NASDAQ:GRPN'}}]

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [64]:
query = {}

filtro = {'name': True, 'number_of_employees': True, '_id': False}

list(colec.find(query, filtro).sort('number_of_employees', -1).limit(10))

[{'name': 'Siemens', 'number_of_employees': 405000},
 {'name': 'IBM', 'number_of_employees': 388000},
 {'name': 'Toyota', 'number_of_employees': 320000},
 {'name': 'PayPal', 'number_of_employees': 300000},
 {'name': 'Nippon Telegraph and Telephone Corporation',
  'number_of_employees': 227000},
 {'name': 'Samsung Electronics', 'number_of_employees': 221726},
 {'name': 'Accenture', 'number_of_employees': 205000},
 {'name': 'Tata Consultancy Services', 'number_of_employees': 200300},
 {'name': 'Flextronics International', 'number_of_employees': 200000},
 {'name': 'Safeway', 'number_of_employees': 186000}]

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [70]:
query= {'founded_month': {'$gte': 7}}                

filtro = {'name': True, 'founded_month': True, '_id': False}

list(colec.find(query, filtro).sort('number_of_employees', -1).limit(10)) 
#limit(10) porque 1000 son muchas 

[{'name': 'PayPal', 'founded_month': 12},
 {'name': 'ExxonMobil', 'founded_month': 11},
 {'name': 'Google', 'founded_month': 9},
 {'name': 'kalimatapla', 'founded_month': 8},
 {'name': 'BBC', 'founded_month': 7},
 {'name': 'The Walt Disney Company', 'founded_month': 10},
 {'name': 'eBay', 'founded_month': 9},
 {'name': 'Groupon', 'founded_month': 11},
 {'name': 'Los Angeles Times Media Group', 'founded_month': 12},
 {'name': 'Adobe Systems', 'founded_month': 12}]

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [83]:
query = ({'$and':[{'founded_year': {'$lt' : 2000}},
                  {'acquisition.price_amount': {'$gt' : 10000000}}]})

filtro = {'name': True, 'founded_year': True, 'acquisition.price_amount': True, '_id': False}

list(colec.find(query, filtro).sort('founded_year', -1).limit(10))

[{'name': 'Alibaba',
  'founded_year': 1999,
  'acquisition': {'price_amount': 1000000000}},
 {'name': 'Snapfish',
  'founded_year': 1999,
  'acquisition': {'price_amount': 300000000}},
 {'name': 'MIVA',
  'founded_year': 1999,
  'acquisition': {'price_amount': 11600000}},
 {'name': 'Zappos',
  'founded_year': 1999,
  'acquisition': {'price_amount': 1200000000}},
 {'name': 'Neopets',
  'founded_year': 1999,
  'acquisition': {'price_amount': 160000000}},
 {'name': 'Postini',
  'founded_year': 1999,
  'acquisition': {'price_amount': 625000000}},
 {'name': 'Kaboose',
  'founded_year': 1999,
  'acquisition': {'price_amount': 18400000}},
 {'name': 'PriceGrabber',
  'founded_year': 1999,
  'acquisition': {'price_amount': 485000000}},
 {'name': 'SideStep',
  'founded_year': 1999,
  'acquisition': {'price_amount': 180000000}},
 {'name': 'Recipezaar',
  'founded_year': 1999,
  'acquisition': {'price_amount': 25000000}}]

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [80]:
query = ({'$and':[{'founded_year': {'$gte' : 2010}},
                  {'acquisition.price_amount': {'$gt' : 10000000}}]})

filtro = {'name': True, 'acquisition': True, '_id': False}

list(colec.find(query, filtro).sort('acquisition.price_amount', -1).limit(10))

[{'name': 'Magento',
  'acquisition': {'price_amount': 180000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/06/23/ebay-acquired-magento-for-over-180-million-but-not-everyone-is-smiling/',
   'source_description': 'eBay Acquired Magento For Over $180 Million â€“ But Not Everyone Is Smiling',
   'acquired_year': 2011,
   'acquired_month': 6,
   'acquired_day': 6,
   'acquiring_company': {'name': 'eBay', 'permalink': 'ebay'}}}]

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [85]:
query = {}

filtro = {'name': True, 'founded_year': True, '_id': False}

list(colec.find(query, filtro).sort('founded_year', -1).limit(10))

[{'name': 'Wamba', 'founded_year': 2013},
 {'name': 'Gimigo', 'founded_year': 2013},
 {'name': 'Clowdy', 'founded_year': 2013},
 {'name': 'Fixya', 'founded_year': 2013},
 {'name': 'Fluc', 'founded_year': 2013},
 {'name': 'SEOGroup', 'founded_year': 2013},
 {'name': 'Pikk', 'founded_year': 2013},
 {'name': 'WhosCall', 'founded_year': 2013},
 {'name': 'iBazar', 'founded_year': 2013},
 {'name': 'Advaliant', 'founded_year': 2013}]

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [86]:
query = {'founded_day': {'$lte' : 7}}
                 
filtro = {'name': True, 'acquisition.price_amount': True, '_id': False}

list(colec.find(query, filtro).sort('acquisition', -1).limit(10))

[{'name': 'Netscape', 'acquisition': {'price_amount': 4200000000}},
 {'name': 'PayPal', 'acquisition': {'price_amount': 1500000000}},
 {'name': 'Zappos', 'acquisition': {'price_amount': 1200000000}},
 {'name': 'Alibaba', 'acquisition': {'price_amount': 1000000000}},
 {'name': 'Postini', 'acquisition': {'price_amount': 625000000}},
 {'name': 'Danger', 'acquisition': {'price_amount': 500000000}},
 {'name': 'Clearwell Systems', 'acquisition': {'price_amount': 410000000}},
 {'name': 'PrimeSense', 'acquisition': {'price_amount': 345000000}},
 {'name': 'Amobee', 'acquisition': {'price_amount': 321000000}},
 {'name': 'BlueLithium', 'acquisition': {'price_amount': 300000000}}]

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [92]:
query = ({'$and':[{'category_code': 'web'}, {'number_of_employees': {'$gt': 4000}}]})
                 
filtro = {'name': True, 'category_code': True, '_id': False}

list(colec.find(query, filtro).sort('number_of_employees', 1).limit(10))

[{'name': 'Expedia', 'category_code': 'web'},
 {'name': 'AOL', 'category_code': 'web'},
 {'name': 'Webkinz', 'category_code': 'web'},
 {'name': 'Rakuten', 'category_code': 'web'},
 {'name': 'Los Angeles Times Media Group', 'category_code': 'web'},
 {'name': 'Groupon', 'category_code': 'web'},
 {'name': 'Yahoo!', 'category_code': 'web'},
 {'name': 'eBay', 'category_code': 'web'},
 {'name': 'Experian', 'category_code': 'web'}]

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [109]:
query = ({'$and':[{'acquisition.price_currency_code': 'EUR'},
                  {'acquisition.price_amount': {'$gt' : 10000000}}]})

filtro = {'name': True, 'acquisition': True, '_id': False}

list(colec.find(query, filtro).sort('acquisition.price_amount', 1).limit(5))

[{'name': 'Wayfinder',
  'acquisition': {'price_amount': 24000000,
   'price_currency_code': 'EUR',
   'term_code': None,
   'source_url': 'http://www.techcrunch.com/2008/12/08/vodafone-acquires-swedens-wayfinder-for-e24-million/',
   'source_description': "Vodafone Acquires Sweden's WayFinder For â‚¬24 Million",
   'acquired_year': 2008,
   'acquired_month': 12,
   'acquired_day': 8,
   'acquiring_company': {'name': 'Vodafone', 'permalink': 'vodafone'}}},
 {'name': 'ZYB',
  'acquisition': {'price_amount': 31500000,
   'price_currency_code': 'EUR',
   'term_code': 'cash',
   'source_url': 'http://www.techcrunch.com/2008/05/16/danish-mobile-social-network-zyb-acquired-by-vodafone-for-e315-million/',
   'source_description': 'Danish Network ZYB Acquired By Vodafone For â‚¬31.5 million',
   'acquired_year': 2008,
   'acquired_month': 5,
   'acquired_day': 16,
   'acquiring_company': {'name': 'Vodafone', 'permalink': 'vodafone'}}},
 {'name': 'Greenfield Online',
  'acquisition': {'price_am

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [108]:
query= {'founded_month': {'$lte': 4}}                

filtro = {'name': True, 'acquisition': True, '_id': False}

list(colec.find(query, filtro).sort('name', -1).limit(10)) 

[{'name': 'zebraspot design', 'acquisition': None},
 {'name': 'zanox', 'acquisition': None},
 {'name': 'youserbase', 'acquisition': None},
 {'name': 'writewith', 'acquisition': None},
 {'name': 'wikiHow', 'acquisition': None},
 {'name': 'wheresbest', 'acquisition': None},
 {'name': 'webwork', 'acquisition': None},
 {'name': 'web seo masters', 'acquisition': None},
 {'name': 'weSRCH', 'acquisition': None},
 {'name': 'watercompare', 'acquisition': None}]

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [117]:
query = ({'$and':[{'founded_year': {'$gte' : 2000}},
                  {'founded_year': {'$lte' : 2010}},
                  {'acquisition.acquired_year': {'$gte': 2011}}]})

filtro = {'name': True, 'acquisition': True, 'founded_year': True, '_id': False}

list(colec.find(query, filtro).sort('acquisition.price_amount', -1).limit(5))

[{'name': 'SuccessFactors',
  'founded_year': 2001,
  'acquisition': {'price_amount': 3400000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2012/02/22/successfactors-sap/',
   'source_description': 'After Closing $3.4B Acquisition Of SuccessFactors, SAP Pushes Human Capital Management In The Cloud',
   'acquired_year': 2012,
   'acquired_month': 2,
   'acquired_day': 22,
   'acquiring_company': {'name': 'SAP', 'permalink': 'sap'}}},
 {'name': 'Sourcefire',
  'founded_year': 2001,
  'acquisition': {'price_amount': 2700000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2013/07/23/cisco-acquires-cybersecurity-company-sourcefire-for-2-7b/',
   'source_description': 'Cisco Acquires Cybersecurity Company Sourcefire For $2.7B',
   'acquired_year': 2013,
   'acquired_month': 7,
   'acquired_day': 23,
   'acquiring_company': {'name': 'Cisco', 'permalink': 'cisco'}}},
 {'name': 'ExactTarget',

### 20. All the companies that have been 'deadpooled' after the third year.

In [122]:
query= {'deadpooled_year': {'$gt': 3}}                

filtro = {'name': True, 'deadpooled_year': True, '_id': False}

list(colec.find(query, filtro).sort('deadpooled_year', -1).limit(10)) 

[{'name': 'Ptch', 'deadpooled_year': 2014},
 {'name': 'mTraks', 'deadpooled_year': 2013},
 {'name': 'Thoof', 'deadpooled_year': 2013},
 {'name': 'RampedMedia', 'deadpooled_year': 2013},
 {'name': 'Stickam', 'deadpooled_year': 2013},
 {'name': 'Frazr', 'deadpooled_year': 2013},
 {'name': 'Nirvanix', 'deadpooled_year': 2013},
 {'name': 'Orgoo', 'deadpooled_year': 2013},
 {'name': 'Nemedia', 'deadpooled_year': 2013},
 {'name': 'Multiply', 'deadpooled_year': 2013}]