# Advanced Querying Mongo

Importing libraries and setting up connection

In [2]:
from pymongo import MongoClient
client = MongoClient("localhost:27017")

In [3]:
db = client.get_database("ironhack")

In [4]:
db.list_collection_names()

['companies']

In [5]:
companies = db.get_collection("companies")

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [16]:
proj = {'name': 1, '_id': 0}
query = {'name': 'Babelgum'}
list(companies.find(query, proj))

[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [21]:
query = ({"number_of_employees": {"$gt": 5000}})
proj = {'name': 1, '_id': 0, 'number_of_employees' : 1}
list(companies.find(query,proj).sort("number_of_employees",1).limit(10))

[{'name': 'Nintendo', 'number_of_employees': 5080},
 {'name': 'Hexaware Technologies', 'number_of_employees': 5200},
 {'name': 'Facebook', 'number_of_employees': 5299},
 {'name': 'OpenText', 'number_of_employees': 5300},
 {'name': 'CPM Braxis', 'number_of_employees': 5400},
 {'name': 'LSI', 'number_of_employees': 5400},
 {'name': 'Microchip Technologies', 'number_of_employees': 5500},
 {'name': 'Mediaset', 'number_of_employees': 5729},
 {'name': 'Mindray Medical International', 'number_of_employees': 5763},
 {'name': 'Dentsu', 'number_of_employees': 6000}]

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [22]:
query = [{'founded_year': {'$gte': 2000}},{'founded_year': {'$lte': 2005}} ]
proj = {'name': 1, '_id': 0, 'founded_year': 1}
list(companies.find({"$and": query},proj).limit(10))

[{'name': 'Wetpaint', 'founded_year': 2005},
 {'name': 'Zoho', 'founded_year': 2005},
 {'name': 'Digg', 'founded_year': 2004},
 {'name': 'Facebook', 'founded_year': 2004},
 {'name': 'Omnidrive', 'founded_year': 2005},
 {'name': 'StumbleUpon', 'founded_year': 2002},
 {'name': 'Gizmoz', 'founded_year': 2003},
 {'name': 'Helio', 'founded_year': 2005},
 {'name': 'Plaxo', 'founded_year': 2002},
 {'name': 'Technorati', 'founded_year': 2002}]

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [54]:
query = [{'founded_year': {'$lt': 2010}},{'ipo.valuation_amount': {'$gt': 100_000_000}} ]
proj = {'name': 1, '_id': 0, 'ipo': 1}
list(companies.find({"$and": query},proj).limit(5))

[{'name': 'Facebook',
  'ipo': {'valuation_amount': 104000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 5,
   'pub_day': 18,
   'stock_symbol': 'NASDAQ:FB'}},
 {'name': 'Twitter',
  'ipo': {'valuation_amount': 18100000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2013,
   'pub_month': 11,
   'pub_day': 7,
   'stock_symbol': 'NYSE:TWTR'}},
 {'name': 'Yelp',
  'ipo': {'valuation_amount': 1300000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 3,
   'pub_day': 2,
   'stock_symbol': 'NYSE:YELP'}},
 {'name': 'LinkedIn',
  'ipo': {'valuation_amount': 9310000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2011,
   'pub_month': 7,
   'pub_day': 20,
   'stock_symbol': 'NYSE:LNKD'}},
 {'name': 'Amazon',
  'ipo': {'valuation_amount': 100000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 1997,
   'pub_month': 5,
   'pub_day': None,
   'stock_symbol': 'NASDAQ:AMZN'}}]

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [48]:
query = [{'number_of_employees': {'$lt': 1000}},{'founded_year': {'$lt': 2005}} ]
proj = {'name': 1, '_id': 0, 'founded_year': 1, 'number_of_employees': 1}
list(companies.find({"$and": query},proj).sort("number_of_employees",-1).limit(10))

[{'name': 'Infinera Corporation',
  'number_of_employees': 974,
  'founded_year': 2000},
 {'name': 'NorthPoint Communications Group',
  'number_of_employees': 948,
  'founded_year': 1997},
 {'name': '888 Holdings', 'number_of_employees': 931, 'founded_year': 1997},
 {'name': 'Forrester Research',
  'number_of_employees': 903,
  'founded_year': 1983},
 {'name': 'Webmetrics', 'number_of_employees': 900, 'founded_year': 1999},
 {'name': 'SonicWALL', 'number_of_employees': 900, 'founded_year': 1991},
 {'name': 'Cornerstone OnDemand',
  'number_of_employees': 881,
  'founded_year': 1999},
 {'name': 'Buongiorno', 'number_of_employees': 800, 'founded_year': 1999},
 {'name': 'Cvent', 'number_of_employees': 800, 'founded_year': 1999},
 {'name': 'ZoomInfo', 'number_of_employees': 800, 'founded_year': 2000}]

### 6. All the companies that don't include the `partners` field.

In [49]:
list(companies.find({'partners': {"$exists": False}}).limit(10))

[]

### 7. All the companies that have a null type of value on the `category_code` field.

In [87]:
#OPCIÓN 1
query = {"category_code": { '$type' : "null" }}
proj = {'name': 1, '_id': 0, 'category_code' : 1}
list(companies.find(query,proj).limit(10))

[{'name': 'Collective', 'category_code': None},
 {'name': 'Snimmer', 'category_code': None},
 {'name': 'KoolIM', 'category_code': None},
 {'name': 'Level9 Media', 'category_code': None},
 {'name': 'VidKing', 'category_code': None},
 {'name': 'Drigg', 'category_code': None},
 {'name': 'SpaceTime', 'category_code': None},
 {'name': 'Touch Clarity', 'category_code': None},
 {'name': 'MMDAYS', 'category_code': None},
 {'name': 'Inside Group', 'category_code': None}]

In [88]:
#OPCIÓN 2
query = {"category_code": None }
proj = {'name': 1, '_id': 0, 'category_code' : 1}
list(companies.find(query,proj).limit(10))

[{'name': 'Collective', 'category_code': None},
 {'name': 'Snimmer', 'category_code': None},
 {'name': 'KoolIM', 'category_code': None},
 {'name': 'Level9 Media', 'category_code': None},
 {'name': 'VidKing', 'category_code': None},
 {'name': 'Drigg', 'category_code': None},
 {'name': 'SpaceTime', 'category_code': None},
 {'name': 'Touch Clarity', 'category_code': None},
 {'name': 'MMDAYS', 'category_code': None},
 {'name': 'Inside Group', 'category_code': None}]

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [67]:
query = [{'number_of_employees': {'$gte': 100}},{'number_of_employees': {'$lt': 1000}} ]
proj = {'name': 1, '_id': 0, 'number_of_employees': 1}
list(companies.find({"$and": query},proj).limit(10))

[{'name': 'AdventNet', 'number_of_employees': 600},
 {'name': 'AddThis', 'number_of_employees': 120},
 {'name': 'OpenX', 'number_of_employees': 305},
 {'name': 'LifeLock', 'number_of_employees': 644},
 {'name': 'Jajah', 'number_of_employees': 110},
 {'name': 'Livestream', 'number_of_employees': 120},
 {'name': 'Ustream', 'number_of_employees': 250},
 {'name': 'iContact', 'number_of_employees': 300},
 {'name': 'Yelp', 'number_of_employees': 800},
 {'name': 'Dailymotion', 'number_of_employees': 120}]

### 9. Order all the companies by their IPO price in a descending order.

In [80]:
proj = {'name': 1, '_id': 0, 'ipo.valuation_amount': 1}
query = {}
list(companies.find(query,proj).sort("ipo.valuation_amount",-1).limit(10))

[{'name': 'GREE', 'ipo': {'valuation_amount': 108960000000}},
 {'name': 'Facebook', 'ipo': {'valuation_amount': 104000000000}},
 {'name': 'Amazon', 'ipo': {'valuation_amount': 100000000000}},
 {'name': 'Twitter', 'ipo': {'valuation_amount': 18100000000}},
 {'name': 'Groupon', 'ipo': {'valuation_amount': 12800000000}},
 {'name': 'Tencent', 'ipo': {'valuation_amount': 11000000000}},
 {'name': 'Western Digital', 'ipo': {'valuation_amount': 9430000000}},
 {'name': 'LinkedIn', 'ipo': {'valuation_amount': 9310000000}},
 {'name': 'BMC Software', 'ipo': {'valuation_amount': 6000000000}},
 {'name': 'Rackspace', 'ipo': {'valuation_amount': 5440000000}}]

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [81]:
proj = {'name': 1, '_id': 0, 'number_of_employees': 1}
query = {}
list(companies.find(query,proj).sort("number_of_employees",-1).limit(10))

[{'name': 'Siemens', 'number_of_employees': 405000},
 {'name': 'IBM', 'number_of_employees': 388000},
 {'name': 'Toyota', 'number_of_employees': 320000},
 {'name': 'PayPal', 'number_of_employees': 300000},
 {'name': 'Nippon Telegraph and Telephone Corporation',
  'number_of_employees': 227000},
 {'name': 'Samsung Electronics', 'number_of_employees': 221726},
 {'name': 'Accenture', 'number_of_employees': 205000},
 {'name': 'Tata Consultancy Services', 'number_of_employees': 200300},
 {'name': 'Flextronics International', 'number_of_employees': 200000},
 {'name': 'Safeway', 'number_of_employees': 186000}]

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [89]:
query = {"founded_month": { '$gt' : 6 } }
proj = {'name': 1, '_id': 0, 'founded_month' : 1}
list(companies.find(query,proj).limit(5))

[{'name': 'Wetpaint', 'founded_month': 10},
 {'name': 'Zoho', 'founded_month': 9},
 {'name': 'Digg', 'founded_month': 10},
 {'name': 'Omnidrive', 'founded_month': 11},
 {'name': 'eBay', 'founded_month': 9}]

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [91]:
query = [{'founded_year': {'$lt': 2000}},{'acquisition.price_amount': {'$gt': 10_000_000}} ]
proj = {'name': 1, '_id': 0, 'acquisition.price_amount': 1}
list(companies.find({"$and": query},proj).limit(5))

[{'name': 'Postini', 'acquisition': {'price_amount': 625000000}},
 {'name': 'SideStep', 'acquisition': {'price_amount': 180000000}},
 {'name': 'Recipezaar', 'acquisition': {'price_amount': 25000000}},
 {'name': 'PayPal', 'acquisition': {'price_amount': 1500000000}},
 {'name': 'Snapfish', 'acquisition': {'price_amount': 300000000}}]

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [96]:
query = {'acquisition.acquired_year': {'$gt': 2010}}
proj = {'name': 1, '_id': 0, 'acquisition': 1}
list(companies.find(query,proj).sort('acquisition.price_amount',-1).limit(2))

[{'name': 'T-Mobile',
  'acquisition': {'price_amount': 39000000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/03/20/in-the-race-for-more-spectrum-att-is-acquiring-t-mobile-for-39-billion/',
   'source_description': 'In The Race For More Spectrum, AT&T Is Acquiring T-Mobile For $39 Billion',
   'acquired_year': 2011,
   'acquired_month': 3,
   'acquired_day': 20,
   'acquiring_company': {'name': 'AT&T', 'permalink': 'at-t'}}},
 {'name': 'Goodrich Corporation',
  'acquisition': {'price_amount': 18400000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://www.masshightech.com/stories/2011/09/19/daily37-UTC-shells-out-184-billion-for-Goodrich.html',
   'source_description': 'UTC shells out $18.4 billion for Goodrich',
   'acquired_year': 2011,
   'acquired_month': 9,
   'acquired_day': 22,
   'acquiring_company': {'name': 'United Technologies',
    'permalink': 'united-technologies'}}}]

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [100]:
proj = {'name': 1, '_id': 0, 'founded_year': 1}
query = {}
list(companies.find(query,proj).sort("founded_year",-1).limit(10))

[{'name': 'Wamba', 'founded_year': 2013},
 {'name': 'Gimigo', 'founded_year': 2013},
 {'name': 'Clowdy', 'founded_year': 2013},
 {'name': 'Fluc', 'founded_year': 2013},
 {'name': 'Advaliant', 'founded_year': 2013},
 {'name': 'Pikk', 'founded_year': 2013},
 {'name': 'WhosCall', 'founded_year': 2013},
 {'name': 'SEOGroup', 'founded_year': 2013},
 {'name': 'iBazar', 'founded_year': 2013},
 {'name': 'Fixya', 'founded_year': 2013}]

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [116]:
proj = {'name': 1, '_id': 0, 'acquisition.price_amount': 1, 'founded_day' : 1}
query = {'founded_day' : {'$lte' : 7}}
list(companies.find(query,proj).sort('acquisition.price_amount',-1).limit(5))

[{'name': 'Netscape',
  'founded_day': 4,
  'acquisition': {'price_amount': 4200000000}},
 {'name': 'PayPal',
  'founded_day': 1,
  'acquisition': {'price_amount': 1500000000}},
 {'name': 'Zappos',
  'founded_day': 1,
  'acquisition': {'price_amount': 1200000000}},
 {'name': 'Alibaba',
  'founded_day': 1,
  'acquisition': {'price_amount': 1000000000}},
 {'name': 'Postini',
  'founded_day': 2,
  'acquisition': {'price_amount': 625000000}}]

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [117]:
query = [{'category_code' : 'web'},{'number_of_employees' : {'$gt' : 4000}}]
proj = {'name': 1, '_id': 0, 'category_code': 1, 'number_of_employees' : 1 }
list(companies.find({"$and": query},proj).sort('number_of_employees',1).limit(5))

[{'name': 'Expedia', 'category_code': 'web', 'number_of_employees': 4400},
 {'name': 'AOL', 'category_code': 'web', 'number_of_employees': 8000},
 {'name': 'Webkinz', 'category_code': 'web', 'number_of_employees': 8657},
 {'name': 'Los Angeles Times Media Group',
  'category_code': 'web',
  'number_of_employees': 10000},
 {'name': 'Rakuten', 'category_code': 'web', 'number_of_employees': 10000}]

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [132]:
proj = {'name': 1, '_id': 0, 'acquisition.price_amount': 1, 'acquisition.price_currency_code' : 1}
query = {'acquisition.price_currency_code' : 'EUR'}, {'acquisition.price_amount': {'$gt' : 10_000_000}}
list(companies.find({'$and' : query},proj).limit(5))

[{'name': 'ZYB',
  'acquisition': {'price_amount': 31500000, 'price_currency_code': 'EUR'}},
 {'name': 'Apertio',
  'acquisition': {'price_amount': 140000000, 'price_currency_code': 'EUR'}},
 {'name': 'Greenfield Online',
  'acquisition': {'price_amount': 40000000, 'price_currency_code': 'EUR'}},
 {'name': 'Webedia',
  'acquisition': {'price_amount': 70000000, 'price_currency_code': 'EUR'}},
 {'name': 'Wayfinder',
  'acquisition': {'price_amount': 24000000, 'price_currency_code': 'EUR'}}]

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [120]:
query = {"acquisition.acquired_month": { '$lte' : 3 } }
proj = {'name': 1, '_id': 0, 'acquisition' : 1}
list(companies.find(query,proj).limit(3))

[{'name': 'Kyte',
  'acquisition': {'price_amount': None,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/01/31/exclusive-kit-digital-acquires-kickapps-kewego-and-kyte-for-77-2-million/',
   'source_description': 'KIT digital Acquires KickApps, Kewego AND Kyte For $77.2 Million',
   'acquired_year': 2011,
   'acquired_month': 1,
   'acquired_day': 31,
   'acquiring_company': {'name': 'KIT digital', 'permalink': 'kit-digital'}}},
 {'name': 'NetRatings',
  'acquisition': {'price_amount': 327000000,
   'price_currency_code': 'USD',
   'term_code': 'cash',
   'source_url': 'http://login.vnuemedia.com/hr/login/login_subscribe.jsp?id=0oqDem1gYIfIclz9i2%2Ffqj5NxCp2AC5DPbVnyT2da8GyV2mXjasabE128n69OrmcAh52%2FGE3pSG%2F%0AEKRYD9vh9EhrJrxukmUzh532fSMTZXL42gwPB80UWVtF1NwJ5UZSM%2BCkLU1mpYBoHFgiH%2Fi0f6Ax%0A9yMIVxt47t%2BHamhEQ0nkOEK24L',
   'source_description': 'Nielsen buys rest of NetRatings',
   'acquired_year': 2007,
   'acquired_month': 2,
   

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

Aquí interpreto que se consideran las que han sido adquiridas después de 2011 o NO han sido adquiridas

In [131]:
query_1 = {'$and' : [{'founded_year': {'$gte': 2000}},{'founded_year': {'$lte': 2010}}]}
query_2 = {'$or' : [{'acquisition.acquired_year' : {'$gt': 2011}},{'acquisition.acquired_year' : {'$exists': False}}]}
query = {'$and' : [query_1, query_2]}
proj = {'name': 1, '_id': 0, 'founded_year': 1, "acquisition.acquired_year": 1}
list(companies.find(query,proj).limit(5))

[{'name': 'Wetpaint',
  'founded_year': 2005,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'Zoho', 'founded_year': 2005},
 {'name': 'Digg',
  'founded_year': 2004,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'Facebook', 'founded_year': 2004},
 {'name': 'Omnidrive', 'founded_year': 2005}]

Si solo se consideran las que han sido adquiridas después de 2011 sería:

In [150]:
query = [{'founded_year': {'$gte': 2000}},{'founded_year': {'$lte': 2010}}, {"acquisition.acquired_year" : {'$gt' : 2011}}]
proj = {'name': 1, '_id': 0, 'founded_year': 1, "acquisition.acquired_year": 1}
list(companies.find({"$and": query},proj).limit(5))

[{'name': 'Wetpaint',
  'founded_year': 2005,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'Digg',
  'founded_year': 2004,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'Geni',
  'founded_year': 2006,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'blogTV',
  'founded_year': 2006,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'Revision3',
  'founded_year': 2005,
  'acquisition': {'acquired_year': 2012}}]

### 20. All the companies that have been 'deadpooled' after the third year.

In [184]:
query =  {'$where':  'this.deadpooled_year - this.founded_year >= 3'}                    
proj = {'name': 1, '_id': 0, 'deadpooled_year': 1}
list(companies.find(query,proj).sort('deadpooled_year',1).limit(5))

[{'name': 'sportgate AG', 'deadpooled_year': 2001},
 {'name': 'Webvan', 'deadpooled_year': 2001},
 {'name': 'Excite@Home', 'deadpooled_year': 2001},
 {'name': 'Ardesic', 'deadpooled_year': 2001},
 {'name': 'RealNames', 'deadpooled_year': 2002}]