# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
import pandas as pd
import time

In [12]:
client = MongoClient("localhost:27017")

In [13]:
client.list_database_names()
db = client['Ironhack']
c = db.get_collection('companies')

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [103]:
# Your Code
filter_ = {'name': 'Babelgum'}
projection = {'name':1, '_id':0}

result = list(c.find(filter_, projection))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name
0,Babelgum


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [99]:
# Your Code
filter_ = {'number_of_employees':{'$gt':5000}}
projection = {'name':1, '_id':0}

result = list(c.find(filter_, projection).sort('number_of_employees', -1).limit(20))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name
0,Siemens
1,IBM
2,Toyota
3,PayPal
4,Nippon Telegraph and Telephone Corporation


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [104]:
# Your Code
filter_ = {'$and': [{'founded_year':{'$gt': 1999}}, {'founded_year':{'$lt': 2006}}]}
projection = {'founded_year':1, 'name':1, '_id':0}

result = list(c.find(filter_, projection).sort('founded_year', 1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,founded_year
0,AllofMP3,2000
1,Steorn,2000
2,MeeVee,2000
3,PhotoBox,2000
4,Boonex,2000


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [105]:
# Your Code
filter_ = {'ipo.valuation_amount':{'$gt':1000000}}
projection = {'name':1, 'ipo.valuation_amount':1, '_id':0}

result = list(c.find(filter_, projection).sort('ipo.valuation_amount', -1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,ipo
0,GREE,{'valuation_amount': 108960000000}
1,Facebook,{'valuation_amount': 104000000000}
2,Amazon,{'valuation_amount': 100000000000}
3,Twitter,{'valuation_amount': 18100000000}
4,Groupon,{'valuation_amount': 12800000000}


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [106]:
# Your Code
filter_ = {'number_of_employees':{'$lt':1000}, 'founded_year':{'$lt':2005}}
projection = {'name':1, '_id':0}

result = list(c.find(filter_, projection).sort('number_of_employees', -1).limit(10))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name
0,Infinera Corporation
1,NorthPoint Communications Group
2,888 Holdings
3,Forrester Research
4,SonicWALL


### 6. All the companies that don't include the `partners` field.

In [94]:
# Your Code
filter_ = {'partners':{'$exists':False}}
projection = {'name':1, '_id':0}

result = list(c.find(filter_, projection))
df = pd.DataFrame(result)
df

### 7. All the companies that have a null type of value on the `category_code` field.

In [107]:
# Your Code
filter_ = {'category_code':{'$type':10}}
projection = {'name':1, '_id':0}

result = list(c.find(filter_, projection))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name
0,Collective
1,Snimmer
2,KoolIM
3,Level9 Media
4,VidKing


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [108]:
# Your Code
filter_ = {'$and':[{'number_of_employees':{'$gte':100}}, {'number_of_employees':{'$lt':1000}}]}
projection = {'name':1, 'number_of_employees':1, '_id':0}

result = list(c.find(filter_, projection).sort('number_of_employees',-1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,number_of_employees
0,Datamonitor,984
1,Infinera Corporation,974
2,Box,950
3,NorthPoint Communications Group,948
4,888 Holdings,931


### 9. Order all the companies by their IPO price in a descending order.

In [109]:
# Your Code
filter_ = {}
projection = {'name':1, 'ipo.valuation_amount':1, '_id':0}

result = list(c.find(filter_, projection).sort('ipo.valuation_amount',-1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,ipo
0,GREE,{'valuation_amount': 108960000000}
1,Facebook,{'valuation_amount': 104000000000}
2,Amazon,{'valuation_amount': 100000000000}
3,Twitter,{'valuation_amount': 18100000000}
4,Groupon,{'valuation_amount': 12800000000}


### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [110]:
# Your Code
filter_ = {}
projection = {'name':1, '_id':0}

result = list(c.find(filter_, projection).sort('number_of_employees',-1).limit(10))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name
0,Siemens
1,IBM
2,Toyota
3,PayPal
4,Nippon Telegraph and Telephone Corporation


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [111]:
# Your Code
filter_ = {'$and':[{'founded_month':{'$gte':4}}, {'founded_month':{'$lte':6}}]}
projection = {'name':1, '_id':0, 'founded_month':1}

result = list(c.find(filter_, projection).limit(1000))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,founded_month
0,Postini,6
1,Geni,6
2,Fox Interactive Media,6
3,Technorati,6
4,OpenX,5


### 12. All the companies founded before 2000 that have an acquisition amount of more than 1.000.000

In [112]:
# Your Code
filter_ = {'founded_year':{'$lt':2000}, 'acquisition.price_amount':{'$gt':1000000}}
projection = {'name':1, '_id':0, 'founded_year':1, 'acquisition.price_amount':1}

result = list(c.find(filter_, projection))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,founded_year,acquisition
0,Postini,1999,{'price_amount': 625000000}
1,SideStep,1999,{'price_amount': 180000000}
2,Recipezaar,1999,{'price_amount': 25000000}
3,Cyworld,1999,{'price_amount': 7140000}
4,PayPal,1998,{'price_amount': 1500000000}


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [113]:
# Your Code
filter_ = {'acquisition.acquired_year':{'$gt':2010}}
projection = {'name':1, '_id':0, 'acquisition':1}

result = list(c.find(filter_, projection).sort('acquisition.price_amount',-1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,acquisition
0,T-Mobile,"{'price_amount': 39000000000, 'price_currency_..."
1,Goodrich Corporation,"{'price_amount': 18400000000, 'price_currency_..."
2,LSI,"{'price_amount': 6600000000, 'price_currency_c..."
3,National Semiconductor,"{'price_amount': 6500000000, 'price_currency_c..."
4,Ariba,"{'price_amount': 4300000000, 'price_currency_c..."


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [114]:
# Your Code
filter_ = {}
projection = {'name':1, 'founded_year':1, '_id':0}

result = list(c.find(filter_, projection).sort('founded_year',-1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,founded_year
0,Fixya,2013.0
1,Wamba,2013.0
2,Advaliant,2013.0
3,Fluc,2013.0
4,iBazar,2013.0


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [115]:
# Your Code
filter_ = {'$and':[{'founded_day':{'$gte':0}}, {'founded_day':{'$lte':7}}]}
projection = {'name':1, 'acquisition.price_amount':1, '_id':0}

result = list(c.find(filter_, projection).sort('acquisition.price_amount',-1).limit(10))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,acquisition
0,Netscape,{'price_amount': 4200000000}
1,PayPal,{'price_amount': 1500000000}
2,Zappos,{'price_amount': 1200000000}
3,Alibaba,{'price_amount': 1000000000}
4,Postini,{'price_amount': 625000000}


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [116]:
# Your Code
filter_ = {'category_code':'web', 'number_of_employees':{'$gt':4000}}
projection = {'name':1, 'number_of_employees':1, '_id':0}

result = list(c.find(filter_, projection).sort('number_of_employees',1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,number_of_employees
0,Expedia,4400
1,AOL,8000
2,Webkinz,8657
3,Rakuten,10000
4,Los Angeles Times Media Group,10000


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [117]:
# Your Code
filter_ = {'acquisition.price_currency_code':'EUR', 'acquisition.price_amount':{'$gt':10000000}}
projection = {'name':1, 'acquisition.price_amount':1, 'acquisition.price_currency_code':1, '_id':0}

result = list(c.find(filter_, projection))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,acquisition
0,ZYB,"{'price_amount': 31500000, 'price_currency_cod..."
1,Apertio,"{'price_amount': 140000000, 'price_currency_co..."
2,Greenfield Online,"{'price_amount': 40000000, 'price_currency_cod..."
3,Webedia,"{'price_amount': 70000000, 'price_currency_cod..."
4,Wayfinder,"{'price_amount': 24000000, 'price_currency_cod..."


### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [118]:
# Your Code
filter_ = {'$and':[{'acquisition.acquired_month':{'$gte':0}}, {'acquisition.acquired_month':{'$lte':3}}]}
projection = {'name':1, 'acquisition':1, '_id':0}

result = list(c.find(filter_, projection).limit(10))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,acquisition
0,Kyte,"{'price_amount': None, 'price_currency_code': ..."
1,NetRatings,"{'price_amount': 327000000, 'price_currency_co..."
2,blogTV,"{'price_amount': None, 'price_currency_code': ..."
3,Livestream,"{'price_amount': None, 'price_currency_code': ..."
4,iContact,"{'price_amount': 169000000, 'price_currency_co..."


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [119]:
# Your Code
filter_ = {'$and':[{'founded_year':{'$gte':2000}}, {'founded_year':{'$lte':2010}}, {'acquisition.acquired_year':{'$gt':2011}}]}
projection = {'name':1, 'acquisition':1, 'founded_year':1, '_id':0}

result = list(c.find(filter_, projection).sort('founded_year',1))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,founded_year,acquisition
0,SelectMinds,2000,"{'price_amount': None, 'price_currency_code': ..."
1,SnapNames,2000,"{'price_amount': None, 'price_currency_code': ..."
2,Handmark,2000,"{'price_amount': None, 'price_currency_code': ..."
3,Bestofmedia Group,2000,"{'price_amount': None, 'price_currency_code': ..."
4,Axis,2000,"{'price_amount': 865000000, 'price_currency_co..."


### 20. All the companies that have been 'deadpooled' after the third year.

In [120]:
# Your Code
filter_ = {'$where':"(this.deadpooled_year-this.founded_year) > 3" }
projection = {'name':1, 'deadpooled_year':1 , 'founded_year':1, '_id':0}

result = list(c.find(filter_, projection))
df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,founded_year,deadpooled_year
0,Babelgum,2007.0,2013
1,Thoof,2006.0,2013
2,Mercora,,2008
3,Wesabe,2005.0,2010
4,Stickam,2006.0,2013
