# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
import pymongo 

In [2]:
from pymongo import MongoClient
cliente = pymongo.MongoClient()
cliente

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [19]:
mydb = cliente["Ironhack"]

In [23]:
coleccion = mydb.Companies

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [89]:
# Your Code
list(coleccion.find({"name":"Babelgum"}, {"name":1,"_id":0}))

[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [37]:
import pandas as pd
pd.options.display.max_columns = None

In [114]:
# Your Code
companiesemp = pd.DataFrame(coleccion.find({'number_of_employees': {"$gt": 5000}}, {"name":1, "number_of_employees":1,"founded_year":1, "_id":0})[8:20])
companiesemp.sort_values(by = ["number_of_employees"]).head(20)

Unnamed: 0,name,number_of_employees,founded_year
3,AOL,8000,1985
10,NetApp,8000,1992
4,Webkinz,8657,2005
7,Rakuten,10000,1997
2,The Walt Disney Company,25000,1923
5,Sun Microsystems,33350,1982
11,Motorola Solutions,51000,1928
9,Apple,80000,1976
8,Microsoft,90000,1974
6,Nokia,125000,1865


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [195]:
# Your Code

cond1 = {"founded_year":{"$gte": 2000}}
cond2 = {'founded_year': {"$lte": 2005}}

companiesfound = pd.DataFrame(coleccion.find({'$and': [cond1, cond2]}, {"name":1, "founded_year":1, "_id":0}))
companiesfound.head(3)


Unnamed: 0,name,founded_year
0,Wetpaint,2005
1,Zoho,2005
2,Digg,2004


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [196]:
# Your Code
condic1 = {"ipo":{"$gt": 100000000}}
condic2 = {'founded_year':{"$lt": 2010}}

companiesipo = pd.DataFrame(coleccion.find({'$and': [condic1, condic2]}, {"name":1, "ipo":1, "_id":0}))
companiesipo.head(3)

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [198]:
# Your Code
condicion1 = {"number_of_employees":{"$lt": 1000}}
condicion2 = {'founded_year':{"$lt": 2005}}

companiesemploy = pd.DataFrame(coleccion.find({'$and': [condicion1, condicion2]}, {"number_of_employees":1, "_id":0})[8:20])
companiesemploy.sort_values(by = ["number_of_employees"]).head(10)

Unnamed: 0,number_of_employees
0,0
2,5
5,21
1,23
7,26
11,28
9,36
10,50
8,72
6,75


### 6. All the companies that don't include the `partners` field.

In [201]:
# Your Code
companiesnopart = pd.DataFrame(coleccion.find({"parteners": {"$nin": ["Array"]}}, {"name":1, "partners":1, "_id":0}))
companiesnopart.head(10)


Unnamed: 0,name,partners
0,Wetpaint,[]
1,AdventNet,[]
2,Zoho,[]
3,Digg,[]
4,Facebook,[]
5,Omnidrive,[]
6,Postini,[]
7,Geni,[]
8,Flektor,[]
9,Fox Interactive Media,[]


### 7. All the companies that have a null type of value on the `category_code` field.

In [144]:
# Your Code
companiesnullcat = pd.DataFrame(coleccion.find({"category_code": {"$type": "null"}}, {"_id":0, "name":1,"category_code":1}))
companiesnullcat.head(10)

Unnamed: 0,name,category_code
0,Collective,
1,Snimmer,
2,KoolIM,
3,Level9 Media,
4,VidKing,
5,Drigg,
6,SpaceTime,
7,Touch Clarity,
8,MMDAYS,
9,Inside Group,


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [204]:
# Your Code
con1 = {"number_of_employees":{"$gt": 100}}
con2 = {'number_of_employees':{"$lt": 1000}}

companiesemployees = pd.DataFrame(coleccion.find({'$and': [con1, con2]}, {"name":1,"number_of_employees":1, "_id":0}))
companiesemployees.head(10)

Unnamed: 0,name,number_of_employees
0,AdventNet,600
1,AddThis,120
2,OpenX,305
3,LifeLock,644
4,Jajah,110
5,Livestream,120
6,Ustream,250
7,iContact,300
8,Yelp,800
9,Dailymotion,120


### 9. Order all the companies by their IPO price in a descending order.

In [164]:
compan = pd.DataFrame(coleccion.distinct("ipo"))
compan.head()

Unnamed: 0,0
0,
1,"{'valuation_amount': None, 'valuation_currency..."
2,"{'valuation_amount': None, 'valuation_currency..."
3,"{'valuation_amount': None, 'valuation_currency..."
4,"{'valuation_amount': None, 'valuation_currency..."


In [168]:
compan.dropna(how = "any", inplace = True)
compan.isnull().sum()

0    0
dtype: int64

In [205]:
#coleccion.distinct( "ipo" )

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [157]:
# Your Code
companiesem = pd.DataFrame(coleccion.find({'number_of_employees': {"$gt": 5000}}, {"name":1, "number_of_employees":1, "_id":0}).sort("number_of_employees", -1))[:10]
companiesem.sort_values(by = ["number_of_employees"]).head(10)

Unnamed: 0,name,number_of_employees
9,Safeway,186000
8,Flextronics International,200000
7,Tata Consultancy Services,200300
6,Accenture,205000
5,Samsung Electronics,221726
4,Nippon Telegraph and Telephone Corporation,227000
3,PayPal,300000
2,Toyota,320000
1,IBM,388000
0,Siemens,405000


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [171]:
# Your Code
companiessem = pd.DataFrame(coleccion.find({'founded_month': {"$gt": 6}}, {"name":1,"founded_month":1, "_id":0})[8:20])
companiessem.head(2)

Unnamed: 0,name,founded_month
0,Kyte,12
1,Thoof,12


### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [181]:
con3 = {"founded_year": {"$lt": 2000}}
con4 = {'valuation_amount':{"$gt": 1000000}}

companiesemployees3 = pd.DataFrame(coleccion.find({'$and': [con3, con4]}, {"name":1,"number_of_employees":1, "_id":0})[8:20])
companiesemployees3.head(10)

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [187]:
# Your Code
companies2 = pd.DataFrame(coleccion.find({'founded_year': {"$lt": 2010}}, {"name":1,"valuation_amount":1, "_id":0})[8:20])
companies2.head(2)

Unnamed: 0,name
0,Fox Interactive Media
1,Twitter


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [177]:
# Your Code
companiesfoundedyear = pd.DataFrame(coleccion.distinct('founded_year').sort("founded_year", 1), {"name":1,"founded_month":1, "_id":0})
companiesfoundedyear.head()

TypeError: sort() takes no positional arguments

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [206]:
# Your Code
companies3 = pd.DataFrame(coleccion.find({'founded_day': {"$lte": 7}}, {"name":1,"founded_day":1, "_id":0}))
companies3.head(2)

Unnamed: 0,name,founded_day
0,Facebook,1
1,Omnidrive,1


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [193]:
# Your Code

con5 = {"category_code":"web"}
con6 = {"number_of_employees":{"$gt": 4000}}

companiesemployeesweb = pd.DataFrame(coleccion.find({'$and': [con5, con6]}, {"name":1,"number_of_employees":1, "_id":0}))
companiesemployeesweb.sort_values(by = ["number_of_employees"]).head(5)

Unnamed: 0,name,number_of_employees
5,Expedia,4400
2,AOL,8000
3,Webkinz,8657
4,Rakuten,10000
7,Los Angeles Times Media Group,10000


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [None]:
# Your Code

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [207]:
# Your Code
companies4 = pd.DataFrame(coleccion.find({'acquired_month': {"$lte": 3}}, {"name":1,"acquired_month":1, "_id":0}))
companies4.head(2)

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [None]:
# Your Code

### 20. All the companies that have been 'deadpooled' after the third year.

In [None]:
# Your Code