# Advanced Querying Mongo

Importing libraries and setting up connection

In [69]:
%pip install pymongo




In [70]:
from pymongo import MongoClient
import pandas as pd
client = MongoClient("mongodb://localhost:27017/")


### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [71]:
# Your Code

client.list_database_names()

# Seleccionar la base de datos "companies"
db = client["companies"]

# Acceder a la colección "connection"
collection = db["connection"]

# Craer una query para obtener el nombre "Babelgum" y que solo me devuelva el nombre

query = {"name": "Babelgum"}

res = list(collection.find(query))

# Convertirlo en un DataFrame

df = pd.DataFrame(res)

df[["name"]]




Unnamed: 0,name
0,Babelgum


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [72]:
# Your Code

# En collection buscamos las compañías que tengan mas de 5000 empleados, limtamos a 20 resultados y ordenamos por numero de empleados de mayor a menor

query = {"number_of_employees": {"$gt": 5000}}

res = list(collection.find(query).limit(20).sort("number_of_employees", -1))

# Convertirlo en un DataFrame de pandas

df1 = pd.DataFrame(res)

# Solo mostrar la compañía y el número de empleados

df1[["name", "number_of_employees"]]




Unnamed: 0,name,number_of_employees
0,Siemens,405000
1,IBM,388000
2,Toyota,320000
3,PayPal,300000
4,Nippon Telegraph and Telephone Corporation,227000
5,Samsung Electronics,221726
6,Accenture,205000
7,Tata Consultancy Services,200300
8,Flextronics International,200000
9,Safeway,186000


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [73]:
# Your Code

# Crear una query para obtener las compañías que fueron fundadadas entre 2000 y 2005 incluidos. Solo necesitamos el nombre y el año de fundación

query = {"founded_year": {"$gte": 2000, "$lte": 2005}}

res1 = list(collection.find(query))

#convertirlo en un DataFrame

df2 = pd.DataFrame(res1)

df2[["name", "founded_year"]]



Unnamed: 0,name,founded_year
0,Wetpaint,2005
1,Zoho,2005
2,Digg,2004
3,Facebook,2004
4,Omnidrive,2005
...,...,...
3729,EnterSys Group,2000
3730,Axon Solutions,2004
3731,Intergy,2003
3732,AfterLogic,2002


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [74]:
# Your Code

# Todas las compañías que tienen un "Valuation Amount" mayor que 100.000.000 y fundadas antes de 2010

query = {"$and": [{"founded_year": {"$lt": 2010}}, {"ipo.valuation_amount": {"$gt": 100000000}}]}

res2 = list(collection.find(query))

# Convertirlo en un DataFrame

df3 = pd.DataFrame(res2)

df3[["name","ipo"]]

Unnamed: 0,name,ipo
0,Facebook,"{'valuation_amount': 104000000000.0, 'valuatio..."
1,Twitter,"{'valuation_amount': 18100000000.0, 'valuation..."
2,Yelp,"{'valuation_amount': 1300000000, 'valuation_cu..."
3,LinkedIn,"{'valuation_amount': 9310000000.0, 'valuation_..."
4,Amazon,"{'valuation_amount': 100000000000.0, 'valuatio..."
5,Brightcove,"{'valuation_amount': 290000000, 'valuation_cur..."
6,KIT digital,"{'valuation_amount': 235000000, 'valuation_cur..."
7,Nielsen,"{'valuation_amount': 1600000000, 'valuation_cu..."
8,OpenTable,"{'valuation_amount': 1050000000, 'valuation_cu..."
9,ChannelAdvisor,"{'valuation_amount': 287000000, 'valuation_cur..."


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [75]:
# Your Code

# Todas las compañías que tienen un número de empleados < 1000 y fundadas antes de 2005

query = {"$and": [{"founded_year": {"$lt": 2005}}, {"number_of_employees": {"$lt": 1000}}]}

# Ahora ordenarlas por número de empleados y limitar a 10 resultados

res3 = list(collection.find(query).sort("number_of_employees", -1).limit(10))

# Convertirlo en un DataFrame

df4 = pd.DataFrame(res3)

df4[["founded_year","name","number_of_employees"]]




Unnamed: 0,founded_year,name,number_of_employees
0,2000,Infinera Corporation,974
1,1997,NorthPoint Communications Group,948
2,1997,888 Holdings,931
3,1983,Forrester Research,903
4,1991,SonicWALL,900
5,1999,Webmetrics,900
6,1999,Cornerstone OnDemand,881
7,1998,Mozilla,800
8,1999,Buongiorno,800
9,2004,Yelp,800


### 6. All the companies that don't include the `partners` field.

In [91]:
# Your Code
# Creamos una query para ver las compañías que no incluyan el campo "partners"

query = {"partners": {"$exists": False}}

res4 = list(collection.find(query))

res4

[]

### 7. All the companies that have a null type of value on the `category_code` field.

In [94]:
# Your Code

# Creamos una query para las compañias que tengan null el campo "category_code"

query = {"category_code": {"$eq": None}}

res5 = list(collection.find(query))

# Creamos un DataFrame

df5 = pd.DataFrame(res5)

df5[["name","category_code"]]

Unnamed: 0,name,category_code
0,Collective,
1,Snimmer,
2,KoolIM,
3,Level9 Media,
4,VidKing,
...,...,...
2746,Nellix,
2747,Cantimer,
2748,cruisecritic,
2749,Coloroot,


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [95]:
# Your Code

# Creamos una query para las compañias que tengan >= 100 empleados y < 1000

query = {"$and": [{"number_of_employees": {"$gte": 100}}, {"number_of_employees": {"$lt": 1000}}]}

res6 = list(collection.find(query))

# Creamos un DataFrame

df6 = pd.DataFrame(res6)

df6[["name","number_of_employees"]]

Unnamed: 0,name,number_of_employees
0,AdventNet,600
1,AddThis,120
2,OpenX,305
3,LifeLock,644
4,Jajah,110
...,...,...
912,UOL (Universo Online),500
913,NextLabs,100
914,OfficialVirtualDJ,102
915,Willdan Group,385


### 9. Order all the companies by their IPO price in a descending order.

In [79]:
# Your Code

# Creamos una query para todas las compañias por IPO price y ordenamos de mayor a menor

q

q

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [97]:
# Your Code

# Creamos una query para las compañias con mayor numero de empleados y ordenamos de mayor a menor y limitamos a 10 resultados

query = {"number_of_employees": {"$gt": 1000}}

res7 = list(collection.find(query).sort("number_of_employees", -1).limit(10))

# Creamos un DataFrame

df7 = pd.DataFrame(res7)

df7[["name","number_of_employees"]]



Unnamed: 0,name,number_of_employees
0,Siemens,405000
1,IBM,388000
2,Toyota,320000
3,PayPal,300000
4,Nippon Telegraph and Telephone Corporation,227000
5,Samsung Electronics,221726
6,Accenture,205000
7,Tata Consultancy Services,200300
8,Flextronics International,200000
9,Safeway,186000


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [98]:
# Your Code

# Creamos una query para compañias fundad en el los ultimos seis meses de cada año

query = {"founded_month": {"$gte": 6}}

# Limitamos a 1000 resultados

res8 = list(collection.find(query).limit(1000))

# Creamos un DataFrame

df8 = pd.DataFrame(res8)

df8[["name","founded_month"]]

Unnamed: 0,name,founded_month
0,Wetpaint,10
1,Zoho,9
2,Digg,10
3,Omnidrive,11
4,Postini,6
...,...,...
995,Openfilm,11
996,uCubd,9
997,MyGreat,7
998,SquareClock,12


### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [99]:
# Your Code

# Creamos una query para las compañias fundadas antes de 2000 que tengan un acquisition amount mayor que 10.000.000

query = {"$and": [{"founded_year": {"$lt": 2000}}, {"acquisition.price_amount": {"$gt": 10000000}}]}

res9 = list(collection.find(query))

# Creamos un DataFrame

df9 = pd.DataFrame(res9)

df9[["name","founded_year","acquisition"]]

Unnamed: 0,name,founded_year,acquisition
0,Postini,1999,"{'price_amount': 625000000, 'price_currency_co..."
1,SideStep,1999,"{'price_amount': 180000000, 'price_currency_co..."
2,Recipezaar,1999,"{'price_amount': 25000000, 'price_currency_cod..."
3,PayPal,1998,"{'price_amount': 1500000000, 'price_currency_c..."
4,Snapfish,1999,"{'price_amount': 300000000, 'price_currency_co..."
...,...,...,...
200,Savvion,1994,"{'price_amount': 49000000, 'price_currency_cod..."
201,Inventa Technologies,1993,"{'price_amount': 30000000, 'price_currency_cod..."
202,Universal Microwave,1998,"{'price_amount': 23200000, 'price_currency_cod..."
203,Advanced Control Components,1982,"{'price_amount': 18780000, 'price_currency_cod..."


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [100]:
# Your Code

# Creamos una query que han sido adquiridas despues de 2010

query = {"acquisition.acquired_year": {"$gt": 2010}}

# Las ordenamos por acquisition amount de mayor a menor

res10 = list(collection.find(query).sort("acquisition.price_amount", -1))

# Creamos un DataFrame

df10 = pd.DataFrame(res10)

df10[["name","acquisition"]]

Unnamed: 0,name,acquisition
0,T-Mobile,"{'price_amount': 39000000000.0, 'price_currenc..."
1,Goodrich Corporation,"{'price_amount': 18400000000.0, 'price_currenc..."
2,LSI,"{'price_amount': 6600000000.0, 'price_currency..."
3,National Semiconductor,"{'price_amount': 6500000000.0, 'price_currency..."
4,Ariba,"{'price_amount': 4300000000.0, 'price_currency..."
...,...,...
731,MediaPal,"{'price_amount': None, 'price_currency_code': ..."
732,Vertro,"{'price_amount': None, 'price_currency_code': ..."
733,ALOT,"{'price_amount': None, 'price_currency_code': ..."
734,Celestial Semiconductor,"{'price_amount': None, 'price_currency_code': ..."


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [102]:
# Your Code

# Creamos una query para ordenar las compañias por su founded year de mayor a menor

query = {"founded_year": {"$exists": True}}

select = {"name": 1, "founded_year": 1}

res11 = list(collection.find(query, select).sort("founded_year", -1))

# Creamos un DataFrame

df11 = pd.DataFrame(res11)

df11

Unnamed: 0,_id,name,founded_year
0,52cdef7c4bab8bd675297fec,Fixya,2013.0
1,52cdef7c4bab8bd67529801f,Wamba,2013.0
2,52cdef7c4bab8bd6752982d4,Advaliant,2013.0
3,52cdef7c4bab8bd67529830a,Fluc,2013.0
4,52cdef7d4bab8bd675298ea7,iBazar,2013.0
...,...,...,...
18796,52cdef7f4bab8bd67529c6ec,Embedster,
18797,52cdef7f4bab8bd67529c6f1,Willdan Group,
18798,52cdef7f4bab8bd67529c6f4,Geekdive,
18799,52cdef7f4bab8bd67529c6f8,goBookmaker,


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [103]:
# Your Code

# Creamos una query para las compañias que hayan sido fundadas en los 7 primeros dias del mes e incluido el dia 7

query = {"founded_day": {"$lte": 7}}

# Las ordenaamos aquisition price de mayor a menor y limitamos a 10 resultados

res12 = list(collection.find(query).sort("acquisition.price_amount", -1).limit(10))

# Creamos un DataFrame

df12 = pd.DataFrame(res12)

df12[["name","founded_day","acquisition"]]

Unnamed: 0,name,founded_day,acquisition
0,Netscape,4,"{'price_amount': 4200000000.0, 'price_currency..."
1,PayPal,1,"{'price_amount': 1500000000, 'price_currency_c..."
2,Zappos,1,"{'price_amount': 1200000000, 'price_currency_c..."
3,Alibaba,1,"{'price_amount': 1000000000, 'price_currency_c..."
4,Postini,2,"{'price_amount': 625000000, 'price_currency_co..."
5,Danger,1,"{'price_amount': 500000000, 'price_currency_co..."
6,Clearwell Systems,6,"{'price_amount': 410000000, 'price_currency_co..."
7,PrimeSense,1,"{'price_amount': 345000000, 'price_currency_co..."
8,Amobee,1,"{'price_amount': 321000000, 'price_currency_co..."
9,BlueLithium,1,"{'price_amount': 300000000, 'price_currency_co..."


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [104]:
# Your Code

# Creamos una query para las compañias que estan en la categoria web y tienen mas de 4000 empleados

query = {"$and": [{"category_code": "web"}, {"number_of_employees": {"$gt": 4000}}]}

# Las ordenamos por numero de empleados de mayor a menor

res13 = list(collection.find(query).sort("number_of_employees", -1))

# Creamos un DataFrame

df13 = pd.DataFrame(res13)

df13[["name","category_code","number_of_employees"]]

Unnamed: 0,name,category_code,number_of_employees
0,Experian,web,15500
1,eBay,web,15000
2,Yahoo!,web,13600
3,Rakuten,web,10000
4,Los Angeles Times Media Group,web,10000
5,Groupon,web,10000
6,Webkinz,web,8657
7,AOL,web,8000
8,Expedia,web,4400


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [110]:
# Your Code

# Creamos una query para las compañias que tengan un valor de adquisicion mayor que 10.000.000 y la currency sea EUR

query = {"$and": [{"acquisition.price_amount": {"$gt": 10000000}}, {"acquisition.price_currency_code": "EUR"}]}

res14 = list(collection.find(query))

# Creamos un DataFrame

df14 = pd.DataFrame(res14)

import dtale

dtale.show(df14)







### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [111]:
# Your Code

# Creamos una query para las compañias que hayan sido adquiridas en los tres primeros meses del año

query = {"acquisition.acquired_month": {"$lte": 3}}

# Limitamos a 10 resultados

res15 = list(collection.find(query).limit(10))

# Creamos un DataFrame

df15 = pd.DataFrame(res15)

df15[["name","acquisition"]]



Unnamed: 0,name,acquisition
0,Kyte,"{'price_amount': None, 'price_currency_code': ..."
1,NetRatings,"{'price_amount': 327000000, 'price_currency_co..."
2,blogTV,"{'price_amount': None, 'price_currency_code': ..."
3,Livestream,"{'price_amount': None, 'price_currency_code': ..."
4,iContact,"{'price_amount': 169000000, 'price_currency_co..."
5,Coghead,"{'price_amount': None, 'price_currency_code': ..."
6,Dailymotion,"{'price_amount': 168000000, 'price_currency_co..."
7,Netvibes,"{'price_amount': None, 'price_currency_code': ..."
8,Flickr,"{'price_amount': None, 'price_currency_code': ..."
9,BabyCenter,"{'price_amount': None, 'price_currency_code': ..."


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [89]:
# Your Code

### 20. All the companies that have been 'deadpooled' after the third year.

In [90]:
# Your Code