# Advanced Querying Mongo

Importing libraries and setting up connection

In [3]:
from pymongo import MongoClient
# Open a client against the MongoDB server listening on localhost, port 27017.
client = MongoClient(host="localhost:27017")

In [5]:
#%pip install pymongo

Collecting pymongo
  Using cached pymongo-3.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (527 kB)
Installing collected packages: pymongo
Successfully installed pymongo-3.12.1
Note: you may need to restart the kernel to use updated packages.


In [99]:
# Show the names of the databases available on the connected server.
client.list_database_names()

['admin', 'companies', 'config', 'ironhack', 'local']

In [100]:
# Handle to the "companies" database (subscript access, default options).
db = client["companies"]

In [101]:
# Display the database handle to confirm which database is selected.
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'companies')

In [102]:
# Handle to the "companies" collection that every query below runs against.
c = db["companies"]

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [105]:
# Companies whose name is exactly 'Babelgum'.
# MongoDB returns `_id` unless it is excluded explicitly, so it is switched
# off here to leave `name` as the only field in each result.
proy = {"_id": 0, "name": 1}
cond = {"name": "Babelgum"}
name_babel = list(c.find(cond, proy))

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [106]:
# Companies with more than 5000 employees: the first 20 in ascending order
# of headcount. Sort and limit are passed as `find` options instead of being
# chained on the cursor.
cond = {"number_of_employees": {"$gt": 5000}}
employ_5000 = list(
    c.find(cond, sort=[("number_of_employees", 1)], limit=20)
)

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [107]:
# Companies founded from 2000 to 2005, both years included.
# `_id` is excluded explicitly so that only `name` and `founded_year` come back.
proy = {"_id": 0, "name": 1, "founded_year": 1}
cond1 = {"founded_year": {"$gte": 2000}}
cond2 = {"founded_year": {"$lte": 2005}}
comp_2000_2005 = list(c.find({"$and": [cond1, cond2]}, proy))

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [108]:
# Companies founded before 2010 with a valuation above 100,000,000.
# Only `name` and `ipo` are returned (`_id` is excluded explicitly).
proy = {"_id": 0, "name": 1, "ipo": 1}
cond1 = {"founded_year": {"$lt": 2010}}
# The valuation is a number nested inside the `ipo` sub-document, so it is
# addressed with dot notation; comparing a whole sub-document to a number
# never matches.
# NOTE(review): assumes the Crunchbase sample schema (`ipo.valuation_amount`).
cond2 = {"ipo.valuation_amount": {"$gt": 100000000}}
# Combine the two conditions defined in this cell. The previous version passed
# `cond`, a leftover from an earlier cell, so neither condition was applied.
val_100k_b2010 = list(c.find({"$and": [cond1, cond2]}, proy))

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [109]:
# Companies with fewer than 1000 employees that were founded before 2005:
# the first 10 in ascending order of headcount.
cond = {"number_of_employees": {"$lt": 1000}}
cond1 = {"founded_year": {"$lt": 2005}}
empl_1k_b2005 = list(
    c.find(
        {"$and": [cond, cond1]},
        sort=[("number_of_employees", 1)],
        limit=10,
    )
)

### 6. All the companies that don't include the `partners` field.

In [110]:
# Companies whose documents have no `partners` field at all.
cond = {"partners": {"$exists": False}}
partners_none = list(c.find(filter=cond))

### 7. All the companies that have a null type of value on the `category_code` field.

In [148]:
# Companies whose `category_code` is stored as a BSON null.
cond = {"category_code": {"$type": "null"}}
# No limit: the task asks for all matching companies. The previous
# `.limit(1)` kept only the first match.
categ_null = list(c.find(cond))


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [114]:
# Companies with at least 100 but fewer than 1000 employees.
# Only `name` and `number_of_employees` are returned (`_id` is excluded).
proy = {"_id": 0, "name": 1, "number_of_employees": 1}
cond = {"number_of_employees": {"$gte": 100}}
cond1 = {"number_of_employees": {"$lt": 1000}}
# The projection is now passed to `find`; it used to be built and then ignored,
# so full documents were returned.
empl_100_1k = list(c.find({"$and": [cond, cond1]}, proy))

### 9. Order all the companies by their IPO price in a descending order.

In [157]:
# IPO: initial public offering
# IPO: initial public offering.
# Field names are case-sensitive and the IPO data lives in the lowercase `ipo`
# sub-document, so sorting on 'IPO' had no effect on the order.
# NOTE(review): assumes the Crunchbase sample schema, where the IPO price is
# `ipo.valuation_amount`. Documents without it sort last in descending order.
ipo = list(c.find().sort("ipo.valuation_amount", -1))

### 10. Retrieve the 10 companies with the most employees, ordered by the `number of employees`.

In [151]:
# The 10 companies with the largest headcount.
proy = {"name": 1, "number_of_employees": 1}
# Keep only documents with a non-negative numeric headcount.
cond = {"number_of_employees": {"$gte": 0}}
# Sort descending so the limit keeps the largest companies; the previous
# ascending sort returned the 10 smallest instead.
max_empl = list(c.find(cond, proy).sort("number_of_employees", -1).limit(10))

### 11. All the companies founded in the second semester (second half) of the year. Limit your search to 1000 companies.

In [153]:
# Companies founded in a month after June, capped at 1000 documents.
cond = {"founded_month": {"$gt": 6}}
fo_sec_sem = list(c.find(cond, limit=1000))

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.000.

In [155]:
# Companies founded before 2000 that were acquired for more than 10,000,000.
cond = {"founded_year": {"$lt": 2000}}
# The acquisition price is nested inside the `acquisition` sub-document, so it
# is addressed with dot notation; comparing the sub-document itself to a number
# never matches. The threshold is 10 million (it was 100 million).
# NOTE(review): assumes the Crunchbase sample schema (`acquisition.price_amount`).
cond1 = {"acquisition.price_amount": {"$gt": 10000000}}
b_2000_acq_100k = list(c.find({"$and": [cond, cond1]}))

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [156]:
# Companies acquired after 2010, ordered by acquisition amount (ascending).
# Only `name` and `acquisition` are returned (`_id` is excluded explicitly).
proy = {"_id": 0, "name": 1, "acquisition": 1}
# Filter on the acquisition year, not the founding year.
# NOTE(review): assumes the Crunchbase sample schema, where the acquisition
# date and price are nested under `acquisition`.
cond = {"acquisition.acquired_year": {"$gt": 2010}}
# Sort on the numeric price rather than on the whole sub-document.
aft_2010_acq = list(c.find(cond, proy).sort("acquisition.price_amount", 1))

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [158]:
# Every company ordered by founding year (ascending), returning only `name`
# and `founded_year` (`_id` is excluded explicitly).
proy = {"_id": 0, "name": 1, "founded_year": 1}
# The first argument of `find` is the filter. The projection used to be passed
# there, which matched only documents with name == 1 and founded_year == 1.
# An empty filter selects every document.
f_year_order = list(c.find({}, proy).sort("founded_year", 1))

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [159]:
# Companies founded on days 1-7 of a month, most expensive acquisitions first,
# capped at 10 documents.
cond = {"founded_day": {"$lte": 7}}
# Sort on the numeric price inside the `acquisition` sub-document; sorting on
# the sub-document itself does not order by price.
# NOTE(review): assumes the Crunchbase sample schema (`acquisition.price_amount`).
fo_7_day = list(c.find(cond).sort("acquisition.price_amount", -1).limit(10))

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [160]:
# 'web' companies with more than 4000 employees, ascending by headcount.
# The category is stored in `category_code` (the field used by the null-type
# query earlier in this notebook); a `category` filter matches nothing.
cond = {"category_code": "web"}
cond1 = {"number_of_employees": {"$gt": 4000}}
cat_web = list(c.find({"$and": [cond, cond1]}).sort("number_of_employees", 1))

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [162]:
# Companies acquired for more than 10,000,000 where the price is in EUR.
# Both values are nested inside the `acquisition` sub-document and are
# addressed with dot notation. The threshold is 10 million (it was 100 million).
# NOTE(review): assumes the Crunchbase sample schema
# (`acquisition.price_amount`, `acquisition.price_currency_code`).
cond = {"acquisition.price_amount": {"$gt": 10000000}}
cond1 = {"acquisition.price_currency_code": "EUR"}
acq_100k_curr_eu = list(c.find({"$and": [cond, cond1]}))

### 18. All the companies that have been acquired in the first quarter of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [164]:
# Companies acquired from January to March, capped at 10 documents.
# Only `name` and `acquisition` are returned (`_id` is excluded explicitly).
proy = {"_id": 0, "name": 1, "acquisition": 1}
# The acquisition month is nested inside the `acquisition` sub-document.
# NOTE(review): assumes the Crunchbase sample schema (`acquisition.acquired_month`).
cond = {"acquisition.acquired_month": {"$lte": 3}}
acq_first_tri = list(c.find(cond, proy).limit(10))

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [177]:
# Companies founded from 2000 to 2010 and acquired in 2011 or later.
# NOTE(review): assumes the Crunchbase sample schema, where the acquisition
# year is nested at `acquisition.acquired_year`. Companies that were never
# acquired are not included; confirm whether the task expects them.
cond = {"acquisition.acquired_year": {"$gte": 2011}}
cond1 = {"founded_year": {"$gte": 2000}}
# Upper bound of the founding range. It used to be `$gte` 2010, which narrowed
# the range to 2010 onwards.
cond2 = {"founded_year": {"$lte": 2010}}

bonus1 = list(c.find({"$and": [cond, cond1, cond2]}))

### 20. All the companies that have been 'deadpooled' after the third year.

In [178]:
# Companies that were deadpooled more than three years after being founded.
# NOTE(review): assumes the Crunchbase sample schema, where the field is
# `deadpooled_year` (not `deadpool_year`). "After the third year" is read as
# relative to the founding year, since comparing a calendar year with 3 is
# true for every value. Confirm this is the intended reading.
# `$expr` needs MongoDB 3.6 or later.
bonus2 = list(
    c.find(
        {
            # Both years must be numeric so the subtraction is meaningful.
            "founded_year": {"$type": "number"},
            "deadpooled_year": {"$type": "number"},
            "$expr": {
                "$gt": [
                    {"$subtract": ["$deadpooled_year", "$founded_year"]},
                    3,
                ]
            },
        }
    )
)