# Advanced Querying Mongo

Importing libraries and setting up connection

In [None]:
from pymongo import MongoClient
# Open the connection to the MongoDB server used throughout this notebook.
client = MongoClient(host="localhost:27017")

In [None]:
client

In [None]:
client.list_database_names()

In [None]:
# Handle to the "Lab" database (subscript access is equivalent to get_database).
db = client["Lab"]

In [None]:
db

In [None]:
# Handle to the "Oficina" collection that every query below runs against.
c = db["Oficina"]

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [None]:
# Projection: hide the ObjectId and return only the company name.
proj = dict(_id=0, name=1)

In [None]:
# Exact-match lookup on the company name, returning only the projected fields.
babelgum = list(c.find(filter={"name": "Babelgum"}, projection=proj))

In [None]:
babelgum

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [None]:
# Companies with more than 5000 employees: ascending by headcount, first 20.
big_companies = list(
    c.find(
        filter={"number_of_employees": {"$gt": 5000}},
        sort=[("number_of_employees", 1)],
        limit=20,
    )
)

In [None]:
big_companies

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [None]:
# Projection for the founding-year queries: company name and founding year only.
# The field is spelled "founded_year" (with an underscore); a key containing a
# space does not match any field and would silently drop the year from the output.
foundation_proj = {"_id": 0, "name": 1, "founded_year": 1}

In [None]:
# Companies founded from 2000 to 2005 inclusive.
# Both bounds must live in ONE operator document: repeating the "founded_year"
# key in a Python dict literal keeps only the last value, which would discard
# the $gte bound. The founding-year projection is used so the year is returned.
foundation = list(
    c.find({"founded_year": {"$gte": 2000, "$lte": 2005}}, foundation_proj)
)

In [None]:
foundation

In [None]:
# Same range query written with an explicit $and.
# Each condition is its own document in the $and list; placing both under the
# same key in a single dict would let the second silently overwrite the first.
foundation_2 = list(
    c.find(
        {
            "$and": [
                {"founded_year": {"$gte": 2000}},
                {"founded_year": {"$lte": 2005}},
            ]
        },
        foundation_proj,
    )
)

In [None]:
foundation_2

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [None]:
# Projection: hide the ObjectId, return the company name and its IPO sub-document.
ipo_proj = dict(_id=0, name=1, ipo=1)

In [None]:
# IPO valuation above 100,000,000 and founded before 2010, written with $and.
# The operator is "$and"; "&and" would be treated as an ordinary field name
# and match nothing.
ipo = list(
    c.find(
        {
            "$and": [
                {"ipo.valuation_amount": {"$gt": 100000000}},
                {"founded_year": {"$lt": 2010}},
            ]
        },
        ipo_proj,
    )
)

In [None]:
# Implicit-AND form: both conditions sit in the same filter document.
ipo2 = list(
    c.find(
        filter={
            "ipo.valuation_amount": {"$gt": 100000000},
            "founded_year": {"$lt": 2010},
        },
        projection=ipo_proj,
    )
)

In [None]:
ipo2

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [None]:
# Fewer than 1000 employees and founded before 2005; ascending by headcount, first 10.
small_companies = list(
    c.find(
        filter={
            "number_of_employees": {"$lt": 1000},
            "founded_year": {"$lt": 2005},
        },
        sort=[("number_of_employees", 1)],
        limit=10,
    )
)

In [None]:
small_companies

### 6. All the companies that don't include the `partners` field.

In [None]:
# One-element list holding the four-character string "None".
# NOTE(review): this is a string literal, not Python's None / BSON null.
lista = ["None"]

In [None]:
# Companies whose documents do not contain a `partners` field at all.
# $exists: False tests for the absence of the field; comparing its value with
# the string "None" would instead match every document whose value differs.
no_partners = list(c.find({"partners": {"$exists": False}}))

In [None]:
no_partners

### 7. All the companies that have a null type of value on the `category_code` field.

In [None]:
# Following MongoDB's "Query for Null or Missing Fields" guide: the string
# "null" only matches the literal text "null", which is why that filter came
# back empty. BSON type 10 is Null, so $type: 10 selects documents whose
# category_code is stored as an explicit null (a plain None filter would also
# match documents where the field is missing).
null_type = list(c.find({"category_code": {"$type": 10}}))

In [None]:
null_type

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [None]:
# Projection: hide the ObjectId, return the company name and its headcount.
proj_employees = {"_id": 0, "name": 1, "number_of_employees": 1}

In [None]:
# At least 100 employees but strictly fewer than 1000.
# Both bounds go in one operator document: a repeated "number_of_employees"
# key in a dict literal would keep only the last bound. $lt (not $lte) keeps
# companies with exactly 1000 employees out of the result.
employees = list(
    c.find({"number_of_employees": {"$gte": 100, "$lt": 1000}}, proj_employees)
)

In [None]:
employees

### 9. Order all the companies by their IPO price in a descending order.

In [None]:
# Companies that have an IPO valuation on record, highest valuation first.
ipo = list(
    c.find(
        filter={"ipo.valuation_amount": {"$exists": True}},
        sort=[("ipo.valuation_amount", -1)],
    )
)

In [None]:
ipo

### 10. Retrieve the 10 companies with the most employees, ordered by the `number of employees`.

In [None]:
# Top ten companies by headcount, largest first.
biggest_c = list(c.find(sort=[("number_of_employees", -1)], limit=10))

In [None]:
biggest_c

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [None]:
# Companies founded in the second half of the year (July through December).
# The lower bound is 7: month 6 (June) still belongs to the first semester.
second_semester = list(c.find({"founded_month": {"$gte": 7}}).limit(1000))

In [None]:
second_semester

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.000

In [None]:
# Companies founded before 2000 whose acquisition price exceeds 10,000,000.
# The amount lives under acquisition.price_amount (the IPO valuation is a
# different figure). The exercise's threshold is read as ten million.
earlier_2000 = list(
    c.find(
        {
            "founded_year": {"$lt": 2000},
            "acquisition.price_amount": {"$gt": 10000000},
        }
    )
)

In [None]:
earlier_2000

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [None]:
# Projection: hide the ObjectId, return the name and the acquisition sub-document.
proj_acquired = dict(_id=0, name=1, acquisition=1)

In [None]:
# Companies acquired after 2010, ascending by acquisition price.
after_2010 = list(
    c.find(
        filter={"acquisition.acquired_year": {"$gt": 2010}},
        projection=proj_acquired,
        sort=[("acquisition.price_amount", 1)],
    )
)

In [None]:
after_2010

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [None]:
# Projection: hide the ObjectId, return the company name and founding year.
proj_name_year = dict(_id=0, name=1, founded_year=1)

In [None]:
# Every company, most recently founded first, with only name and founding year.
founded_year = list(
    c.find(filter={}, projection=proj_name_year, sort=[("founded_year", -1)])
)

In [None]:
founded_year

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [None]:
# Companies founded on days 1-7 of the month, highest acquisition price first.
# The exercise caps the result at 10 documents, so the cursor is limited.
first_week = list(
    c.find({"founded_day": {"$lte": 7}})
    .sort("acquisition.price_amount", -1)
    .limit(10)
)

In [None]:
first_week

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [None]:
# 'web' companies with more than 4000 employees, smallest headcount first.
# The threshold is 4000 and the sort direction is 1 (ascending), as the
# exercise statement requires.
web = list(
    c.find({"category_code": "web", "number_of_employees": {"$gt": 4000}})
    .sort("number_of_employees", 1)
)

In [None]:
web

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [None]:
# Acquisitions priced above 10,000,000 whose currency code is EUR.
currency = list(
    c.find(
        filter={
            "acquisition.price_amount": {"$gt": 10000000},
            "acquisition.price_currency_code": "EUR",
        }
    )
)

In [None]:
currency

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [None]:
# Projection: hide the ObjectId, return the name and the acquisition sub-document.
proj_f_trim = dict(_id=0, name=1, acquisition=1)

In [None]:
# Companies acquired in months 1-3, capped at ten results.
f_trim = list(
    c.find(
        filter={"acquisition.acquired_month": {"$lte": 3}},
        projection=proj_f_trim,
        limit=10,
    )
)

In [None]:
f_trim

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [None]:
# Companies founded from 2000 to 2010 whose acquisition did not happen before 2011.
# - Both founding-year bounds share one operator document; a repeated
#   "founded_year" key in a dict literal would keep only the last bound.
# - "Not acquired before 2011" means acquired_year >= 2011; $lt 2011 would
#   select exactly the companies the exercise wants to exclude.
# NOTE(review): this form only returns companies with an acquisition on record;
# confirm whether never-acquired companies should be included as well.
founded_b_2000_2010 = list(
    c.find(
        {
            "founded_year": {"$gte": 2000, "$lte": 2010},
            "acquisition.acquired_year": {"$gte": 2011},
        }
    )
)

In [None]:
founded_b_2000_2010

### 20. All the companies that have been 'deadpooled' after the third year.

In [None]:
# Companies deadpooled more than three years after they were founded.
# NOTE(review): assumes deadpooled_year and founded_year are calendar years
# (founded_year is compared against values such as 2000 elsewhere in this
# notebook) -- confirm against the data. Comparing deadpooled_year itself
# with 3 would then match every company that has a deadpool year.
# $expr (MongoDB 3.6+) allows comparing two fields of the same document; the
# $type guards skip documents where either year is missing or non-numeric.
deadpooled = list(
    c.find(
        {
            "deadpooled_year": {"$type": "number"},
            "founded_year": {"$type": "number"},
            "$expr": {
                "$gt": [
                    {"$subtract": ["$deadpooled_year", "$founded_year"]},
                    3,
                ]
            },
        }
    )
)

In [None]:
deadpooled