# Advanced Querying Mongo

Importing libraries and setting up connection

In [4]:
from pymongo import MongoClient
import pandas as pd
import time

client = MongoClient("localhost:27017")
db = client["ironhack"]
c=db.get_collection("Crunchbase")

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [128]:
condition={"name":"Babelgum"}
projection = {"_id":0,"name":1}

list(c.find(condition, projection))

[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [169]:
filter_ = {"number_of_employees":{"$gt": 5000}}

result_2 = list(c.find(filter_).sort("number_of_employees",-1).limit(20))


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [170]:
filter_1 = {"founded_year":{ "$gte": 2000}}
filter_2 = {"founded_year":{ "$lte": 2005}}
projection = {"_id":0,"name":1, "founded_year":1}

result_3 = list(c.find({"$and": [filter_1, filter_2]}, projection))


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [171]:
filter_1 = {"ipo.valuation_amount":{ "$gt": 100000000}}
filter_2 = {"founded_year":{ "$lt": 2010}}
projection = {"_id":0,"name":1, "ipo":1}

result_4 = list(c.find({"$and": [filter_1, filter_2]}, projection))


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [172]:
filter_1 = {"number_of_employees":{ "$lt": 1000}}
filter_2 = {"founded_year":{ "$lt": 2005}}
projection = {"_id":0,"name":1}

result_5 = list(c.find({"$and": [filter_1, filter_2]}, projection).sort("number_of_employees",-1).limit(10))


### 6. All the companies that don't include the `partners` field.

In [173]:
result_6 = list(c.find({"partners": {"$exists":False}}))


### 7. All the companies that have a null type of value on the `category_code` field.

In [174]:
projection = {"_id":0,"name":1}


result_7 = list(c.find({"category_code": {"$type":10}},projection))


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [175]:
filter_1 = {"number_of_employees":{ "$gte": 100}}
filter_2 = {"number_of_employees":{ "$lt": 1000}}
projection = {"_id":0,"name":1, "number_of_employees":1}


result_8 = list(c.find({"$and": [filter_1, filter_2]}, projection))


### 9. Order all the companies by their IPO price in a descending order.

In [176]:
filter_1 = {"ipo.valuation_amount": {"$exists":True}}
projection = {"_id":0,"name":1}

result_9 = list(c.find(filter_1, projection).sort("ipo.valuation_amount", -1))


In [137]:
len(result)

254

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [177]:

filter_1 = {"number_of_employees":{ "$gt": 0}}
projection = {"_id":0,"name":1, "number_of_employees":1}

result_10 = list(c.find(filter_1, projection).sort("number_of_employees", -1))[:10]


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [178]:

filter_2 = {"founded_month":{ "$lt": 6}}
projection = {"_id":0,"name":1}


result_11 = list(c.find({"$and": [filter_2]}, projection).limit(1000))


### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [179]:
filter_1 = {"acquisition.price_amount":{ "$gt": 10000}}
filter_2 = {"founded_year":{ "$lt": 2000}}
projection = {"_id":0,"name":1}


result_12 = list(c.find({"$and": [filter_1, filter_2]}, projection))


In [141]:
len(result)

254

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [180]:
filter_1 = {"acquisition.acquired_year":{ "$gt": 2010}}
projection = {"_id":0,"name":1, "acquisition":1}


result_13 = list(c.find(filter_1, projection).sort("acquisition.price_amount",-1))


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [181]:
projection = {"_id":0,"name":1, "founded_year":1}

result_14 = list(c.find({}, projection).sort("founded_year",-1))


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [182]:
filter_1 = {"founded_day":{ "$lte": 7}}
projection = {"_id":0,"name":1, "acquisition":1}


result_15 = list(c.find(filter_1, projection).sort("acquisition.price_amount",-1).limit(10))


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [183]:
filter_1 = {"category_code":"web"}
filter_2 = {"number_of_employees":{ "$gt": 4000}}

projection = {"_id":0,"name":1, "number_of_employees":1}


result_16 = list(c.find({"$and": [filter_1, filter_2]}, projection).sort("number_of_employees",-1))


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [184]:
filter_1 = {"acquisition.price_currency_code":"EUR"}
filter_2 = {"acquisition.price_amount":{ "$gt": 10000000}}

projection = {"_id":0,"name":1}


result_17 = list(c.find({"$and": [filter_1, filter_2]}, projection).sort("number_of_employees",-1))



### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [185]:
filter_ = {"acquisition.acquired_month":{ "$lte": 3}}

projection = {"_id":0,"name":1, "acquisition":1}


result_18 = list(c.find(filter_, projection).limit(10))


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [186]:
filter_1 = {"founded_year":{"$gt":2000}}
filter_2 = {"founded_year":{"$lt":2010}}
filter_3 = {"acquisition.acquired_year":{ "$gt": 2011}}

projection = {"_id":0,"name":1}


result_b19 = list(c.find({"$and": [filter_1, filter_2, filter_3]}, projection))
result_b19

In [188]:
len(result_b19)

254

### 20. All the companies that have been 'deadpooled' after the third year.

In [187]:
projection = {"_id":0,"name":1, "deadpooled_year":1, "founded_year":1 }


result_b20 = list(c.find({ "$where": "this.deadpooled_year - this.founded_year > 3"}, projection))
