# Advanced Querying Mongo

Importing libraries and setting up connection

In [132]:
# Imports are grouped first so no statement runs between them.
import pandas as pd
from pymongo import MongoClient

# Client for the MongoDB server at localhost:27017; the next cell derives the
# database and collection handles from it.
client = MongoClient("localhost:27017")

### 1. All the companies whose name matches 'Babelgum'. Retrieve only their `name` field.

In [64]:
# Handles for the "Ironhack" database and its "Companies" collection; `c` is
# the collection object the queries in this notebook run against.
db = client.get_database("Ironhack")
c = db["Companies"]

# One document whose name is exactly "Babelgum", projected down to `name`
# (`_id` is switched off explicitly). find_one yields at most one document.
name = c.find_one(
    {"name": "Babelgum"},
    {"name": 1, "_id": 0},
)
# c.count_documents({"name": "Babelgum"})
# name

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [66]:
# Companies with strictly more than 5000 employees, ordered by head count
# (ascending) and capped at 20 documents.
filter_ = {"number_of_employees": {"$gt": 5000}}
employe = list(
    c.find(filter_)
    .sort("number_of_employees", 1)  # 1 = ascending
    .limit(20)
)
# employe

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [4]:
# Inclusive founding-year window 2000..2005, written as two one-sided bounds
# that the query below joins with "$and".
filter_3 = {"founded_year": {"$gte": 2000}}  # lower bound, inclusive
filter_2 = {"founded_year": {"$lte": 2005}}  # upper bound, inclusive

# Keep only `name` and `founded_year`; `_id` is excluded explicitly.
projection = {
    "name": 1,
    "_id": 0,
    "founded_year": 1,
}
# list(c.find({"$and": [filter_2, filter_3]}, projection))

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [5]:
# Task 4: IPO valuation above 100,000,000 AND founded before 2010.
# "Before 2010" is a strict bound, so 2010 itself is excluded.
filter_2 = {"founded_year": {"$lt": 2010}}
filter_3 = {"ipo.valuation_amount": {"$gt": 100000000}}

# Keep only `name` and the `ipo` sub-document; `_id` is excluded explicitly.
projection = {"name": 1, "_id": 0, "ipo": 1}
# list(c.find({"$and": [filter_2, filter_3]}, projection))

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [6]:
# Task 5: fewer than 1000 employees AND founded before 2005.
# Both bounds are strict: exactly 1000 employees and the year 2005 itself
# do not qualify.
filter_2 = {"number_of_employees": {"$lt": 1000}}
filter_3 = {"founded_year": {"$lt": 2005}}

# Show the two filtered fields next to the name; `_id` is excluded explicitly.
projection = {"name": 1, "_id": 0, "founded_year": 1, "number_of_employees": 1}
# list(c.find({"$and": [filter_2, filter_3]}, projection).sort("number_of_employees", 1).limit(10))

### 6. All the companies that don't include the `partners` field.

In [7]:
# list(c.find({"partners": {"$exists": False}}))

### 7. All the companies that have a null type of value on the `category_code` field.

In [8]:
# list(c.find({"category_code": {"$type":"null"}}))

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [9]:
# Task 8: at least 100 employees (inclusive) but fewer than 1000 (exclusive).
filter_2 = {"number_of_employees": {"$gte": 100}}
filter_3 = {"number_of_employees": {"$lt": 1000}}

# Keep only `name` and `number_of_employees`; `_id` is excluded explicitly.
projection = {"name": 1, "_id": 0, "number_of_employees": 1}
# list(c.find({"$and": [filter_2, filter_3]}, projection))

### 9. Order all the companies by their IPO price in a descending order.

In [10]:
# Fields shown for the IPO ranking: company name plus the whole `ipo`
# sub-document; `_id` is excluded explicitly.
projection = {
    "name": 1,
    "_id": 0,
    "ipo": 1,
}

# Descending sort (-1) on the nested valuation amount:
# list(c.find({}, projection).sort("ipo.valuation_amount", -1))

### 10. Retrieve the 10 companies with the most employees, ordered by the `number of employees`

In [11]:
# Fields shown for the head-count ranking; `_id` is excluded explicitly.
projection = {
    "name": 1,
    "_id": 0,
    "number_of_employees": 1,
}

# Largest first (-1), capped at ten documents:
# list(c.find({}, projection).sort("number_of_employees", -1).limit(10))

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [12]:
# Second half of the year: founding month strictly after June (6) and no
# later than December (12).
filter_3 = {"founded_month": {"$lte": 12}}  # upper bound, inclusive
filter_2 = {"founded_month": {"$gt": 6}}    # lower bound, exclusive

# Keep only `name` and `founded_month`; `_id` is excluded explicitly.
projection = {
    "name": 1,
    "_id": 0,
    "founded_month": 1,
}
# list(c.find({"$and": [filter_2, filter_3]}, projection).limit(1000))

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [24]:
# Founded strictly before 2000 and acquired for more than 1,000,000.
# NOTE(review): the task heading's "10.000.00" is ambiguous; this cell uses
# 1,000,000 as the threshold -- confirm the intended amount.
filter_3 = {"acquisition.price_amount": {"$gt": 1000000}}
filter_2 = {"founded_year": {"$lt": 2000}}

# Show the two filtered fields next to the name; `_id` is excluded explicitly.
projection = {
    "name": 1,
    "_id": 0,
    "founded_year": 1,
    "acquisition.price_amount": 1,
}
# list(c.find({"$and": [filter_2, filter_3]}, projection))

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [26]:
# Task 13: companies acquired after 2010 (strict bound, so 2010 is excluded).
filter_2 = {"acquisition.acquired_year": {"$gt": 2010}}

# Keep only `name` and the `acquisition` sub-document; `_id` is excluded.
projection = {"name": 1, "_id": 0, "acquisition": 1}

# Only `filter_2` belongs to this task, so it is passed on its own, and the
# result is ordered by acquisition amount as the task asks. The task does not
# state a direction; descending (-1) is shown here.
# list(c.find(filter_2, projection).sort("acquisition.price_amount", -1))

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [29]:
# Keep only `name` and `founded_year`; `_id` is excluded explicitly.
projection = {
    "name": 1,
    "_id": 0,
    "founded_year": 1,
}

# Sorted descending (-1) on purpose: per the author's note, an ascending sort
# would list the companies whose founded_year is None first.
# list(c.find({}, projection).sort("founded_year", -1))

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [34]:
# Founded on day 1 through 7 of the month, both ends inclusive.
filter_3 = {"founded_day": {"$lte": 7}}
filter_2 = {"founded_day": {"$gte": 1}}

# Keep only `name` and the nested acquisition price; `_id` is excluded.
projection = {
    "name": 1,
    "_id": 0,
    "acquisition.price_amount": 1,
}

# Ten matches with the highest acquisition price. This is the cell's last
# expression, so the resulting list is what the notebook displays.
list(
    c.find({"$and": [filter_2, filter_3]}, projection)
    .sort("acquisition.price_amount", -1)
    .limit(10)
)

[{'name': 'Netscape', 'acquisition': {'price_amount': 4200000000}},
 {'name': 'PayPal', 'acquisition': {'price_amount': 1500000000}},
 {'name': 'Zappos', 'acquisition': {'price_amount': 1200000000}},
 {'name': 'Alibaba', 'acquisition': {'price_amount': 1000000000}},
 {'name': 'Postini', 'acquisition': {'price_amount': 625000000}},
 {'name': 'Danger', 'acquisition': {'price_amount': 500000000}},
 {'name': 'Clearwell Systems', 'acquisition': {'price_amount': 410000000}},
 {'name': 'PrimeSense', 'acquisition': {'price_amount': 345000000}},
 {'name': 'Amobee', 'acquisition': {'price_amount': 321000000}},
 {'name': 'BlueLithium', 'acquisition': {'price_amount': 300000000}}]

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [43]:
# Task 16: companies in the 'web' category with more than 4000 employees.
filter_1 = {"category_code": "web"}
filter_2 = {"number_of_employees": {"$gt": 4000}}

# Project the same field names the filters and the sort use
# (`category_code`, `number_of_employees`), so the output shows the values
# that were matched and ordered. `_id` is excluded explicitly.
projection = {"name": 1, "_id": 0, "category_code": 1, "number_of_employees": 1}
# list(c.find({"$and": [filter_1, filter_2]}, projection).sort("number_of_employees", 1))

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [53]:
# Task 17: acquisition amount above 10,000,000 with the price in EUR.
# The currency is matched on the acquisition's own currency code. A "€" regex
# on `total_money_raised` tests a different field from the acquisition price
# and would not answer the question.
filter_1 = {"acquisition.price_currency_code": "EUR"}
filter_2 = {"acquisition.price_amount": {"$gt": 10000000}}

# Keep only `name` and the nested acquisition price; `_id` is excluded.
projection = {"name": 1, "_id": 0, "acquisition.price_amount": 1}
# list(c.find({"$and": [filter_1, filter_2]}, projection))

### 18. All the companies that have been acquired in the first quarter of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [68]:
# Acquisition month between January (1) and March (3), both inclusive.
filter_3 = {"acquisition.acquired_month": {"$lte": 3}}
filter_2 = {"acquisition.acquired_month": {"$gte": 1}}

# Keep only `name` and the `acquisition` sub-document; `_id` is excluded.
projection = {
    "name": 1,
    "_id": 0,
    "acquisition": 1,
}
# list(c.find({"$and": [filter_2, filter_3]}, projection).limit(10))




# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [85]:
# Founded strictly after 2000 and strictly before 2010.
# NOTE(review): the task does not say whether 2000 and 2010 are included;
# both are excluded here -- confirm.
filter_2 = {"founded_year": {"$lt": 2010}}
filter_1 = {"founded_year": {"$gt": 2000}}

# Acquired in 2011 or later.
# NOTE(review): this only matches documents that have an acquisition year, so
# companies that were never acquired are presumably left out -- confirm that
# is the intended reading of "not acquired before 2011".
filter_3 = {"acquisition.acquired_year": {"$gte": 2011}}

# Show the founding and acquisition years next to the name; `_id` is excluded.
projection = {
    "name": 1,
    "_id": 0,
    "founded_year": 1,
    "acquisition.acquired_year": 1,
}
# list(c.find({"$and": [filter_1, filter_2, filter_3]}, projection))


### 20. All the companies that have been 'deadpooled' after the third year.

In [178]:
# Columns for the result table: name plus the two years being compared;
# `_id` is excluded explicitly.
projection = dict(name=1, _id=0, founded_year=1, deadpooled_year=1)

# Only documents whose founded_year is stored as an int.
filter1 = {"founded_year": {"$type": "int"}}
# JavaScript predicate: more than three years between founding and deadpooling.
filter3 = {"$where": "this.deadpooled_year - this.founded_year > 3"}
# NOTE(review): a lower bound of 3 on a year value presumably serves only to
# drop documents without a numeric deadpooled_year -- confirm.
filter4 = {"deadpooled_year": {"$gte": 3}}

# a = c.find({"$and": [filter1, filter3, filter4]}, projection)
# df = pd.DataFrame(a)
# df


Unnamed: 0,name,founded_year,deadpooled_year
0,Babelgum,2007,2013
1,Thoof,2006,2013
2,Wesabe,2005,2010
3,Stickam,2006,2013
4,AllPeers,2004,2008
...,...,...,...
432,SpeakSoft,2007,2012
433,Tagito,2008,2012
434,Nordic Windpower,2007,2013
435,Nethra Imaging,2003,2012
