# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
#%pip install pymongo
from pymongo import MongoClient
cursor = MongoClient("mongodb://localhost:27017/")
cursor

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [2]:
db=cursor.companies
col=db.companies

In [3]:
cursor.list_database_names()
db.list_collection_names()


['companies']

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [11]:
# Your Code 
list(col.find({'name': 'Babelgum'}, {'name': 1}))

[{'_id': ObjectId('52cdef7c4bab8bd675297da0'), 'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [12]:
# Your Code
query = {'number_of_employees': {'$gt': 5000}}
list(col.find(query).sort('number_of_employees', -1).limit(20))

[{'_id': ObjectId('52cdef7d4bab8bd67529941a'),
  'name': 'Siemens',
  'permalink': 'siemens',
  'crunchbase_url': 'http://www.crunchbase.com/company/siemens',
  'homepage_url': 'http://www.siemens.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'Siemens',
  'category_code': 'hardware',
  'number_of_employees': 405000,
  'founded_year': 1847,
  'founded_month': None,
  'founded_day': None,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'automation, building-technologies, drive-technology, energy',
  'alias_list': '',
  'email_address': 'contact@siemens.com',
  'phone_number': '49 89 636 34134',
  'description': 'Electronics and Electrical Engineering',
  'created_at': 'Thu Jul 31 09:29:43 UTC 2008',
  'updated_at': 'Thu Nov 28 20:32:55 UTC 2013',
  'overview': '<p>Siemens AG, an electronics and electrical engineering company, operates in the industry, energy, and healthcare sectors worldwide. 

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [18]:
# Your Code
query = {'$and': [{'founded_year': {'$gt': 1999}},
                  {'founded_year': {'$lt': 2501}}]}
list(col.find(query, {'name': 1}).limit(2))


[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'), 'name': 'Wetpaint'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8c'), 'name': 'Zoho'}]

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [19]:
# Your Code
query = {'$and': [{'ipo.valuation_amount': {'$gt': 100000000}},
                  {'founded_year': {'$lt': 2010}}]}
list(col.find(query, {'name': 1, 'ipo': 1}).limit(2))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8e'),
  'name': 'Facebook',
  'ipo': {'valuation_amount': 104000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 5,
   'pub_day': 18,
   'stock_symbol': 'NASDAQ:FB'}},
 {'_id': ObjectId('52cdef7c4bab8bd675297d94'),
  'name': 'Twitter',
  'ipo': {'valuation_amount': 18100000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2013,
   'pub_month': 11,
   'pub_day': 7,
   'stock_symbol': 'NYSE:TWTR'}}]

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [23]:
# Your Code
query = {'$and': [{'number_of_employees': {'$lt': 1000}},
                  {'founded_year': {'$lt': 2005}}]}
list(col.find(query).sort('number_of_employees', -1).limit(10))

[{'_id': ObjectId('52cdef7d4bab8bd675298933'),
  'name': 'Infinera Corporation',
  'permalink': 'infinera',
  'crunchbase_url': 'http://www.crunchbase.com/company/infinera',
  'homepage_url': 'http://www.infinera.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'InfineraCorp',
  'category_code': 'network_hosting',
  'number_of_employees': 974,
  'founded_year': 2000,
  'founded_month': None,
  'founded_day': None,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': None,
  'alias_list': 'Zepton Networks',
  'email_address': '',
  'phone_number': '408-572-5200',
  'description': 'Optical Networking Systems',
  'created_at': 'Tue Apr 29 19:43:09 UTC 2008',
  'updated_at': 'Tue Jan 10 22:07:21 UTC 2012',
  'overview': '<p>Infinera Corporation provides optical networking systems based on photonic integration technology in the United States. Its digital transport node (DTN) system utilizes the photonic 

### 6. All the companies that don't include the `partners` field.

In [26]:
# Your Code
query = {'partners': {'$exists': False}}

list(col.find(query))

[]

### 7. All the companies that have a null type of value on the `category_code` field.

In [32]:
# Your Code
query = {'category_code': {'$eq': 'null'}}

list(col.find(query))

[]

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [35]:
# Your Code
query = {'$and': [{'number_of_employees': {'$gte': 100}},
                  {'number_of_employees': {'$lt': 1000}}]}
list(col.find(query, {'name': 1, 'number_of_employees': 1}).limit(5))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8b'),
  'name': 'AdventNet',
  'number_of_employees': 600},
 {'_id': ObjectId('52cdef7c4bab8bd675297da7'),
  'name': 'AddThis',
  'number_of_employees': 120},
 {'_id': ObjectId('52cdef7c4bab8bd675297da8'),
  'name': 'OpenX',
  'number_of_employees': 305},
 {'_id': ObjectId('52cdef7c4bab8bd675297db5'),
  'name': 'LifeLock',
  'number_of_employees': 644},
 {'_id': ObjectId('52cdef7c4bab8bd675297dbb'),
  'name': 'Jajah',
  'number_of_employees': 110}]

### 9. Order all the companies by their IPO price in a descending order.

In [40]:
# Your Code
list(col.find().sort('ipo', -1).limit(2))

[{'_id': ObjectId('52cdef7e4bab8bd67529a8b4'),
  'name': 'GREE',
  'permalink': 'gree',
  'crunchbase_url': 'http://www.crunchbase.com/company/gree',
  'homepage_url': 'http://www.gree-corp.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'gree_corp',
  'category_code': 'games_video',
  'number_of_employees': 700,
  'founded_year': 2004,
  'founded_month': 12,
  'founded_day': 7,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'mobile-web, japan, tokyo, social-network, mobile-social-network, mobile-games',
  'alias_list': None,
  'email_address': 'inquiry@gree-corp.com',
  'phone_number': '',
  'description': 'Internet media business,SNS,  free game',
  'created_at': 'Sat Dec 20 16:42:57 UTC 2008',
  'updated_at': 'Tue Jan 01 21:37:04 UTC 2013',
  'overview': '<p>GREE provides Japan&#8217;s leading mobile social network, and is at the forefront of mobile technology. GREE was ranked as Japan&#82

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [42]:
# Your Code
list(col.find().sort('number_of_employees', -1).limit(10))

[{'_id': ObjectId('52cdef7d4bab8bd67529941a'),
  'name': 'Siemens',
  'permalink': 'siemens',
  'crunchbase_url': 'http://www.crunchbase.com/company/siemens',
  'homepage_url': 'http://www.siemens.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'Siemens',
  'category_code': 'hardware',
  'number_of_employees': 405000,
  'founded_year': 1847,
  'founded_month': None,
  'founded_day': None,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'automation, building-technologies, drive-technology, energy',
  'alias_list': '',
  'email_address': 'contact@siemens.com',
  'phone_number': '49 89 636 34134',
  'description': 'Electronics and Electrical Engineering',
  'created_at': 'Thu Jul 31 09:29:43 UTC 2008',
  'updated_at': 'Thu Nov 28 20:32:55 UTC 2013',
  'overview': '<p>Siemens AG, an electronics and electrical engineering company, operates in the industry, energy, and healthcare sectors worldwide. 

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [44]:
query = {'founded_month': {'$gt': 6}}

list(col.find(query, {'name':1}).limit(1000))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'), 'name': 'Wetpaint'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8c'), 'name': 'Zoho'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8d'), 'name': 'Digg'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8f'), 'name': 'Omnidrive'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d9b'), 'name': 'eBay'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d9d'), 'name': 'Joost'},
 {'_id': ObjectId('52cdef7c4bab8bd675297da1'), 'name': 'Plaxo'},
 {'_id': ObjectId('52cdef7c4bab8bd675297da4'), 'name': 'Powerset'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dab'), 'name': 'Kyte'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dae'), 'name': 'Thoof'},
 {'_id': ObjectId('52cdef7c4bab8bd675297daf'), 'name': 'Jingle Networks'},
 {'_id': ObjectId('52cdef7c4bab8bd675297db5'), 'name': 'LifeLock'},
 {'_id': ObjectId('52cdef7c4bab8bd675297db6'), 'name': 'Wesabe'},
 {'_id': ObjectId('52cdef7c4bab8bd675297db8'), 'name': 'SmugMug'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dba'), 'name': 'Google'},
 {

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [64]:
# Your Code
query = {'$and': [{'founded_year': {'$lt': 2000}},
                  {'acquisition.price_amount': {'$gt': 10000}}]}
list(col.find(query, {'name': 1}).limit(5))

[{'_id': ObjectId('52cdef7c4bab8bd675297d90'), 'name': 'Postini'},
 {'_id': ObjectId('52cdef7c4bab8bd675297deb'), 'name': 'SideStep'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e2c'), 'name': 'Recipezaar'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e37'), 'name': 'Cyworld'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e89'), 'name': 'PayPal'}]

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [80]:
query = {'acquisition.acquired_year': {'$gt': 2010}}
list(col.find(query, {'name': 1, 'acquisition':1}).sort('acquisition.price_amount', -1).limit(5))
#, {'name': 1, 'acquisition':1}).sort('acquisition.price_amount', -1).limit(5))

[{'_id': ObjectId('52cdef7c4bab8bd675298876'),
  'name': 'T-Mobile',
  'acquisition': {'price_amount': 39000000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/03/20/in-the-race-for-more-spectrum-att-is-acquiring-t-mobile-for-39-billion/',
   'source_description': 'In The Race For More Spectrum, AT&T Is Acquiring T-Mobile For $39 Billion',
   'acquired_year': 2011,
   'acquired_month': 3,
   'acquired_day': 20,
   'acquiring_company': {'name': 'AT&T', 'permalink': 'at-t'}}},
 {'_id': ObjectId('52cdef7f4bab8bd67529c228'),
  'name': 'Goodrich Corporation',
  'acquisition': {'price_amount': 18400000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://www.masshightech.com/stories/2011/09/19/daily37-UTC-shells-out-184-billion-for-Goodrich.html',
   'source_description': 'UTC shells out $18.4 billion for Goodrich',
   'acquired_year': 2011,
   'acquired_month': 9,
   'acquired_day': 22,
   'acquiring_comp

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [85]:
# Your Code
list(col.find((), {'name': 1, 'founded_year':1}).sort('founded_year', -1).limit(5))

[{'_id': ObjectId('52cdef7c4bab8bd675297fec'),
  'name': 'Fixya',
  'founded_year': 2013},
 {'_id': ObjectId('52cdef7d4bab8bd675298ea7'),
  'name': 'iBazar',
  'founded_year': 2013},
 {'_id': ObjectId('52cdef7c4bab8bd67529830a'),
  'name': 'Fluc',
  'founded_year': 2013},
 {'_id': ObjectId('52cdef7c4bab8bd6752982d4'),
  'name': 'Advaliant',
  'founded_year': 2013},
 {'_id': ObjectId('52cdef7c4bab8bd67529801f'),
  'name': 'Wamba',
  'founded_year': 2013}]

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [87]:
# Your Code
query = {'founded_day': {'$lte': 7}}
list(col.find(query).sort('acquisition.price_amount', -1).limit(10))

[{'_id': ObjectId('52cdef7d4bab8bd6752989a1'),
  'name': 'Netscape',
  'permalink': 'netscape',
  'crunchbase_url': 'http://www.crunchbase.com/company/netscape',
  'homepage_url': 'http://netscape.aol.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': '',
  'category_code': 'software',
  'number_of_employees': None,
  'founded_year': 1994,
  'founded_month': 4,
  'founded_day': 4,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': None,
  'alias_list': '',
  'email_address': '',
  'phone_number': '',
  'description': '',
  'created_at': 'Tue May 06 00:27:28 UTC 2008',
  'updated_at': 'Thu Nov 14 00:57:06 UTC 2013',
  'overview': '<p>Netscape Communications Corporation offers development, marketing, sale, and support of enterprise software solutions. Its products include e-commerce infrastructure and e-commerce applications targeted primarily at corporate intranets and extranets, and Internet. The co

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [90]:
# Your Code
query = {'$and': [{'category_code': 'web'},
                  {'number_of_employees': {'$gt': 4000}}]}
list(col.find(query, {'name':1}).sort('number_of_employees', -1).limit(5))

[{'_id': ObjectId('52cdef7c4bab8bd6752982a8'), 'name': 'Experian'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d9b'), 'name': 'eBay'},
 {'_id': ObjectId('52cdef7c4bab8bd675297da3'), 'name': 'Yahoo!'},
 {'_id': ObjectId('52cdef7c4bab8bd67529834c'),
  'name': 'Los Angeles Times Media Group'},
 {'_id': ObjectId('52cdef7c4bab8bd675297fcb'), 'name': 'Rakuten'}]

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [96]:
# Your Code
query = {'$and': [{'acquisition.price_amount': {'$gt': 1000000}},
                  {'price_currency_code': 'EUR'}]}
list(col.find(query, {'name':1}))

[]

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [98]:
# Your Code
query = {'acquisition.acquired_month': {'$lt': 5}}

list(col.find(query, {'name':1, 'acquisition':1}).limit(10))


[{'_id': ObjectId('52cdef7c4bab8bd675297d95'),
  'name': 'StumbleUpon',
  'acquisition': {'price_amount': 29000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2009/04/13/ebay-unacquires-stumbleupon/',
   'source_description': "StumbleUpon Beats Skype In Escaping EBay's Clutches",
   'acquired_year': 2009,
   'acquired_month': 4,
   'acquired_day': None,
   'acquiring_company': {'name': 'StumbleUpon', 'permalink': 'stumbleupon'}}},
 {'_id': ObjectId('52cdef7c4bab8bd675297dab'),
  'name': 'Kyte',
  'acquisition': {'price_amount': None,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/01/31/exclusive-kit-digital-acquires-kickapps-kewego-and-kyte-for-77-2-million/',
   'source_description': 'KIT digital Acquires KickApps, Kewego AND Kyte For $77.2 Million',
   'acquired_year': 2011,
   'acquired_month': 1,
   'acquired_day': 31,
   'acquiring_company': {'name': 'KIT digital', 'permalink':

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [103]:
# Your Code
query = {'$and': [{'founded_year': {'$gt': 2000}},
                  {'founded_year': {'$lt': 2010}},
                  {'acquisition.acquired_year': {'$gt': 2011}}]}
list(col.find(query, {'name':1}).limit(10))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'), 'name': 'Wetpaint'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d8d'), 'name': 'Digg'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d91'), 'name': 'Geni'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dbf'), 'name': 'blogTV'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dcb'), 'name': 'Revision3'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dda'), 'name': 'iContact'},
 {'_id': ObjectId('52cdef7c4bab8bd675297ddd'), 'name': 'Mashery'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dee'), 'name': 'Dailymotion'},
 {'_id': ObjectId('52cdef7c4bab8bd675297def'), 'name': 'KickApps'},
 {'_id': ObjectId('52cdef7c4bab8bd675297df4'), 'name': 'Netvibes'}]

### 20. All the companies that have been 'deadpooled' after the third year.

In [133]:
# Your Code
import pandas as pd
filtro={'name':True, 'deadpooled_year':True, 'founded_year':True, '_id':False}
df = pd.DataFrame(col.find((), filtro))
df.head()
#listna = df[df["deadpooled_year"].isna()]
#df = df.drop(listna.index)
#df.founded_year.isna().any()
#df.deadpooled_year.isna().any()
new_df = df[df['deadpooled_year'] == df['founded_year'] + 3]
new_df

Unnamed: 0,name,founded_year,deadpooled_year
5,Omnidrive,2005.0,2008.0
45,Jangl SMS,2005.0,2008.0
114,TripHub,2005.0,2008.0
336,EventVue,2007.0,2010.0
490,CrowdSpirit,2007.0,2010.0
...,...,...,...
15910,Social Collective,2009.0,2012.0
16743,inversearch,2008.0,2011.0
17317,Eclector,2008.0,2011.0
17345,Siruna,2007.0,2010.0
