# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
client = MongoClient()
db=client.companies
colec=db.companies

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [2]:
list(colec.find({'name':'Babelgum'}, {'_id':0, 'name':1}))

[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [3]:
print(list(colec.find({'number_of_employees':{'$gte':5000}}).limit(20).sort('number_of_employees',1))[0])

{'_id': ObjectId('52cdef7e4bab8bd67529aa77'), 'name': 'JDS Uniphase Corporation', 'permalink': 'jds-uniphase-corporation', 'crunchbase_url': 'http://www.crunchbase.com/company/jds-uniphase-corporation', 'homepage_url': 'http://www.jdsu.com', 'blog_url': 'http://blogs.jdsu.com/perspectives/default.aspx', 'blog_feed_url': 'http://feeds.feedburner.com/JdsuPerspectivesPosts', 'twitter_username': 'JDSU', 'category_code': 'hardware', 'number_of_employees': 5000, 'founded_year': None, 'founded_month': None, 'founded_day': None, 'deadpooled_year': None, 'deadpooled_month': None, 'deadpooled_day': None, 'deadpooled_url': None, 'tag_list': '', 'alias_list': '', 'email_address': '', 'phone_number': '+1 408 546 5000', 'description': '', 'created_at': 'Sat Jan 03 04:11:21 UTC 2009', 'updated_at': 'Thu Dec 12 04:51:28 UTC 2013', 'overview': '<p>JDSU is the worldwide leading provider of broadband test and measurement solutions and optical products for communications, commercial and consumer markets.<

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [4]:
print(list(colec.find({"founded_year":{"$gte":2000,"$lte":2005}}

,{'_id':0, 'name':1, 'founded_year':1} ))[0])

{'name': 'Wetpaint', 'founded_year': 2005}


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [5]:
print(list(colec.find({'$and':[{'ipo.valuation_amount':{"$gte":2000}},{'founded_year':{"$lte":2010}}]},{'_id':0, 'name':1, 'ipo':1}))[0])

{'name': 'Facebook', 'ipo': {'valuation_amount': 104000000000, 'valuation_currency_code': 'USD', 'pub_year': 2012, 'pub_month': 5, 'pub_day': 18, 'stock_symbol': 'NASDAQ:FB'}}


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [10]:
print(list(colec.find({'$and':[{'founded_year':{"$gt":2005}},{'number_of_employees':{"$lt":1000}}]}).limit(10).sort('number_of_employees',1))[0])

{'_id': ObjectId('52cdef7c4bab8bd675297dd2'), 'name': 'CastTV', 'permalink': 'casttv', 'crunchbase_url': 'http://www.crunchbase.com/company/casttv', 'homepage_url': 'http://www.casttv.com', 'blog_url': '', 'blog_feed_url': '', 'twitter_username': 'casttv', 'category_code': 'games_video', 'number_of_employees': 0, 'founded_year': 2006, 'founded_month': None, 'founded_day': None, 'deadpooled_year': None, 'deadpooled_month': None, 'deadpooled_day': None, 'deadpooled_url': None, 'tag_list': 'videosearch, techcrunch40', 'alias_list': '', 'email_address': 'info@casttv.com', 'phone_number': '', 'description': '', 'created_at': 'Tue Feb 05 22:28:15 UTC 2008', 'updated_at': 'Thu Mar 14 21:33:45 UTC 2013', 'overview': '<p>CastTV is working to build the web&#8217;s best video search engine. CastTV lets users find all their favorite online videos, from TV shows to movies to the latest celebrity, sports, news, and viral videos. The company&#8217;s proprietary technology addresses two main video sea

### 6. All the companies that don't include the `partners` field.

In [16]:
print(list(colec.find({'partners':{'$exists':False}}))[0])

IndexError: list index out of range

### 7. All the companies that have a null type of value on the `category_code` field.

In [17]:
print(list(colec.find({'category_code':None}))[0])

{'_id': ObjectId('52cdef7c4bab8bd6752980f6'), 'name': 'Collective', 'permalink': 'collective', 'crunchbase_url': 'http://www.crunchbase.com/company/collective', 'homepage_url': None, 'blog_url': None, 'blog_feed_url': None, 'twitter_username': None, 'category_code': None, 'number_of_employees': None, 'founded_year': None, 'founded_month': None, 'founded_day': None, 'deadpooled_year': None, 'deadpooled_month': None, 'deadpooled_day': None, 'deadpooled_url': None, 'tag_list': None, 'alias_list': None, 'email_address': None, 'phone_number': None, 'description': None, 'created_at': 'Thu Sep 26 13:15:02 UTC 2013', 'updated_at': 'Thu Sep 26 13:15:02 UTC 2013', 'overview': None, 'image': None, 'products': [], 'relationships': [], 'competitions': [], 'providerships': [], 'total_money_raised': '$0', 'funding_rounds': [], 'investments': [], 'acquisition': None, 'acquisitions': [], 'offices': [], 'milestones': [], 'ipo': None, 'video_embeds': [], 'screenshots': [], 'external_links': [], 'partners

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [18]:
print(list(colec.find({'$and':[{'number_of_employees':{"$gte":100}},{'number_of_employees':{"$lte":1000}}]},{'_id':0, 'name':1, 'number_of_employees':1}))[0])

{'name': 'Zoho', 'number_of_employees': 1600}


### 9. Order all the companies by their IPO price in a descending order.

In [20]:
print(list(colec.find().sort('ipo.price',-1))[0])

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'), 'name': 'Wetpaint', 'permalink': 'abc2', 'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint', 'homepage_url': 'http://wetpaint-inc.com', 'blog_url': 'http://digitalquarters.net/', 'blog_feed_url': 'http://digitalquarters.net/feed/', 'twitter_username': 'BachelrWetpaint', 'category_code': 'web', 'number_of_employees': 47, 'founded_year': 2005, 'founded_month': 10, 'founded_day': 17, 'deadpooled_year': 1, 'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system', 'alias_list': '', 'email_address': 'info@wetpaint.com', 'phone_number': '206.859.6300', 'description': 'Technology Platform Company', 'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27), 'updated_at': 'Sun Dec 08 07:15:44 UTC 2013', 'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wet

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [21]:
print(list(colec.find().limit(10).sort('number_of_employees',1))[0])

{'_id': ObjectId('52cdef7c4bab8bd675297d92'), 'name': 'Flektor', 'permalink': 'flektor', 'crunchbase_url': 'http://www.crunchbase.com/company/flektor', 'homepage_url': 'http://www.flektor.com', 'blog_url': 'http://www.flektor-blog.com', 'blog_feed_url': 'http://www.flektor-blog.com/video_editing_software/index.rdf', 'twitter_username': None, 'category_code': 'games_video', 'number_of_employees': None, 'founded_year': None, 'founded_month': None, 'founded_day': None, 'deadpooled_year': None, 'deadpooled_month': None, 'deadpooled_day': None, 'deadpooled_url': None, 'tag_list': 'flektor, photo, video', 'alias_list': None, 'email_address': None, 'phone_number': None, 'description': None, 'created_at': 'Thu May 31 21:11:51 UTC 2007', 'updated_at': 'Sat Nov 05 08:42:23 UTC 2011', 'overview': '<p>Flektor is a rich-media mash-up platform that enables consumers to create, remix and share photos and videos on the internet without the need for advanced video-editing skills or software.</p>\n\n<p>

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [23]:
print(list(colec.find({'founded_month':{'$gt':6}}).limit(1000))[0])

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'), 'name': 'Wetpaint', 'permalink': 'abc2', 'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint', 'homepage_url': 'http://wetpaint-inc.com', 'blog_url': 'http://digitalquarters.net/', 'blog_feed_url': 'http://digitalquarters.net/feed/', 'twitter_username': 'BachelrWetpaint', 'category_code': 'web', 'number_of_employees': 47, 'founded_year': 2005, 'founded_month': 10, 'founded_day': 17, 'deadpooled_year': 1, 'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system', 'alias_list': '', 'email_address': 'info@wetpaint.com', 'phone_number': '206.859.6300', 'description': 'Technology Platform Company', 'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27), 'updated_at': 'Sun Dec 08 07:15:44 UTC 2013', 'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for digital publishers. Wet

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [28]:
print(list(colec.find({'acquisition.price_amount':{'$gt':1e6}}, {'_id':0, 'name':1,'acquisition.price_amount':1}))[0])

{'name': 'Wetpaint', 'acquisition': {'price_amount': 30000000}}


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [31]:
print(list(colec.find({'acquisition.acquired_year':{'$gt':2010}}, {'_id':0, 'name':1,'acquisition':1}).sort('acquisition.price_amount',1))[0])

{'name': 'Geni', 'acquisition': {'price_amount': None, 'price_currency_code': 'USD', 'term_code': None, 'source_url': 'http://techcrunch.com/2012/11/28/all-in-the-family-myheritage-buys-former-yammer-stablemate-geni-com-raises-25m/', 'source_description': 'MyHeritage acquires Geni and $25M to build family tree of the whole world', 'acquired_year': 2012, 'acquired_month': 11, 'acquired_day': 28, 'acquiring_company': {'name': 'MyHeritage', 'permalink': 'myheritage'}}}


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [34]:
print(list(colec.find(projection = {'_id':0, 'name':1,'founded_year':1}).sort('founded_year',-1))[0])

{'name': 'Fixya', 'founded_year': 2013}


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [35]:
print(list(colec.find({'acquisition.acquired_day':{'$lte':7}}, {'_id':0, 'name':1,'acquisition.price_amount':1}).sort('acquisition.price_amount',-1))[0])

{'name': 'National Semiconductor', 'acquisition': {'price_amount': 6500000000}}


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [42]:
print(list(colec.find({'$and':[{'amount_of_employees':{"$gt":4000}},{'category_code':'web'}]},{'_id':0, 'name':1}).sort('number_of_employees',1))[0])


IndexError: list index out of range

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [43]:
print(list(colec.find({'$and':[{'acquisition.price_amount':{"$gt":1e7}},{'price_currency_code':'EUR'}]},{'_id':0, 'name':1,'acquisition.price_amount':1}))[0])

IndexError: list index out of range

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [44]:
print(list(colec.find({'acquisition.acquired_month':{'$lte':4}}, {'_id':0, 'name':1,'acquisition':1}).limit(10))[0])

{'name': 'StumbleUpon', 'acquisition': {'price_amount': 29000000, 'price_currency_code': 'USD', 'term_code': None, 'source_url': 'http://techcrunch.com/2009/04/13/ebay-unacquires-stumbleupon/', 'source_description': "StumbleUpon Beats Skype In Escaping EBay's Clutches", 'acquired_year': 2009, 'acquired_month': 4, 'acquired_day': None, 'acquiring_company': {'name': 'StumbleUpon', 'permalink': 'stumbleupon'}}}


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [46]:
print(list(colec.find({"$and": [{"$and": [{"founded_year":{'$gte':2000}}, {"founded_year":{'$lte':2010}}]},{'acquisition.acquired_year':{'$gte':2011}}]},{'_id':0, 'name':1,'acquisition.acquired_year':1,"founded_year":1}))[0])



{'name': 'Wetpaint', 'founded_year': 2005, 'acquisition': {'acquired_year': 2013}}


### 20. All the companies that have been 'deadpooled' after the third year.

In [54]:
### print(list(colec.find({'deadpooled_year'-'founded_year':{'$gt':3}}, {'_id':0, 'name':1, 'deadpooled_year':1}))[0])


TypeError: unsupported operand type(s) for -: 'str' and 'str'