# Advanced Querying Mongo

Importing libraries and setting up connection

In [4]:
from pymongo import MongoClient
client = MongoClient("localhost:27017")

In [5]:
client.list_database_names()

['admin', 'config', 'ironhack', 'local']

In [6]:
db_companies = client.get_database('ironhack')

In [8]:
db_companies.list_collection_names()

['restaurants', 'companies']

In [9]:
db_companies = db_companies.get_collection('companies')

In [20]:
db_companies.find_one()

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
 'name': 'Wetpaint',
 'permalink': 'abc2',
 'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
 'homepage_url': 'http://wetpaint-inc.com',
 'blog_url': 'http://digitalquarters.net/',
 'blog_feed_url': 'http://digitalquarters.net/feed/',
 'twitter_username': 'BachelrWetpaint',
 'category_code': 'web',
 'number_of_employees': 47,
 'founded_year': 2005,
 'founded_month': 10,
 'founded_day': 17,
 'deadpooled_year': 1,
 'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
 'alias_list': '',
 'email_address': 'info@wetpaint.com',
 'phone_number': '206.859.6300',
 'description': 'Technology Platform Company',
 'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
 'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
 'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for di

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [12]:
db_companies.find_one().keys()

dict_keys(['_id', 'name', 'permalink', 'crunchbase_url', 'homepage_url', 'blog_url', 'blog_feed_url', 'twitter_username', 'category_code', 'number_of_employees', 'founded_year', 'founded_month', 'founded_day', 'deadpooled_year', 'tag_list', 'alias_list', 'email_address', 'phone_number', 'description', 'created_at', 'updated_at', 'overview', 'image', 'products', 'relationships', 'competitions', 'providerships', 'total_money_raised', 'funding_rounds', 'investments', 'acquisition', 'acquisitions', 'offices', 'milestones', 'video_embeds', 'screenshots', 'external_links', 'partners'])

In [16]:
db_companies.find_one({'name':'Babelgum'})['name']

'Babelgum'

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [62]:
list(db_companies.find({'number_of_employees': {'$gt':5000}}, {'name':1}).limit(20))

[{'_id': ObjectId('52cdef7c4bab8bd675297d8e'), 'name': 'Facebook'},
 {'_id': ObjectId('52cdef7c4bab8bd675297d9b'), 'name': 'eBay'},
 {'_id': ObjectId('52cdef7c4bab8bd675297da2'), 'name': 'Cisco'},
 {'_id': ObjectId('52cdef7c4bab8bd675297da3'), 'name': 'Yahoo!'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dba'), 'name': 'Google'},
 {'_id': ObjectId('52cdef7c4bab8bd675297dc4'), 'name': 'Intel'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e49'), 'name': 'Nintendo'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e5d'), 'name': 'Adobe Systems'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e6f'), 'name': 'Sony'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e89'), 'name': 'PayPal'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e8e'),
  'name': 'The Walt Disney Company'},
 {'_id': ObjectId('52cdef7c4bab8bd675297e96'), 'name': 'AOL'},
 {'_id': ObjectId('52cdef7c4bab8bd675297ea4'), 'name': 'Webkinz'},
 {'_id': ObjectId('52cdef7c4bab8bd675297ee9'), 'name': 'Sun Microsystems'},
 {'_id': ObjectId('52cdef7c4bab8bd675297fc2'

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [73]:
# founded_year
proj = {'_id':0,'name':1, 'founded_year':1}
c1 = {'founded_year': {'$gt':1999}}
c2 = {'founded_year': {'$lt':2006}}

list(db_companies.find({'$and':[c1, c2]}, proj).limit(3))

[{'name': 'Wetpaint', 'founded_year': 2005},
 {'name': 'Zoho', 'founded_year': 2005},
 {'name': 'Digg', 'founded_year': 2004}]

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [72]:
# gt 100000 valuation_amount

proj = {'_id':0,'name':1, 'ipo':1}
c1 = {'ipo.valuation_amount': {'$gt': 100000000}}
c2 = {'founded_year': {'$lt':2011}}

list(db_companies.find({'$and':[c1,c2]}, proj).limit(3))

[{'name': 'Facebook',
  'ipo': {'valuation_amount': 104000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 5,
   'pub_day': 18,
   'stock_symbol': 'NASDAQ:FB'}},
 {'name': 'Twitter',
  'ipo': {'valuation_amount': 18100000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2013,
   'pub_month': 11,
   'pub_day': 7,
   'stock_symbol': 'NYSE:TWTR'}},
 {'name': 'Yelp',
  'ipo': {'valuation_amount': 1300000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 3,
   'pub_day': 2,
   'stock_symbol': 'NYSE:YELP'}}]

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [75]:
# Your Code
proj = {'_id':0, 'name':1, 'number_of_employees': 1}
c1 = {'number_of_employees': {'$lt':1000}}
c2 = {'founded_year': {'$lt':2006}}

list(db_companies.find({'$and':[c1,c2]}, proj).limit(10).sort('number_of_employees', -1))

[{'name': 'Infinera Corporation', 'number_of_employees': 974},
 {'name': 'Box', 'number_of_employees': 950},
 {'name': 'NorthPoint Communications Group', 'number_of_employees': 948},
 {'name': '888 Holdings', 'number_of_employees': 931},
 {'name': 'Forrester Research', 'number_of_employees': 903},
 {'name': 'SonicWALL', 'number_of_employees': 900},
 {'name': 'Webmetrics', 'number_of_employees': 900},
 {'name': 'Workday', 'number_of_employees': 900},
 {'name': 'Cornerstone OnDemand', 'number_of_employees': 881},
 {'name': 'Mozilla', 'number_of_employees': 800}]

### 6. All the companies that don't include the `partners` field.

In [76]:
# Your Code
proj = {'_id':0, 'name':1}

list(db_companies.find({'partners': {'$exists': False}}, proj).limit(10))

[]

### 7. All the companies that have a null type of value on the `category_code` field.

In [77]:
proj = {'_id':0, 'name':1}

list(db_companies.find({'category_code': {'$type': 'null'}}, proj).limit(10))

[{'name': 'Collective'},
 {'name': 'Snimmer'},
 {'name': 'KoolIM'},
 {'name': 'Level9 Media'},
 {'name': 'VidKing'},
 {'name': 'Drigg'},
 {'name': 'SpaceTime'},
 {'name': 'Touch Clarity'},
 {'name': 'MMDAYS'},
 {'name': 'Inside Group'}]

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [82]:
# Your Code

proj = {'_id':0, 'name':1, 'number_of_employees': 1}

c1 = {'number_of_employees': {'$gt':99}}
c2 = {'number_of_employees': {'$lt':1001}}

list(db_companies.find({'$and':[c1,c2]}, proj).sort('number_of_employees', -1).limit(5))

[{'name': 'Omniture', 'number_of_employees': 1000},
 {'name': 'Telenav', 'number_of_employees': 1000},
 {'name': 'Akamai Technologies', 'number_of_employees': 1000},
 {'name': 'Sonus Networks', 'number_of_employees': 1000},
 {'name': 'Yodle', 'number_of_employees': 1000}]

### 9. Order all the companies by their IPO price in a descending order.

In [95]:
# Your Code

proj = {'_id':0,'name':1, 'ipo.valuation_amount':1}

list(db_companies.find({'ipo.valuation_amount': {'$gt':0}}, proj).sort('ipo.valuation_amount', 1).limit(5))

[{'name': 'DeNA', 'ipo': {'valuation_amount': 117900}},
 {'name': 'DeNA', 'ipo': {'valuation_amount': 117900}},
 {'name': 'GT Solar', 'ipo': {'valuation_amount': 5000000}},
 {'name': 'Vringo', 'ipo': {'valuation_amount': 11003200}},
 {'name': 'Energy and Power Solutions', 'ipo': {'valuation_amount': 25000000}}]

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [96]:
# Your Code

proj = {'_id':0,'name':1, 'number_of_employees':1}

list(db_companies.find({'number_of_employees': {'$gt':0}}, proj).sort('number_of_employees', -1).limit(10))

[{'name': 'Siemens', 'number_of_employees': 405000},
 {'name': 'IBM', 'number_of_employees': 388000},
 {'name': 'Toyota', 'number_of_employees': 320000},
 {'name': 'PayPal', 'number_of_employees': 300000},
 {'name': 'Nippon Telegraph and Telephone Corporation',
  'number_of_employees': 227000},
 {'name': 'Samsung Electronics', 'number_of_employees': 221726},
 {'name': 'Accenture', 'number_of_employees': 205000},
 {'name': 'Tata Consultancy Services', 'number_of_employees': 200300},
 {'name': 'Flextronics International', 'number_of_employees': 200000},
 {'name': 'Safeway', 'number_of_employees': 186000}]

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [101]:
# founded_month

proj = {'_id':0,'name':1, 'founded_month': 1}

list(db_companies.find({'founded_month': {'$gt':6}}, proj).sort('founded_month', -1).limit(10))

[{'name': 'Ikan', 'founded_month': 12},
 {'name': 'Swivel', 'founded_month': 12},
 {'name': 'FlickIM', 'founded_month': 12},
 {'name': 'blinkx', 'founded_month': 12},
 {'name': 'Kyte', 'founded_month': 12},
 {'name': 'hi5', 'founded_month': 12},
 {'name': 'Socialtext', 'founded_month': 12},
 {'name': 'Sway', 'founded_month': 12},
 {'name': 'Wesabe', 'founded_month': 12},
 {'name': 'Thoof', 'founded_month': 12}]

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [106]:
# Your Code
proj = {'_id':0,'name':1, 'acquisition.price_amount':1}
c1 = {'founded_year': {'$lt':2000}}
c2 = {'acquisition.price_amount': {'$gt': 10000}}

list(db_companies.find({'$and':[c1,c2]}, proj).sort('acquisition.price_amount', -1).limit(5))

[{'name': 'BEA Systems', 'acquisition': {'price_amount': 8500000000}},
 {'name': 'Navteq', 'acquisition': {'price_amount': 8100000000}},
 {'name': 'Sun Microsystems', 'acquisition': {'price_amount': 7400000000}},
 {'name': 'Pixar', 'acquisition': {'price_amount': 7400000000}},
 {'name': 'LSI', 'acquisition': {'price_amount': 6600000000}}]

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [107]:
proj = {'_id':0,'name':1, 'acquisition.price_amount':1}

list(db_companies.find({'acquisition.price_amount': {'$gt':0}}, proj).sort('acquisition.price_amount', -1).limit(5))

[{'name': 'T-Mobile', 'acquisition': {'price_amount': 39000000000}},
 {'name': 'Nextel Communications',
  'acquisition': {'price_amount': 35000000000}},
 {'name': 'Goodrich Corporation',
  'acquisition': {'price_amount': 18400000000}},
 {'name': 'BEA Systems', 'acquisition': {'price_amount': 8500000000}},
 {'name': 'Navteq', 'acquisition': {'price_amount': 8100000000}}]

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [108]:
proj = {'_id':0, 'name':1, 'founded_year': 1}

list(db_companies.find({'founded_year':{'$gt':0}}, proj).limit(10).sort('founded_year', -1))

[{'name': 'Wamba', 'founded_year': 2013},
 {'name': 'Gimigo', 'founded_year': 2013},
 {'name': 'Clowdy', 'founded_year': 2013},
 {'name': 'Fluc', 'founded_year': 2013},
 {'name': 'Advaliant', 'founded_year': 2013},
 {'name': 'Pikk', 'founded_year': 2013},
 {'name': 'WhosCall', 'founded_year': 2013},
 {'name': 'SEOGroup', 'founded_year': 2013},
 {'name': 'iBazar', 'founded_year': 2013},
 {'name': 'Fixya', 'founded_year': 2013}]

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [111]:
# founded_day

proj = {'_id':0, 'name':1, 'founded_day': 1}

list(db_companies.find({'founded_day':{'$lte':7}}, proj).limit(10).sort('acquisition.price_amount', -1))

[{'name': 'Netscape', 'founded_day': 4},
 {'name': 'PayPal', 'founded_day': 1},
 {'name': 'Zappos', 'founded_day': 1},
 {'name': 'Alibaba', 'founded_day': 1},
 {'name': 'Postini', 'founded_day': 2},
 {'name': 'Danger', 'founded_day': 1},
 {'name': 'Clearwell Systems', 'founded_day': 6},
 {'name': 'PrimeSense', 'founded_day': 1},
 {'name': 'Amobee', 'founded_day': 1},
 {'name': 'Vitrue', 'founded_day': 1}]

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [122]:
# category_code

proj = {'_id':0, 'name':1,'number_of_employees':1}
c1 = {'category_code': 'web'}
c2 = {'number_of_employees': {'$gt':4000}}


list(db_companies.find({'$and':[c1,c2]}, proj).limit(10).sort('number_of_employees', 1))

[{'name': 'Expedia', 'number_of_employees': 4400},
 {'name': 'AOL', 'number_of_employees': 8000},
 {'name': 'Webkinz', 'number_of_employees': 8657},
 {'name': 'Rakuten', 'number_of_employees': 10000},
 {'name': 'Los Angeles Times Media Group', 'number_of_employees': 10000},
 {'name': 'Groupon', 'number_of_employees': 10000},
 {'name': 'Yahoo!', 'number_of_employees': 13600},
 {'name': 'eBay', 'number_of_employees': 15000},
 {'name': 'Experian', 'number_of_employees': 15500}]

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [128]:
proj = {'_id':0,'name':1, 'acquisition.price_amount':1}

c1 = {'acquisition.price_amount': {'$gt':10000000}}
c2 = {'acquisition.price_currency_code': 'EUR'}

list(db_companies.find({'$and':[c1,c2]}, proj).limit(10).sort('acquisition.price_amount', -1))

[{'name': 'Apertio', 'acquisition': {'price_amount': 140000000}},
 {'name': 'Webedia', 'acquisition': {'price_amount': 70000000}},
 {'name': 'Tuenti Technologies', 'acquisition': {'price_amount': 70000000}},
 {'name': 'BioMed Central', 'acquisition': {'price_amount': 43400000}},
 {'name': 'Greenfield Online', 'acquisition': {'price_amount': 40000000}},
 {'name': 'ZYB', 'acquisition': {'price_amount': 31500000}},
 {'name': 'Wayfinder', 'acquisition': {'price_amount': 24000000}}]

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [131]:
# acquired_month
proj = {'_id':0,'name':1, 'acquisition':1}

list(db_companies.find({'acquisition.acquired_month':{'$lte':3}}, proj).limit(10).sort('acquisition.price_amount', -1))

[{'name': 'T-Mobile',
  'acquisition': {'price_amount': 39000000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/03/20/in-the-race-for-more-spectrum-att-is-acquiring-t-mobile-for-39-billion/',
   'source_description': 'In The Race For More Spectrum, AT&T Is Acquiring T-Mobile For $39 Billion',
   'acquired_year': 2011,
   'acquired_month': 3,
   'acquired_day': 20,
   'acquiring_company': {'name': 'AT&T', 'permalink': 'at-t'}}},
 {'name': 'BEA Systems',
  'acquisition': {'price_amount': 8500000000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://www.oracle.com/corporate/press/2008_jan/bea.html',
   'source_description': 'Oracle to Acquire BEA Systems',
   'acquired_year': 2008,
   'acquired_month': 1,
   'acquired_day': 16,
   'acquiring_company': {'name': 'Oracle Corporation',
    'permalink': 'oracle'}}},
 {'name': 'Pixar',
  'acquisition': {'price_amount': 7400000000,
   'price_currency_code': 'U

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [138]:
proj = {'_id':0,'name':1}
c1 = {'founded_year': {'$gte':2000}}
c2 = {'founded_year': {'$lte':2010}}
c3 = {'acquisition.acquired_year': {'$gte':2011}}

list(db_companies.find({'$and':[c1, c2, c3]}, proj).limit(10))

[{'name': 'Wetpaint'},
 {'name': 'Digg'},
 {'name': 'Geni'},
 {'name': 'Kyte'},
 {'name': 'Jingle Networks'},
 {'name': 'blogTV'},
 {'name': 'delicious'},
 {'name': 'Revision3'},
 {'name': 'iContact'},
 {'name': 'Mashery'}]

### 20. All the companies that have been 'deadpooled' after the third year.

In [160]:
#deadpool year
# deadpooled_year

proj = {'_id':0,'deadpooled_year':1}

c1 = {'$founded_year':{'$gt':0}}
c2 = {'deadpooled_year':{'$gt':3}}
c3 = {'$substract': [c1, c2]}

list(db_companies.find({c3.values():{'$lte':3}}, proj).limit(3))

InvalidDocument: documents must have only string keys, key was dict_values([[{'$founded_year': {'$gt': 0}}, {'deadpooled_year': {'$gt': 3}}]])