# Libraries

In [1]:
from pymongo import MongoClient
import pandas as pd

# Create Mongo connection

In [2]:
client = MongoClient("localhost:27017")
db = client["Ironhack"]
c = db.get_collection("companies")

# Find startup companies and their countries

A startup has different quantitative characteristics depending on the author. These are the characteristics I will use to define a startup:
 - Has less than 100 employess: `number_of_employees`
 - Developers want to be closer to startups that have raised at least 1 million USD: `total_money_raised`

In [3]:
# employees = list(c.find())[i]["number_of_employees"]
# raised = list(c.find())[i]["total_money_raised"]
c.find()[0]

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
 'name': 'Wetpaint',
 'permalink': 'abc2',
 'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
 'homepage_url': 'http://wetpaint-inc.com',
 'blog_url': 'http://digitalquarters.net/',
 'blog_feed_url': 'http://digitalquarters.net/feed/',
 'twitter_username': 'BachelrWetpaint',
 'category_code': 'web',
 'number_of_employees': 47,
 'founded_year': 2005,
 'founded_month': 10,
 'founded_day': 17,
 'deadpooled_year': 1,
 'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
 'alias_list': '',
 'email_address': 'info@wetpaint.com',
 'phone_number': '206.859.6300',
 'description': 'Technology Platform Company',
 'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
 'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
 'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for di

In [4]:
condition_1 = {"number_of_employees":{"$lt": 100}}
condition_2 = {"total_money_raised": {"$regex":".+M$"}}
condition_3 = {"total_money_raised": {"$regex":".+B$"}}
projection = {"offices.city": 1, "offices.country_code": 1, "_id": 0}
cities_countries = list(c.find({"$and": [condition_1, {"$or": [condition_2, condition_3]}]}, projection))

In [5]:
cities_countries

[{'offices': [{'city': 'Seattle', 'country_code': 'USA'},
   {'city': 'New York', 'country_code': 'USA'}]},
 {'offices': [{'city': 'San Francisco', 'country_code': 'USA'}]},
 {'offices': [{'city': 'West Hollywood', 'country_code': 'USA'}]},
 {'offices': [{'city': 'San Francisco', 'country_code': 'USA'}]},
 {'offices': [{'city': 'New York City', 'country_code': 'USA'}]},
 {'offices': [{'city': 'New York', 'country_code': 'USA'}]},
 {'offices': [{'city': 'Sunnyvale', 'country_code': 'USA'}]},
 {'offices': [{'city': 'San Francisco', 'country_code': 'USA'}]},
 {'offices': [{'city': 'San Francisco', 'country_code': 'USA'}]},
 {'offices': [{'city': 'Culver City', 'country_code': 'USA'}]},
 {'offices': [{'city': 'San Francisco', 'country_code': 'USA'}]},
 {'offices': [{'city': 'New York', 'country_code': 'USA'}]},
 {'offices': [{'city': 'New York', 'country_code': 'USA'}]},
 {'offices': [{'city': 'Pleasanton', 'country_code': 'USA'}]},
 {'offices': [{'city': 'Luxembourg City', 'country_code':

In [6]:
cities_countries[0]["offices"][1]["city"] # Some copanies have headquarters in different cities and countries
# I will use all of them

'New York'

In [7]:
cities_countries[0]

{'offices': [{'city': 'Seattle', 'country_code': 'USA'},
  {'city': 'New York', 'country_code': 'USA'}]}

In [8]:
cities = [j["city"] for i in cities_countries for j in i["offices"]]
countries = [j["country_code"] for i in cities_countries for j in i["offices"]]

In [9]:
dict_countrycity = {"city": cities, "country": countries}

In [10]:
pd.DataFrame(pd.DataFrame(dict_countrycity)["country"].value_counts()).head()

Unnamed: 0_level_0,count
country,Unnamed: 1_level_1
USA,1067
GBR,121
ISR,43
CAN,40
FRA,36


In [11]:
pd.DataFrame(pd.DataFrame(dict_countrycity)["city"].value_counts()).head()

Unnamed: 0_level_0,count
city,Unnamed: 1_level_1
San Francisco,181
New York,120
London,72
Seattle,42
Palo Alto,40


In [12]:
# Most of the offices of starups with these characteristics are in San Francisco.

Designer wanted to be closer to design companies, let's see where these are in San Francisco:

In [13]:
condition_1 = {"number_of_employees":{"$lt": 100}}
condition_2 = {"tag_list": {"$regex": "[Dd]esign"}}
condition_3 = {"offices.city": "San Francisco"}
condition_4 = {"total_money_raised": {"$regex":".+M$"}}
condition_5 = {"total_money_raised": {"$regex":".+B$"}}

projection = {"offices.latitude": 1, "offices.longitude": 1, "_id": 0}
designSF_coordinates = list(c.find({"$and": [condition_1, condition_2, condition_3, {"$or": [condition_4, condition_5]}]}, projection))

In [14]:
designSF_coordinates # To take into consideration later

[{'offices': [{'latitude': 37.764726, 'longitude': -122.394523}]}]