In [36]:
from pymongo import MongoClient
import pandas as pd
from collections import Counter
import seaborn as sns
import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster


In [37]:
client = MongoClient("localhost:27017")
client.list_database_names()


['admin', 'config', 'ironhack', 'local']

In [38]:
db = client.get_database("ironhack")
db.list_collection_names()


['companies']

In [39]:
comps = db.get_collection("companies")
comps

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'ironhack'), 'companies')

In [40]:
comps.count_documents({})

18801

Query the Database

In [41]:
comps_query = comps.find(
    {"$and":[
        {"number_of_employees": {"$gte":20}}, 
        {"number_of_employees": {"$lt": 500}},
        {"category_code": "games_video"},
        {"deadpooled_year": None},
        {
            "$or": [ 
                {"total_money_raised": {"$regex": "[A-Za-z]"}},
                {"total_money_raised" : {"$gte": 1000000}},
                    ]
        },              
            ]
    }, {"_id":0, "name":1, "number_of_employees":1, "deadpooled_year":1,  "category_code": 1, "tag_list":1, "total_money_raised": 1, "offices": 1})

comps_query.count()

  comps_query.count()


75

In [28]:
comps_list = list(comps_query)

In [29]:
df = pd.DataFrame(comps_list)
df.sample(20)

Unnamed: 0,name,category_code,number_of_employees,deadpooled_year,tag_list,total_money_raised,offices
34,MUBI,games_video,20,,"film, movie, cinema, video, art-house, social-...",$8.25M,"[{'description': '', 'address1': '668 High Str..."
60,Unity Technologies,games_video,200,,"3d, interactive-3d, game-development, content-...",$17.5M,"[{'description': 'US Office', 'address1': '795..."
16,Musicshake,games_video,28,,"musicshake, musiccreation, user-generated-musi...",$3.5M,"[{'description': '', 'address1': '3460 Wilshir..."
63,SCVNGR,games_video,60,,"mobile, game, princeton, dreamit, google-ventu...",$40.8M,"[{'description': 'SCVNGR', 'address1': 'One Co..."
1,Pando Networks,games_video,23,,"p2p, video, streaming, download, cdn",$11M,"[{'description': None, 'address1': '520 Broadw..."
10,Aniboom,games_video,25,,"animation-production, cartoon-production, cart...",$14.5M,"[{'description': None, 'address1': None, 'addr..."
29,Mytopia,games_video,29,,"techcrunch50, tc50, gaming, social-networks",$3.5M,"[{'description': '', 'address1': '', 'address2..."
7,Curse,games_video,58,,"mmo, curse",$12M,"[{'description': 'San Francisco', 'address1': ..."
53,RGB Networks,games_video,125,,"multiscreen-ip-video, video-processing, transc...",$52M,"[{'description': 'HQ', 'address1': '390 West J..."
15,hulu,games_video,153,,"hulu, video, embeddable, professional, content...",$100M,"[{'description': None, 'address1': '12312 W. O..."


In [30]:
city_list = list()
for i, offices in enumerate(df["offices"]):
    for office in offices:
        city_list.append(office["city"])
cities_df = pd.DataFrame(city_list, columns=['City'])
cities_df["City"].value_counts()


San Francisco    14
New York         11
Los Angeles       6
Paris             5
Seattle           4
                 ..
Dundee            1
Brooklyn          1
Vancouver         1
Dublin            1
Boca Raton        1
Name: City, Length: 62, dtype: int64

In [31]:
data_list = list()
for index, row in df.iterrows():
    for office in row["offices"]:
        data_list.append([row["name"], row["number_of_employees"], office["city"],office["latitude"], office["longitude"]])
        
companies_df = pd.DataFrame(data_list, columns= ["name", "number_of_employees", "city", "latitude", "longitude"])      

In [32]:
companies_df

Unnamed: 0,name,number_of_employees,city,latitude,longitude
0,Kyte,40,San Francisco,37.788482,-122.409173
1,Pando Networks,23,New York,40.722655,-73.998730
2,Livestream,120,New York,40.726155,-73.995625
3,Ustream,250,San Francisco,37.392936,-122.079480
4,Ustream,250,Los Angeles,,
...,...,...,...,...,...
108,Blip,47,Santa Monica,34.027225,-118.468233
109,Exent,100,New York,40.752380,-74.005568
110,Exent,100,Petach-Tikva,,
111,Exent,100,San Francisco,37.787646,-122.402759


In [33]:
companies_df["city"].value_counts()

San Francisco    14
New York         11
Los Angeles       6
Paris             5
Seattle           4
                 ..
Dundee            1
Brooklyn          1
Vancouver         1
Dublin            1
Boca Raton        1
Name: city, Length: 62, dtype: int64

In [34]:
sanfran = companies_df.where(companies_df["city"]=="San Francisco").dropna(subset=['latitude', 'longitude'])
sanfran


Unnamed: 0,name,number_of_employees,city,latitude,longitude
0,Kyte,40.0,San Francisco,37.788482,-122.409173
3,Ustream,250.0,San Francisco,37.392936,-122.07948
8,hi5,100.0,San Francisco,37.788668,-122.400558
9,Curse,58.0,San Francisco,37.787092,-122.399972
14,Kongregate,20.0,San Francisco,37.786942,-122.401245
18,Crunchyroll,50.0,San Francisco,37.781265,-122.393229
23,Rupture,25.0,San Francisco,37.783898,-122.395234
27,Zynga,115.0,San Francisco,37.765158,-122.404234
50,Serious Business,22.0,San Francisco,37.789321,-122.401362
53,Citizen Sports,27.0,San Francisco,37.777513,-122.397044


In [35]:
sanfran.to_csv("data/sanfran.csv")

In [18]:
newyork = companies_df.where(companies_df["city"]=="New York").dropna(subset=['latitude', 'longitude'])
newyork

Unnamed: 0,name,number_of_employees,city,latitude,longitude
1,Pando Networks,23.0,New York,40.722655,-73.99873
2,Livestream,120.0,New York,40.726155,-73.995625
16,OMGPOP,50.0,New York,40.723384,-74.001704
37,Boonty,150.0,New York,40.717248,-74.002662
43,Cellufun,30.0,New York,40.73993,-73.993049
82,Major League Gaming,45.0,New York,40.752672,-73.97593
105,Saavn,25.0,New York,40.743877,-73.98618
109,Exent,100.0,New York,40.75238,-74.005568


In [19]:
paris = companies_df.where(companies_df["city"]=="Paris").dropna(subset=['latitude', 'longitude'])
paris

Unnamed: 0,name,number_of_employees,city,latitude,longitude
26,Kewego,65.0,Paris,46.227638,2.213749
87,Owlient,40.0,Paris,48.830809,2.30322
106,2 Minutes,105.0,Paris,48.850206,2.369803


In [20]:
sanfran_map = folium.Map(location= [37.788482,-122.409173], zoom_start= 13, height="25%", width="25%", tiles="cartodbpositron")
for index, row in sanfran.iterrows():
    Marker(location = [row["latitude"], row["longitude"]], icon=folium.Icon(color='lightred', icon='building', prefix='fa'), tooltip=row["name"]).add_to(sanfran_map)

newyork_map = folium.Map(location= [40.722655,-73.998730], zoom_start= 10, height="25%", width="25%", tiles="cartodbpositron")
for index, row in newyork.iterrows():
    Marker(location = [row["latitude"], row["longitude"]], icon=folium.Icon(color='lightblue', icon='building', prefix='fa'), tooltip=row["name"]).add_to(newyork_map)
    
paris_map = folium.Map(location= [48.830809,2.303220], zoom_start= 12, height="25%", width="25%", tiles="cartodbpositron")
for index, row in paris.iterrows():
    Marker(location = [row["latitude"], row["longitude"]], icon=folium.Icon(color='lightgreen', icon='building', prefix='fa'), tooltip=row["name"]).add_to(paris_map)


In [24]:
sanfran_map

In [22]:

newyork_map


In [23]:
paris_map