In [131]:
from pymongo import MongoClient
import pandas as pd
import time

import os
import requests
import json
from dotenv import load_dotenv
import pandas as pd

import geopandas as gpd
from cartoframes.viz import Map, Layer, popup_element

import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
import pandas as pd

import haversine as hs

# Filtering and extracting the data

In [5]:
client = MongoClient("localhost:27017")
db = client["ironhack"]
c = db.get_collection("companies")

In [6]:
# c.distinct("category_code")

In [108]:
condition1 = {"total_money_raised" : {"$regex": "\d{2,}\.?\d?M|\d{1,}\.?\d?B"}}
condition2 = {"tag_list": {"$regex": ".*design.|.tech.|.software.|.fashion."}}
condition3 = {"category_code": "web"}
condition4 = {"offices": {"$ne": []}}

projection = {"_id": 0, "name":1, "offices.city":1, "offices.latitude":1, "offices.longitude": 1, "address1":1, "total_money_raised": 1}

query = {"$and": [condition1, condition4, {"$or": [condition2, condition3]}]}
             

filtered_companies = list(c.find(query, projection))

In [109]:
df = pd.DataFrame(filtered_companies)
df.sample(1)

Unnamed: 0,name,total_money_raised,offices
29,eBuddy,€11.5M,"[{'city': 'Amsterdam', 'latitude': 52.3640933,..."


In [110]:
df.offices[0]

[{'city': 'Seattle', 'latitude': 47.603122, 'longitude': -122.333253},
 {'city': 'New York', 'latitude': 40.7237306, 'longitude': -73.9964312}]

In [111]:
df = df.explode("offices")
df.sample(1)

Unnamed: 0,name,total_money_raised,offices
217,ClickandBuy,€25M,"{'city': 'London', 'latitude': None, 'longitud..."


In [112]:
df.reset_index(drop=True)
df.sample(1)

Unnamed: 0,name,total_money_raised,offices
182,Zemanta,$7.35M,"{'city': 'Ljubljana', 'latitude': 46.0607926, ..."


In [113]:
cities = []
latitudes = []
longitudes = []
addresses = []
money = []

for index, row in df.iterrows():
    
    try:
        cities.append(row["offices"]["city"])
        latitudes.append(row["offices"]["latitude"])
        longitudes.append(row["offices"]["longitude"])
    
    except IndexError:
        cities.append(None)
        latitudes.append(None)
        longitudes.append(None)

    
df["city"] = cities
df["latitude"] = latitudes
df["longitude"] = longitudes

In [114]:
df.sample()

Unnamed: 0,name,total_money_raised,offices,city,latitude,longitude
101,Vobile,$10M,"{'city': 'Santa Clara', 'latitude': 37.399208,...",Santa Clara,37.399208,-121.97895


In [115]:
df = df.drop("offices", axis=1)

In [116]:
df.city.value_counts().head(5) 
# Instead of Palo Alto, London. Being in Europe makes it easier for the employees
# to network with successful companies and/or employees from different countries and cultures.
# The US is massively big, so traveling to other countries for work becomes difficult.

San Francisco    65
New York         58
Palo Alto        19
San Mateo        17
London           17
Name: city, dtype: int64

In [117]:
df2 = df.copy()
df2["total_money_raised"] = df2["total_money_raised"].replace('M|\$|€|C|£', "", regex = True).replace('B', "000", regex = True)
df2["total_money_raised"] = pd.to_numeric(df2["total_money_raised"])

df2["total_money_raised"].max()

df2.to_csv("filtered_companies.csv", index=False)


In [191]:
df3_topcompanies = df2.loc[df2["city"].isin(['San Francisco', 'New York', 'London'])].sort_values("total_money_raised", ascending=False).groupby(["city"]).first().reset_index()
df3_topcompanies

Unnamed: 0,city,name,total_money_raised,latitude,longitude
0,London,OANDA,117.0,51.519204,-0.16261
1,New York,AOL,1000.0,40.731132,-73.991931
2,San Francisco,Xero,244.0,37.779507,-122.39071


In [237]:
#airports = pd.read_csv("../Project_3_Company_Headquarters/dataframes/airports.csv")
airports.loc[(airports["municipality"] == "London") & (~airports["type"].isin(["closed", "small_airport", "heliport"]))]
airports.loc[(airports["municipality"] == "San Francisco") & (~airports["type"].isin(["closed", "heliport"]))]
airports.loc[(airports["municipality"] == "New York") & (~airports["type"].isin(["closed", "small_airport", "heliport"]))]



Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords
7781,13406,6N6,seaplane_base,Evers Seaplane Base,40.845901,-73.8162,,,US,US-NY,New York,no,6N6,,6N6,,,
7782,13407,6N7,seaplane_base,New York Skyports Inc Seaplane Base,40.735061,-73.972814,,,US,US-NY,New York,yes,,QNY,6N7,,https://en.wikipedia.org/wiki/New_York_Skyport...,
35870,3622,KJFK,large_airport,John F Kennedy International Airport,40.639447,-73.779317,13.0,,US,US-NY,New York,yes,KJFK,JFK,JFK,https://www.jfkairport.com/,https://en.wikipedia.org/wiki/John_F._Kennedy_...,"Manhattan, New York City, NYC, Idlewild, IDL, ..."
36011,3643,KLGA,large_airport,La Guardia Airport,40.777199,-73.872597,21.0,,US,US-NY,New York,yes,KLGA,LGA,LGA,https://www.laguardiaairport.com/,https://en.wikipedia.org/wiki/LaGuardia_Airport,"Manhattan, New York City, NYC, Glenn H. Curtis..."


# Accessing the API

In [18]:
token_fsq = os.getenv("key")

In [119]:
def get_results_from_foursquare (query, location, limit):

    ll = f"{location[1]}%2C{location[0]}"
    url = f"https://api.foursquare.com/v3/places/search?query={query}&ll={ll}&sort=DISTANCE&limit={str(limit)}"

    headers = {
        "accept": "application/json",
        "Authorization": token_fsq,
    }

    response = requests.get(url, headers=headers).json()
    
    return response

In [205]:
def making_requests(query):
    
    location = []
    dict_of_dfs = dict()

    for index, row in df3_topcompanies.iterrows():

        location.append(row["longitude"])
        location.append(row["latitude"])

        places = get_results_from_foursquare(query, location, 50)

        dict_of_dfs[row["city"]] = creating_dfs(places, query, row["city"])

        location = []
               
    return dict_of_dfs

In [206]:
def creating_dfs(res, type_of_place, city):
    
    new_list = []
    
    for i in res["results"]:
    
        name = i["name"]
        address =  i["location"]["formatted_address"]
        lat = i["geocodes"]["main"]["latitude"]
        lon = i["geocodes"]["main"]["longitude"]

        type_ = {"typepoint": 
                              {"type": "Point", 
                               "coordinates": [lat, lon]}}

        new_list.append({"city": city, "name":name, "lat":lat, "lon":lon, "type":type_, "address": address, "type_of_place": type_of_place})
        
    df = pd.DataFrame.from_records(new_list)
        
    return df


In [225]:
# bars = dict_of_dfs
#dict_of_dfs = making_requests("bar")
# nightclubs = making_requests("Night Club")
# elementary_schools = making_requests("Elementary School")
nurseries = making_requests("Nursery School")
#airports = making_requests("International Airport")


In [88]:
bars = dict_of_dfs

In [None]:
df2.to_csv("filtered_companies.csv", index=False)


In [226]:
bars1 = bars
nightclubs1 = nightclubs
elementary_schools1 = elementary_schools
nurseries1 = nurseries
airports1 = airports

In [127]:
# def names(word, dic):
    
#     for value in dic.values():
        
#         value["type_of_place"] = word
#         
        
# names("Bar", bars)
# names("Night Club", nightclubs)
# names("Elementary School", elementary_schools)
# names("Nursery School", nurseries)

In [227]:
def distances(df, dic):
    
    distance_from_company = []
    
    for index, row in df.iterrows():
        company_loc = (row["latitude"], row["longitude"])
        
        for df in dic.values():
            
            for index2, row2 in df.iterrows():
            
                try:

                    location = (row2["lat"], row2["lon"])

                    distance_from_company.append(hs.haversine(location, company_loc))
        

                except TypeError:
                    distance_from_company.append(None)
                
            df["distance_from_company"] = pd.Series(distance_from_company)


In [230]:
#distances(df3_topcompanies, nurseries)
#nurseries["New York"]


Unnamed: 0,city,name,lat,lon,type,address,type_of_place,distance_from_company
0,New York,All My Children Day Care & Nursery Schools,40.718759,-73.983154,"{'typepoint': {'type': 'Point', 'coordinates':...","112 Ridge St, New York, NY 10002",Nursery School,0.668536
1,New York,Cpc Jacob Riis Child Care Center,40.723487,-73.975831,"{'typepoint': {'type': 'Point', 'coordinates':...","108 Avenue D (E 8 Street), New York, NY 10009",Nursery School,0.714277
2,New York,Murray Hill Wee Ones Club,40.747777,-73.979311,"{'typepoint': {'type': 'Point', 'coordinates':...",128 E 36th St (between Lexington Avenue and Pa...,Nursery School,2.254976
3,New York,Chabad Of Tribeca,40.714902,-74.006048,"{'typepoint': {'type': 'Point', 'coordinates':...","54 Reade St, New York, NY 10007",Nursery School,3.528324
4,New York,Hands On For Music,40.71399,-74.007845,"{'typepoint': {'type': 'Point', 'coordinates':...","19 Warren St (Church St), New York, NY 10007",Nursery School,3.735764
5,New York,Hoboken Little School,40.736348,-74.0286,"{'typepoint': {'type': 'Point', 'coordinates':...","1 Newark St, Hoboken, NJ 07030",Nursery School,4.161132
6,New York,Bright Horizons,40.760271,-73.972197,"{'typepoint': {'type': 'Point', 'coordinates':...","410 Park Ave (55th), New York, NY 10022",Nursery School,4.395007
7,New York,Two By Two Childcare Academy,40.711485,-73.952844,"{'typepoint': {'type': 'Point', 'coordinates':...","418 Keap St, Brooklyn, NY 11211",Nursery School,4.577518
8,New York,Kiddie Korner Preschool,40.694051,-73.993487,"{'typepoint': {'type': 'Point', 'coordinates':...","117 Remsen St, Brooklyn, NY 11201",Nursery School,4.831164
9,New York,Scandinavian School of Jersey City,40.728324,-74.041813,"{'typepoint': {'type': 'Point', 'coordinates':...","513 Manila Ave (9th St), Jersey City, NJ 07302",Nursery School,4.756965


In [None]:
London = concat_dfs([bars, nightclubs, nurseries, elementary_schools], "London")

In [210]:

def concat_dfs(list_, city):
    
    for dic in list_:
        for df in dic.values():
            if df.loc[df["city"] == city]:
                df1 = pd.concat([df1, df], axis=0, join= "outer").reset_index()
                
    return df1


                
#             for index, row in df:
#                 cities[city] = pd.concat([row[city], nightclubs["London"]], axis=0, join= "outer").reset_index()

Unnamed: 0,index,name,lat,lon,type,address,type_of_place
0,0,The Mirror Bar,51.521536,-0.162908,"{'typepoint': {'type': 'Point', 'coordinates':...","222 Marylebone Rd, London, Greater London, NW1...",Bar
1,1,Sara Cafe,51.520111,-0.158643,"{'typepoint': {'type': 'Point', 'coordinates':...","13A Crawford St, London, Greater London, W1U 6BZ",Bar
2,2,Marriott Marble Arch - Executive Lounge,51.516266,-0.163932,"{'typepoint': {'type': 'Point', 'coordinates':...","134 George St, London, Greater London, W1H 5DN",Bar
3,3,Gino's Coffee Bar,51.522114,-0.162098,"{'typepoint': {'type': 'Point', 'coordinates':...","Great Central St, London, Greater London, NW1 6JH",Bar
4,4,Sports Bar & Grill,51.522365,-0.163686,"{'typepoint': {'type': 'Point', 'coordinates':...","Marylebone Station, Melcombe Pl, Marylebone, G...",Bar
...,...,...,...,...,...,...,...
95,45,Popworld Watling Street,51.513057,-0.094700,"{'typepoint': {'type': 'Point', 'coordinates':...","17 Watling St, London, Greater London, EC4M 9BB",Night Club
96,46,The Hoist,51.484716,-0.121544,"{'typepoint': {'type': 'Point', 'coordinates':...","32 Vauxhall Grove, London, SW8 1SY",Night Club
97,47,Ministry of Sound,51.497637,-0.099503,"{'typepoint': {'type': 'Point', 'coordinates':...","103 Gaunt St, London, Greater London, SE1 6DP",Night Club
98,48,The Steelyard,51.510136,-0.090236,"{'typepoint': {'type': 'Point', 'coordinates':...","Allhallow St, London, Greater London, EC4R 3UL",Night Club
