In [4]:
from pymongo import MongoClient
import pandas as pd
import time

import os
import requests
import json
from dotenv import load_dotenv
import pandas as pd

import geopandas as gpd
from cartoframes.viz import Map, Layer, popup_element

import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
import pandas as pd

load_dotenv()

True

# Filtering and extracting the data

In [5]:
client = MongoClient("localhost:27017")
db = client["ironhack"]
c = db.get_collection("companies")

In [6]:
# c.distinct("category_code")

In [7]:
condition1 = {"total_money_raised" : {"$regex": "\d{2,}\.?\d?M|\d{1,}\.?\d?B"}}
condition2 = {"tag_list": {"$regex": ".*design.|.tech.|.software.|.fashion."}}
condition3 = {"category_code": "web"}
condition4 = {"offices": {"$ne": []}}

projection = {"_id": 0, "name":1, "offices.city":1, "offices.latitude":1, "offices.longitude": 1, "address1":1, "total_money_raised": 1}

query = {"$and": [condition1, condition4, {"$or": [condition2, condition3]}]}
             

filtered_companies = list(c.find(query, projection))

In [8]:
df = pd.DataFrame(filtered_companies)
df.sample(1)

Unnamed: 0,name,total_money_raised,offices
217,ClickandBuy,€25M,"[{'city': 'Cologne', 'latitude': 50.947699, 'l..."


In [9]:
df.offices[0]

[{'city': 'Seattle', 'latitude': 47.603122, 'longitude': -122.333253},
 {'city': 'New York', 'latitude': 40.7237306, 'longitude': -73.9964312}]

In [10]:
df = df.explode("offices")
df.sample(1)

Unnamed: 0,name,total_money_raised,offices
444,Talenthouse,$15.1M,"{'city': 'West Hollywood', 'latitude': None, '..."


In [11]:
df.reset_index(drop=True)
df.sample(1)

Unnamed: 0,name,total_money_raised,offices
282,HammerKit,€1.25M,"{'city': 'Helsinki', 'latitude': 60.16441, 'lo..."


In [12]:
cities = []
latitudes = []
longitudes = []
addresses = []
money = []

for index, row in df.iterrows():
    
    try:
        cities.append(row["offices"]["city"])
        latitudes.append(row["offices"]["latitude"])
        longitudes.append(row["offices"]["longitude"])
    
    except IndexError:
        cities.append(None)
        latitudes.append(None)
        longitudes.append(None)

    
df["city"] = cities
df["latitude"] = latitudes
df["longitude"] = longitudes

In [13]:
df.sample()

Unnamed: 0,name,total_money_raised,offices,city,latitude,longitude
39,Sampa,$1.31M,"{'city': 'Redmond', 'latitude': 47.682233, 'lo...",Redmond,47.682233,-122.135521


In [14]:
df = df.drop("offices", axis=1)

In [15]:
df.city.value_counts().head(5) 
# Instead of Palo Alto, London. Being in Europe makes it easier for the employees
# to network with successful companies and/or employees from different countries and cultures.
# The US is massively big, so traveling to other countries for work becomes difficult.

San Francisco    65
New York         58
Palo Alto        19
San Mateo        17
London           17
Name: city, dtype: int64

In [16]:
df2 = df.copy()
df2["total_money_raised"] = df2["total_money_raised"].replace('M|\$|€|C|£', "", regex = True).replace('B', "000", regex = True)
df2["total_money_raised"] = pd.to_numeric(df2["total_money_raised"])

df2["total_money_raised"].max()

df2.to_csv("filtered_companies.csv", index=False)


In [41]:
df3_topcompanies = df2.loc[df2["city"].isin(['San Francisco', 'New York', 'London'])].sort_values("total_money_raised", ascending=False).groupby(["city"]).first().reset_index()
df3_topcompanies

Unnamed: 0,city,name,total_money_raised,latitude,longitude
0,London,OANDA,117.0,51.519204,-0.16261
1,New York,AOL,1000.0,40.731132,-73.991931
2,San Francisco,Xero,244.0,37.779507,-122.39071


# Accessing the API

In [18]:
token_fsq = os.getenv("key")

In [78]:
def get_results_from_foursquare (query, location, limit):

    ll = f"{location[1]}%2C{location[0]}"
    url = f"https://api.foursquare.com/v3/places/search?query={query}&ll={ll}&sort=DISTANCE&limit={str(limit)}"

    headers = {
        "accept": "application/json",
        "Authorization": token_fsq,
    }

    response = requests.get(url, headers=headers).json()
    
    return response

In [79]:
def making_requests(query):
    
    location = []
    dict_of_dfs = dict()

    for index, row in df3_topcompanies.iterrows():

        location.append(row["longitude"])
        location.append(row["latitude"])

        places = get_results_from_foursquare(query, location, 50)

        dict_of_dfs[row["city"]] = creating_dfs(places)

        location = []
               
    return dict_of_dfs

In [80]:
def creating_dfs(res):
    
    new_list = []
    
    for i in res["results"]:
    
        name = i["name"]
        address =  i["location"]["formatted_address"]
        lat = i["geocodes"]["main"]["latitude"]
        lon = i["geocodes"]["main"]["longitude"]

        type_ = {"typepoint": 
                              {"type": "Point", 
                               "coordinates": [lat, lon]}}

        new_list.append({"name":name, "lat":lat, "lon":lon, "type":type_, "address": address})
        
    df = pd.DataFrame.from_records(new_list)
        
    return df


In [93]:
# bars = dict_of_dfs
#dict_of_dfs = making_requests("bar")
# nightclubs = making_requests("Night Club")
# elementary_schools = making_requests("Elementary School")
# nurseries = making_requests("Nursery School")
airports = making_requests("Airport Terminal")


In [88]:
bars = dict_of_dfs

In [90]:
bars1 = bars
nightclubs1 = nightclubs
elementary_schools1 = elementary_schools
nurseries1 = nurseries
airports1 = airports

In [94]:
airports["New York"]

Unnamed: 0,name,lat,lon,type,address
0,Atlantic Aviation,40.743639,-73.972686,"{'typepoint': {'type': 'Point', 'coordinates':...","499 E 34th St, New York, NY 10016"
1,JF Terminal 4,40.72111,-73.951905,"{'typepoint': {'type': 'Point', 'coordinates':...","Gate B30, Brooklyn, NY 11211"
2,Marine Air Terminal,40.773679,-73.885848,"{'typepoint': {'type': 'Point', 'coordinates':...","10205 Ditmars Blvd (LaGuardia Airport), Queens..."
3,American Airlines,40.773097,-73.877132,"{'typepoint': {'type': 'Point', 'coordinates':...","LaGuardia Airport, East Elmhurst, NY 11371"
4,Terminal B,40.773608,-73.872038,"{'typepoint': {'type': 'Point', 'coordinates':...","Queens, NY 11369"
5,Riverbank Park Helipad,40.827262,-73.952188,"{'typepoint': {'type': 'Point', 'coordinates':...","679 Riverside Dr, New York, NY 10031"
6,Jet Aviation,40.840832,-74.066978,"{'typepoint': {'type': 'Point', 'coordinates':...",112 Charles A Lindbergh Dr (Teterboro Airport)...
7,Teterboro Air Traffic Control Tower,40.852392,-74.055282,"{'typepoint': {'type': 'Point', 'coordinates':...","225 Fred Wehran Dr, Teterboro, NJ 07608"
8,Dassault Falcon Jet,40.853987,-74.052282,"{'typepoint': {'type': 'Point', 'coordinates':...","200 Riser Rd, Little Ferry, NJ 07643"
9,Meridian Teterboro,40.851708,-74.068299,"{'typepoint': {'type': 'Point', 'coordinates':...","485 Industrial Ave, Teterboro, NJ 07608"


In [None]:
def names(word, dic):
    
    for value in dic.values():
        
        df["type_of_place"] = word