In [1]:
import sys
sys.path.append('../')

In [2]:
import os
from dotenv import load_dotenv
import requests
import json
import time
from functools import reduce
import operator
import pandas as pd
import src.functions as f

Filtering the database returned the following list of cities:

In [3]:
# City labels obtained from the database filtering step.
cities = [
    'San Francisco, CA, USA',
    'Ellensburg, WA, USA',
    'Brooklyn, NY, USA',
]

Shortening the city names by keeping only the part before the first comma

In [4]:
# Keep only the text that precedes the first comma of every city label.
cities_short = [label.partition(",")[0] for label in cities]

Creating a folder called "newdata" where I will save all my processed dataframes

In [5]:
# Create the output folder for the processed dataframes.
# exist_ok=True keeps the cell safe to re-run and removes the gap between a
# separate "does it exist?" check and the actual creation.
os.makedirs("../newdata", exist_ok=True)

Loading the .env file, where I saved the Foursquare API tokens.

In [6]:
# Load the .env file so the API tokens can be read with os.getenv afterwards.
load_dotenv()

True

In [7]:
# API credentials sent with every request, read from the environment
# variables "tok1" and "tok2". Either value is None when its variable is unset.
client_id, client_secret = (os.environ.get(variable) for variable in ("tok1", "tok2"))

The coordinates below (latitude first, then longitude) are the city-center coordinates of the cities I got from filtering the database.

In [8]:
# City-center points; each "coordinates" list holds [latitude, longitude].
# NOTE(review): GeoJSON itself orders a Point as [longitude, latitude] -
# confirm that whatever consumes these dicts expects [lat, lon].
sanfrancisco = {
    'type': 'Point',
    'coordinates': [37.778480215129015, -122.40559118863916],
}
ellensburg = {
    'type': 'Point',
    'coordinates': [46.996517823587766, -120.54550769638885],
}
brooklyn = {
    'type': 'Point',
    'coordinates': [40.692147496548735, -73.98495174207916],
}

These are the search queries I will send to the API in order to find out how far each kind of venue is from the city-center coordinates.

In [9]:
# Search terms, one per kind of venue; the position in this list is the
# index that fin() receives as `n`.
queries = [
    "vegan",
    "starbucks",
    "school",
    "train station subway",
    "basketball court",
]

In [10]:
def getFromDict(diccionario,mapa):
    """Follow a path of keys/indexes through a nested container.

    Args:
        diccionario: The outermost container (anything supporting ``[]``).
        mapa: Iterable of keys or indexes, applied from the outside in.

    Returns:
        The value reached after applying every entry of ``mapa`` in order;
        ``diccionario`` itself when ``mapa`` is empty.

    Raises:
        KeyError / IndexError / TypeError: Whatever the failing lookup raises.
    """
    current = diccionario
    for step in mapa:
        current = current[step]
    return current

In [11]:
def get_data(latitude, longitude, query, limit=100, timeout=30):
    """Search Foursquare venues matching ``query`` around a coordinate.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        query: Search term sent as the ``query`` parameter.
        limit: Value sent as the ``limit`` parameter (default 100, the value
            that was previously hard-coded).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    parametres = {
        "client_id": client_id,
        "client_secret": client_secret,
        "v": "20180323",
        "ll": f"{latitude},{longitude}",
        "query": query,
        "limit": limit,
    }

    # Pause before every request (3 s, the sum of the two sleeps this function
    # used to perform) - presumably to stay under the API rate limit.
    time.sleep(3)

    resp = requests.get(
        url='https://api.foursquare.com/v2/venues/search',
        params=parametres,
        timeout=timeout,
    )
    # Fail here with a clear HTTP error instead of handing an error payload
    # to the callers that index into the response.
    resp.raise_for_status()
    return resp.json()

In [12]:
def respven(dic):
    """Return the value stored under ``dic['response']['venues']``.

    Raises:
        KeyError: If either the 'response' or the 'venues' key is missing.
    """
    response_part = dic['response']
    return response_part['venues']

In [13]:
def extraetodo(json):
    """Reduce each venue record to the fields needed downstream.

    Args:
        json: Iterable of venue dicts. (The parameter name hides the ``json``
            module inside this function; the module is not used here.)

    Returns:
        A list with one dict per venue holding ``name``, ``lat``, ``lon``,
        ``distance`` and a ``location`` point whose coordinates are
        ``[lat, lon]``.

    Raises:
        KeyError: If a venue lacks one of the looked-up keys.
    """
    # Output field -> key path inside the venue record.
    field_paths = (
        ("name", ["name"]),
        ("lat", ["location", "lat"]),
        ("lon", ["location", "lng"]),
        ("distance", ['location', 'distance']),
    )

    def flatten(venue):
        row = {}
        for field, path in field_paths:
            row[field] = getFromDict(venue, path)
        row["location"] = {'type': 'Point', 'coordinates': [row["lat"], row["lon"]]}
        return row

    return [flatten(venue) for venue in json]

In [14]:
def fin(city, n):
    """Fetch and flatten the venues for query number ``n`` around ``city``.

    Args:
        city: Dict whose ``'coordinates'`` entry holds latitude at index 0
            and longitude at index 1.
        n: Index into the module-level ``queries`` list.

    Returns:
        The list produced by ``extraetodo`` for the venues of the response.
    """
    latitude = city['coordinates'][0]
    longitude = city['coordinates'][1]
    raw_response = get_data(latitude, longitude, queries[n])
    venues = respven(raw_response)
    return extraetodo(venues)

In [15]:
def todf(fincity):
    """Flatten per-category venue lists into a single dataframe.

    Args:
        fincity: Iterable of lists of venue dicts. Every dict must provide the
            keys ``name``, ``lat``, ``lon``, ``location``, ``category`` and
            ``distance``; any other key is ignored.

    Returns:
        pandas.DataFrame with exactly those six columns, in that order, and
        one row per venue. The columns are present even when there are no
        venues at all, so later column-based filtering still works on an
        empty result.

    Raises:
        KeyError: If a venue dict lacks one of the six keys.
    """
    columns = ["name", "lat", "lon", "location", "category", "distance"]
    rows = [
        {column: venue[column] for column in columns}
        for venues in fincity
        for venue in venues
    ]
    # Passing the column list explicitly fixes the schema for the empty case.
    return pd.DataFrame(rows, columns=columns)

In [16]:
def dropbydist(df, dis, cat):
    """Remove, in place, the rows of category ``cat`` lying farther than ``dis``.

    Args:
        df: DataFrame with ``distance`` and ``category`` columns; it is
            modified in place.
        dis: Distance threshold; rows with ``distance > dis`` are candidates.
        cat: Category label; only rows whose ``category`` equals it are dropped.

    Returns:
        None. The drop happens in place, so there is no new frame to return.
    """
    too_far = df.distance > dis
    in_category = df.category == cat
    doomed_index = df[too_far & in_category].index
    df.drop(doomed_index, inplace=True)
    return None

These are the names of the data categories, which are based on my queries.

In [17]:
# Category labels, listed in the same order as the search queries.
category = [
    "vegan_venues",
    "starbucks",
    "schools",
    "train_stations",
    "basketball_courts",
]

# Making the API call and creating the cleaned dataframe from the API data

## San Francisco

In [20]:
# One venue search per query around San Francisco, run in query order.
# Every venue returned by query n is tagged with the n-th label of `category`,
# so the five result sets stay distinguishable once merged into one dataframe.
# Taking the labels from `category` replaces five copy-pasted stanzas that
# each repeated a label literal.
(vegsanfrancisco, stasanfrancisco, schsanfrancisco,
 trasanfrancisco, bassanfrancisco) = (
    [{**venue, 'category': label} for venue in fin(sanfrancisco, n)]
    for n, label in enumerate(category)
)

In [21]:
# The five tagged San Francisco result lists, in query order.
finsanfrancisco = [
    vegsanfrancisco,
    stasanfrancisco,
    schsanfrancisco,
    trasanfrancisco,
    bassanfrancisco,
]

In [22]:
# Build the San Francisco dataframe with the notebook's own todf(), the same
# function used for Ellensburg and Brooklyn, so all three cities are produced
# by identical code (this cell previously called f.todf from src.functions).
dfsanfrancisco = todf(finsanfrancisco)

In [23]:
# Distance cut-offs listed in the same order as `category`; rows of a category
# that lie farther than its cut-off are removed from the dataframe in place.
for max_distance, label in zip((1500, 1300, 3000, 2500, 2000), category):
    dropbydist(dfsanfrancisco, max_distance, label)

In [24]:
# Save the filtered San Francisco venues; index=False leaves the row index
# out of the file.
dfsanfrancisco.to_csv("../newdata/dfsanfran.csv", index=False)

## Ellensburg

In [25]:
# One venue search per query around Ellensburg, run in query order.
# Every venue returned by query n is tagged with the n-th label of `category`,
# so the five result sets stay distinguishable once merged into one dataframe.
# Taking the labels from `category` replaces five copy-pasted stanzas that
# each repeated a label literal.
vegell, staell, schell, traell, basell = (
    [{**venue, 'category': label} for venue in fin(ellensburg, n)]
    for n, label in enumerate(category)
)

In [26]:
# The five tagged Ellensburg result lists, in query order.
finellensburg = [
    vegell,
    staell,
    schell,
    traell,
    basell,
]

In [27]:
# Merge the five tagged Ellensburg result lists into a single dataframe.
dfellensburg = todf(finellensburg)

In [28]:
# Distance cut-offs listed in the same order as `category`; rows of a category
# that lie farther than its cut-off are removed from the dataframe in place.
for max_distance, label in zip((1500, 1300, 3000, 2500, 2000), category):
    dropbydist(dfellensburg, max_distance, label)

In [30]:
# Save the filtered Ellensburg venues; index=False leaves the row index out
# of the file.
dfellensburg.to_csv("../newdata/dfellensburg.csv", index=False)

## Brooklyn

In [31]:
# One venue search per query around Brooklyn, run in query order.
# Every venue returned by query n is tagged with the n-th label of `category`,
# so the five result sets stay distinguishable once merged into one dataframe.
# Taking the labels from `category` replaces five copy-pasted stanzas that
# each repeated a label literal.
vegbro, stabro, schbro, trabro, basbro = (
    [{**venue, 'category': label} for venue in fin(brooklyn, n)]
    for n, label in enumerate(category)
)

In [32]:
# The five tagged Brooklyn result lists, in query order.
finbrooklyn = [
    vegbro,
    stabro,
    schbro,
    trabro,
    basbro,
]

In [33]:
# Merge the five tagged Brooklyn result lists into a single dataframe.
dfbrooklyn = todf(finbrooklyn)

In [34]:
# Distance cut-offs listed in the same order as `category`; rows of a category
# that lie farther than its cut-off are removed from the dataframe in place.
for max_distance, label in zip((1500, 1300, 3000, 2500, 2000), category):
    dropbydist(dfbrooklyn, max_distance, label)

In [35]:
# Save the filtered Brooklyn venues; index=False leaves the row index out of
# the file.
dfbrooklyn.to_csv("../newdata/dfbrooklyn.csv", index=False)