In [328]:
import requests
import pandas as pd
import json
import os
from dotenv import load_dotenv

load_dotenv()

api_key_4square = os.getenv('FOURSQUARE_KEY')
api_key_yelp = os.getenv('YELP_KEY')

In [354]:
# import bikes_df
bikes_df =pd.read_csv(r"B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\bikes_df.csv",sep=',')

In [386]:
# function to clean dataframe
def df_clean (dataframe):
    dataframe = dataframe.drop_duplicates(subset='name')
    dataframe = dataframe.drop_duplicates(subset='id')
    dataframe = dataframe.query('distance < 1000 & (bar == 1 | dog_park == 1 | restaurant == 1)')
    return dataframe


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [330]:
# 4square api prep function
def places_4square(latitude, longitude, radius, categories,limit, api_key):
    params = {}
    ll = str(latitude)+","+str(longitude)
    params["ll"] = ll
    params['radius'] = radius
    categories = ','.join(str(x)for x in categories)
    params["categories"] = categories
    params["limit"] = limit

    url = "https://api.foursquare.com/v3/places/search?sort=RATING"
    headers = {"accept": "application/json","Authorization": api_key}
    response = requests.get(url, params=params, headers=headers)
    return response

In [331]:
# Getting and saving response data as a local json file
# category_list=[16033,13003,13029,13065]
# foursquare_places_json = places_4square(bikes_df['latitude'][0],bikes_df['longitude'][0],1000,category_list,50,api_key_4square).json()
# with open('data/foursquare_places_data.json','w') as f:
#     json.dump(foursquare_places_json,f)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [383]:
# pulling data from json into lists
# lists for temp busckets
id = []
distance = []
name = []
bar = []
dog_park = []
restaurant = []
rating = []
# lists to normalize category list
bar_lookup = ['bar', 'pub', 'speakeasy', 'beer', 'Brewery']
dog_park_lookup = ['dog park']



#loading json file
with open(r"B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\foursquare_places_data.json",'r') as f:
    data= json.load(f)
    
# run through all results for each place info
for result in range(len(data['results'])):
    id.append(data['results'][result]['fsq_id'])
    # run thorugh all categories for each place
    for i in range(len(data['results'][result]['categories'])):
        if any(map(data['results'][result]['categories'][i]['short_name'].lower().__contains__, bar_lookup)):
            bar.append(1)
            dog_park.append(0)
            restaurant.append(0)
            break

        elif any(map(data['results'][result]['categories'][i]['short_name'].lower().__contains__, dog_park_lookup)):
            bar.append(0)
            dog_park.append(1)
            restaurant.append(0)
            break
        else:
            bar.append(0)
            dog_park.append(0)
            restaurant.append(1)
            break
    if 'rating' in data['results'][result]:
        rating.append(data['results'][result]['rating'])
    else:
        rating.append(None)
        
    distance.append(data['results'][result]['distance'])
    name.append(data['results'][result]['name'])

Put your parsed results into a DataFrame

In [387]:
# dataframe creation
foursquare_places_df = pd.DataFrame({'name':name,'id':id, "bar":bar,'dog_park': dog_park,'restaurant':restaurant,'distance':distance, 'rating':rating})
foursquare_places_df = df_clean(foursquare_places_df)
foursquare_places_df

Unnamed: 0,name,id,bar,dog_park,restaurant,distance,rating
0,BrewDog Glasgow,4e1c4c481fc714dc5c3143ed,1,0,0,884,
1,Mother India's Cafe,4b7b007cf964a5205b4b2fe3,0,0,1,931,
2,Meadow Road Coffee,54c61ff3498ec3e20f6a64b2,0,0,1,902,
3,Tantrum Doughnuts,56752941498ef438347e5c5e,0,0,1,851,
4,Baffo,57f54b2d498e574bd31426ea,0,0,1,908,
5,Kimchi Cult,5635275a498e569169b27825,0,0,1,800,
6,The Sparkle Horse,50d20bc9e4b029fc6804c7a9,1,0,0,705,
7,Basta,59d12b1befa82a766c5f27ec,0,0,1,860,
8,Banana Leaf,4b71383ef964a520023d2de3,0,0,1,804,
9,Big Mouth Coffee Co,4bc8632f14d7952115eb68e9,0,0,1,634,


In [334]:
# save dataframe as CSV to access in other notebooks
foursquare_places_df.to_csv(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\foursquare_places_df.csv',sep =',',index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [335]:
# yelp api prep function
def places_yelp(latitude, longitude, radius, categories, limit, api_key):
    params = {}
    params["longitude"] = longitude
    params["latitude"] = latitude
    params['radius'] = radius
    params["categories"] = categories
    params["limit"] = limit

    url = f"https://api.yelp.com/v3/businesses/search?sort_by=rating"
    headers = {"accept": "application/json","Authorization": f'bearer {api_key}'}
    response = requests.get(url, params=params, headers=headers)
    return response

In [336]:
#Getting and saving response data as a local json file
category_list=['bars', 'dog_parks']
yelp_places_json = places_yelp(bikes_df['latitude'][0],bikes_df['longitude'][0],1000,category_list,50,api_key_yelp).json()
with open('data/yelp_places_data.json','w') as f:
    json.dump(yelp_places_json,f)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [388]:
# pulling data from json into dataframe
# lists for temp busckets
id = []
distance = []
name = []
bar = []
dog_park = []
restaurant = []
rating= []

# lists to normalize category list
bar_lookup = ['bar', 'pub', 'speakeasy', 'beer', 'Brewery']
dog_park_lookup = ['dog park']


#loading json file
with open(r"B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\yelp_places_data.json",'r') as f:
    data= json.load(f)
    
# run through all results for each place info
for result in range(len(data['businesses'])):
    id.append(data['businesses'][result]['id'])
    for i in range(len(data['businesses'][result]['categories'])):
        if any(map(data['businesses'][result]['categories'][i]['alias'].lower().__contains__, bar_lookup)):
                bar.append(1)
                dog_park.append(0)
                restaurant.append(0)
                break

        elif any(map(data['businesses'][result]['categories'][i]['alias'].lower().__contains__, dog_park_lookup)):
            bar.append(0)
            dog_park.append(1)
            restaurant.append(0)
            break
                
        else:
            bar.append(0)
            dog_park.append(0)
            restaurant.append(1)
            break
    if 'rating' in data['businesses'][result]:
        rating.append(data['businesses'][result]['rating'])
    else:
        rating.append(None)
    distance.append(data['businesses'][result]['distance'])
    name.append(data['businesses'][result]['name'])


Put your parsed results into a DataFrame

In [391]:
# dataframe creation
yelp_places_df = pd.DataFrame({'name':name,'id':id, "bar":bar ,'dog_park': dog_park,'restaurant':restaurant, 'distance':distance, 'rating':rating})
yelp_places_df = df_clean(yelp_places_df) 
yelp_places_df

Unnamed: 0,name,id,bar,dog_park,restaurant,distance,rating
1,Dukes,c2fwMshDF0wtCKGLe3IGtw,1,0,0,838.51839,4.4
2,BrewDog Glasgow,qjjmYSR3CVsJlr75vyIEBg,1,0,0,871.079804,4.3
3,6 Degrees North,_S0eZCB0-qNCAMTrlAIqCw,1,0,0,854.560573,4.2
4,Nam Tuk,djfs-a4vk1jUD70F0-5NGg,0,0,1,596.695005,4.7
5,Bag O Nails,G_9pgFkyGLwmew0n_FYxHg,1,0,0,616.824474,4.1
6,The Sparkle Horse,PkLiavkwqsGzE5D2tqfsyg,1,0,0,689.080576,4.1
8,Bay Inn,jPcimhU3e9cXenG2hkrJkg,1,0,0,763.459668,5.0
9,The Sanctuary,U_H225i84DpkdAY6ConB9A,1,0,0,721.622736,5.0
10,Three Judges,Gi2Fs1Ve7lyJAXWKeQkthg,1,0,0,637.835306,4.0
11,Firebird,dMdrLM-SMYuiGzzo6Si2Cw,0,0,1,937.443536,3.9


In [392]:
# save dataframe as CSV to access in other notebooks
yelp_places_df.to_csv(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\yelp_places_df.csv',sep =',',index=False)

# Comparing Results

In [393]:
yelp_places_df=yelp_places_df.sort_values(by='name')
foursquare_places_df=foursquare_places_df.sort_values(by='name')

yelp_places_df

Unnamed: 0,name,id,bar,dog_park,restaurant,distance,rating
3,6 Degrees North,_S0eZCB0-qNCAMTrlAIqCw,1,0,0,854.560573,4.2
5,Bag O Nails,G_9pgFkyGLwmew0n_FYxHg,1,0,0,616.824474,4.1
15,Bar Gumbo,eVrLnqu0q33tKmjvOz24rQ,0,0,1,827.61831,3.9
8,Bay Inn,jPcimhU3e9cXenG2hkrJkg,1,0,0,763.459668,5.0
28,Brechin Bar,Ytb9Nbv-i6ZI4PNpagFAyQ,1,0,0,549.960606,3.1
2,BrewDog Glasgow,qjjmYSR3CVsJlr75vyIEBg,1,0,0,871.079804,4.3
16,Celino's,9Rk6e_gZknKyLrDYfO0nIQ,0,0,1,584.539304,3.8
37,Clyde Valley,av13jE5Nf5ptv48SOJs4uA,1,0,0,575.779743,3.0
21,Cottier's,I7q8ZT7AurTrv8PKc0utSA,1,0,0,953.910414,3.6
13,Deoch an Dorus,UFpObmkOLs9aFEWPL1niRQ,1,0,0,623.279639,3.9


Which API provided you with more complete data? Provide an explanation. 

foursquare isn't giving rating for any of the locations and yelp gives more detailed distance. I feel that yelp give more, and more complete, data.

Get the top 10 restaurants according to their rating

In [396]:
yelp_places_df.sort_values(by='rating', ascending=False).head(10)

Unnamed: 0,name,id,bar,dog_park,restaurant,distance,rating
9,The Sanctuary,U_H225i84DpkdAY6ConB9A,1,0,0,721.622736,5.0
8,Bay Inn,jPcimhU3e9cXenG2hkrJkg,1,0,0,763.459668,5.0
4,Nam Tuk,djfs-a4vk1jUD70F0-5NGg,0,0,1,596.695005,4.7
1,Dukes,c2fwMshDF0wtCKGLe3IGtw,1,0,0,838.51839,4.4
2,BrewDog Glasgow,qjjmYSR3CVsJlr75vyIEBg,1,0,0,871.079804,4.3
3,6 Degrees North,_S0eZCB0-qNCAMTrlAIqCw,1,0,0,854.560573,4.2
5,Bag O Nails,G_9pgFkyGLwmew0n_FYxHg,1,0,0,616.824474,4.1
6,The Sparkle Horse,PkLiavkwqsGzE5D2tqfsyg,1,0,0,689.080576,4.1
25,Storm Queen,mYhrolNYGeH8Yhac9OWVSA,1,0,0,575.779743,4.0
10,Three Judges,Gi2Fs1Ve7lyJAXWKeQkthg,1,0,0,637.835306,4.0
