In [1]:
import requests
import pandas as pd
import json
import os
from dotenv import load_dotenv

load_dotenv()

api_key_4square = os.getenv('FOURSQUARE_KEY')
api_key_yelp = os.getenv('YELP_KEY')

In [2]:
# import bikes_df
bikes_df =pd.read_csv(r"B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\bikes_df.csv",sep=',')

In [3]:
# function to clean dataframe
def df_clean (dataframe):
    dataframe = dataframe.drop_duplicates(subset='name')
    dataframe = dataframe.drop_duplicates(subset='id')
    dataframe = dataframe.query('distance < 1000 & (bar == 1 | dog_park == 1 | restaurant == 1)')
    return dataframe


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [4]:
# 4square api prep function
def places_4square(latitude, longitude, radius, categories,limit, api_key):
    params = {}
    ll = str(latitude)+","+str(longitude)
    params["ll"] = ll
    params['radius'] = radius
    categories = ','.join(str(x)for x in categories)
    params["categories"] = categories
    params["limit"] = limit

    url = "https://api.foursquare.com/v3/places/search?sort=RATING"
    headers = {"accept": "application/json","Authorization": api_key}
    response = requests.get(url, params=params, headers=headers)
    return response

In [5]:
# Get request for all bike stations
category_list=[16033,13003,13029,13065]
foursquare_responses = []
for index, station in bikes_df.iterrows():
    filename = f"B:/LHL/StatsModeling_Project/LHL-statsModeling-priject/data/foursquare_station_data/{bikes_df['id'][index]}.json"
    foursquare_places_json = places_4square(bikes_df['latitude'][index],bikes_df['longitude'][index],1000,category_list,50,api_key_4square).json()
    with open(filename,'w') as f:
        json.dump(foursquare_places_json,f)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [6]:
# pulling data from json into lists
# lists for temp busckets
id = []
distance = []
name = []
bar = []
dog_park = []
restaurant = []
lat = []
long = []
rating = []
station_id = []
# lists to normalize category list
bar_lookup = ['bar', 'pub', 'speakeasy', 'beer', 'Brewery']
dog_park_lookup = ['dog park']


for file in os.listdir(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\foursquare_station_data'):

        # loading json file
        with open(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\foursquare_station_data/'+ file,'r') as f:
            data= json.load(f)
            
    
        # run through all results for each place info
        for result in range(len(data['results'])):
            id.append(data['results'][result]['fsq_id'])
            lat.append(data['results'][result]['geocodes']['main']['latitude'])
            long.append(data['results'][result]['geocodes']['main']['longitude'])
            # run thorugh all categories for each place
            for i in range(len(data['results'][result]['categories'])):
                if any(map(data['results'][result]['categories'][i]['short_name'].lower().__contains__, bar_lookup)):
                    bar.append(1)
                    dog_park.append(0)
                    restaurant.append(0)
                    break

                elif any(map(data['results'][result]['categories'][i]['short_name'].lower().__contains__, dog_park_lookup)):
                    bar.append(0)
                    dog_park.append(1)
                    restaurant.append(0)
                    break
                else:
                    bar.append(0)
                    dog_park.append(0)
                    restaurant.append(1)
                    break
            if 'rating' in data['results'][result]:
                rating.append(data['results'][result]['rating'])
            else:
                rating.append(None)
                
            distance.append(data['results'][result]['distance'])
            name.append(data['results'][result]['name'])
            filename_split = os.path.splitext(os.path.basename(file))[0]
            station_id.append(filename_split)

Put your parsed results into a DataFrame

In [7]:
# dataframe creation
foursquare_places_df = pd.DataFrame({'id':id, 'name':name, "bar":bar,'dog_park': dog_park,'restaurant':restaurant,'distance':distance, 'latitude':lat,'longitude':long, 'rating':rating, 'station_id' : station_id})
foursquare_places_df = df_clean(foursquare_places_df)
foursquare_places_df

Unnamed: 0,id,name,bar,dog_park,restaurant,distance,latitude,longitude,rating,station_id
0,4e1c4c481fc714dc5c3143ed,BrewDog Glasgow,1,0,0,884,55.868325,-4.292289,,066c99293af108ece27d9b0436c30cc4
1,4b7b007cf964a5205b4b2fe3,Mother India's Cafe,0,0,1,931,55.867287,-4.291100,,066c99293af108ece27d9b0436c30cc4
2,54c61ff3498ec3e20f6a64b2,Meadow Road Coffee,0,0,1,902,55.870559,-4.316758,,066c99293af108ece27d9b0436c30cc4
3,56752941498ef438347e5c5e,Tantrum Doughnuts,0,0,1,851,55.866930,-4.291825,,066c99293af108ece27d9b0436c30cc4
4,57f54b2d498e574bd31426ea,Baffo,0,0,1,908,55.867785,-4.291606,,066c99293af108ece27d9b0436c30cc4
...,...,...,...,...,...,...,...,...,...,...
4598,09706c86d5354117b1d0cfbb,Waterlock,1,0,0,901,55.855205,-4.235996,,f806566ccd2263bd5dbb624cdd0fb7f3
4599,53f3bb6f498e257ee8cb8615,Dannys Fast Food,0,0,1,921,55.855545,-4.236496,,f806566ccd2263bd5dbb624cdd0fb7f3
4601,53cc2fb5498e10c40b24046b,The Traders Tavern,1,0,0,924,55.854630,-4.237765,,f806566ccd2263bd5dbb624cdd0fb7f3
4603,58fb5afa356b4972b023ab90,A'challatainn,0,0,1,939,55.855151,-4.236940,,f806566ccd2263bd5dbb624cdd0fb7f3


In [8]:
# save dataframe as CSV to access in other notebooks
foursquare_places_df.to_csv(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\foursquare_places_df.csv',sep =',',index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [12]:
# yelp api prep function
def places_yelp(latitude, longitude, radius, categories, limit, api_key):
    params = {}
    params["longitude"] = longitude
    params["latitude"] = latitude
    params['radius'] = radius
    params["categories"] = categories
    params["limit"] = limit

    url = f"https://api.yelp.com/v3/businesses/search?sort_by=rating"
    headers = {"accept": "application/json","Authorization": f'bearer {api_key}'}
    response = requests.get(url, params=params, headers=headers)
    return response

In [13]:
# Get request for all bike stations
category_list=['bars', 'dog_parks']
yelp_responses = []
for index, station in bikes_df.iterrows():
    filename = f"B:/LHL/StatsModeling_Project/LHL-statsModeling-priject/data/yelp_station_data/{bikes_df['id'][index]}.json"
    yelp_places_json = places_yelp(bikes_df['latitude'][index],bikes_df['longitude'][index],1000,category_list,5,api_key_yelp).json()
    with open(filename,'w') as f:
        json.dump(yelp_places_json,f)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [14]:
    # pulling data from json into dataframe
    # lists for temp busckets
id = []
distance = []
name = []
bar = []
dog_park = []
restaurant = []
lat = []
long = []
rating= []
station_id = []
    # lists to normalize category list
bar_lookup = ['bar', 'pub', 'speakeasy', 'beer', 'Brewery']
dog_park_lookup = ['dog park']


for file in os.listdir(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\yelp_station_data'):

        # loading json file
        with open(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\yelp_station_data/'+ file,'r') as f:
            data= json.load(f)
            
        # run through all results for each place info
        for result in range(len(data['businesses'])):
            id.append(data['businesses'][result]['id'])
            lat.append(data['businesses'][result]['coordinates']['latitude'])
            long.append(data['businesses'][result]['coordinates']['longitude'])
            for i in range(len(data['businesses'][result]['categories'])):
                if any(map(data['businesses'][result]['categories'][i]['alias'].lower().__contains__, bar_lookup)):
                        bar.append(1)
                        dog_park.append(0)
                        restaurant.append(0)
                        break

                elif any(map(data['businesses'][result]['categories'][i]['alias'].lower().__contains__, dog_park_lookup)):
                    bar.append(0)
                    dog_park.append(1)
                    restaurant.append(0)
                    break
                        
                else:
                    bar.append(0)
                    dog_park.append(0)
                    restaurant.append(1)
                    break
            if 'rating' in data['businesses'][result]:
                rating.append(data['businesses'][result]['rating'])
            else:
                rating.append(None)
            distance.append(data['businesses'][result]['distance'])
            name.append(data['businesses'][result]['name'])
            filename_split = os.path.splitext(os.path.basename(file))[0]
            station_id.append(filename_split)

    # dataframe creation



Put your parsed results into a DataFrame

In [16]:
# dataframe creation
yelp_places_df = pd.DataFrame({'id':id,'name':name, "bar":bar ,'dog_park': dog_park,'restaurant':restaurant, 'distance':distance,'latitude':lat,'longitude':long, 'rating':rating, 'station_id':station_id})
yelp_places_df = df_clean(yelp_places_df) 
yelp_places_df

Unnamed: 0,id,name,bar,dog_park,restaurant,distance,latitude,longitude,rating,station_id
1,c2fwMshDF0wtCKGLe3IGtw,Dukes,1,0,0,838.518390,55.866966,-4.292194,4.4,066c99293af108ece27d9b0436c30cc4
2,qjjmYSR3CVsJlr75vyIEBg,BrewDog Glasgow,1,0,0,871.079804,55.867702,-4.291878,4.3,066c99293af108ece27d9b0436c30cc4
3,_S0eZCB0-qNCAMTrlAIqCw,6 Degrees North,1,0,0,854.560573,55.870835,-4.315485,4.2,066c99293af108ece27d9b0436c30cc4
4,djfs-a4vk1jUD70F0-5NGg,Nam Tuk,0,0,1,596.695005,55.869846,-4.299887,4.7,066c99293af108ece27d9b0436c30cc4
5,BBD1UiXhcUpvKmQo8jZrog,Old Black Bull,1,0,0,636.954617,55.852070,-4.200545,5.0,0a106cbc148d5a0c2535b51c1dbe3b4d
...,...,...,...,...,...,...,...,...,...,...
517,LOEGJ9C8QgEfbVq-haTwqA,Lincoln Inn,1,0,0,553.814985,55.899938,-4.349245,2.0,f6210447e1724487afe0ce63b011aecd
518,B2B1zS_JHzOQvm9bJ9MQGA,New Dragon Inn,1,0,0,818.569003,55.896633,-4.341972,0.0,f6210447e1724487afe0ce63b011aecd
527,M7ekLvEjNuqrx828SFFhCw,Kimberley,1,0,0,678.130598,55.843273,-4.170313,0.0,f6b64fd82fe6e607f1fc788ace31c3a1
528,-vLT5nRX8i3miXL6Hhq6jw,Elders Bar,1,0,0,427.682480,55.840632,-4.171166,0.0,f6b64fd82fe6e607f1fc788ace31c3a1


In [17]:
# save dataframe as CSV to access in other notebooks
yelp_places_df.to_csv(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\yelp_places_df.csv',sep =',',index=False)

# Comparing Results

In [18]:
yelp_places_df.head(2)

Unnamed: 0,id,name,bar,dog_park,restaurant,distance,latitude,longitude,rating,station_id
1,c2fwMshDF0wtCKGLe3IGtw,Dukes,1,0,0,838.51839,55.866966,-4.292194,4.4,066c99293af108ece27d9b0436c30cc4
2,qjjmYSR3CVsJlr75vyIEBg,BrewDog Glasgow,1,0,0,871.079804,55.867702,-4.291878,4.3,066c99293af108ece27d9b0436c30cc4


In [19]:
foursquare_places_df.head(2)

Unnamed: 0,id,name,bar,dog_park,restaurant,distance,latitude,longitude,rating,station_id
0,4e1c4c481fc714dc5c3143ed,BrewDog Glasgow,1,0,0,884,55.868325,-4.292289,,066c99293af108ece27d9b0436c30cc4
1,4b7b007cf964a5205b4b2fe3,Mother India's Cafe,0,0,1,931,55.867287,-4.2911,,066c99293af108ece27d9b0436c30cc4


In [22]:
# concat foursquare and yelp
places_df = pd.concat([foursquare_places_df, yelp_places_df])
# remove dupes
places_df = places_df.drop_duplicates(subset=['name'],keep='last')
places_df.to_csv(r'B:\LHL\StatsModeling_Project\LHL-statsModeling-priject\data\places_df.csv',sep =',',index=False)
places_df


  places_df = pd.concat([foursquare_places_df, yelp_places_df])


Unnamed: 0,id,name,bar,dog_park,restaurant,distance,latitude,longitude,rating,station_id
1,4b7b007cf964a5205b4b2fe3,Mother India's Cafe,0,0,1,931.000000,55.867287,-4.291100,,066c99293af108ece27d9b0436c30cc4
2,54c61ff3498ec3e20f6a64b2,Meadow Road Coffee,0,0,1,902.000000,55.870559,-4.316758,,066c99293af108ece27d9b0436c30cc4
3,56752941498ef438347e5c5e,Tantrum Doughnuts,0,0,1,851.000000,55.866930,-4.291825,,066c99293af108ece27d9b0436c30cc4
4,57f54b2d498e574bd31426ea,Baffo,0,0,1,908.000000,55.867785,-4.291606,,066c99293af108ece27d9b0436c30cc4
5,5635275a498e569169b27825,Kimchi Cult,0,0,1,800.000000,55.871878,-4.298324,,066c99293af108ece27d9b0436c30cc4
...,...,...,...,...,...,...,...,...,...,...
517,LOEGJ9C8QgEfbVq-haTwqA,Lincoln Inn,1,0,0,553.814985,55.899938,-4.349245,2.0,f6210447e1724487afe0ce63b011aecd
518,B2B1zS_JHzOQvm9bJ9MQGA,New Dragon Inn,1,0,0,818.569003,55.896633,-4.341972,0.0,f6210447e1724487afe0ce63b011aecd
527,M7ekLvEjNuqrx828SFFhCw,Kimberley,1,0,0,678.130598,55.843273,-4.170313,0.0,f6b64fd82fe6e607f1fc788ace31c3a1
528,-vLT5nRX8i3miXL6Hhq6jw,Elders Bar,1,0,0,427.682480,55.840632,-4.171166,0.0,f6b64fd82fe6e607f1fc788ace31c3a1


Which API provided you with more complete data? Provide an explanation. 

Foursquare isn't giving rating for any of the locations and yelp gives more detailed distance. I feel that yelp give more, and more complete, data.

Get the top 10 restaurants according to their rating

In [21]:
places_df.sort_values(by='rating', ascending=False).head(10)

Unnamed: 0,id,name,bar,dog_park,restaurant,distance,latitude,longitude,rating,station_id
95,JJfo7Q2AILPGZjJYHVxKEw,Let Petit Cochon,1,0,0,860.476936,55.866606,-4.28914,5.0,3c3dcd5ab1d1b41a307a4436823f798a
226,AtjKOvEI08MjKaR7RKLmvw,The Parlour,1,0,0,177.776442,55.877191,-4.290577,5.0,71b02b6f26f03e58c97dec1e1c9faa00
41,bp_fzGEY7NQnH0gOq6Z27g,Abarcrombys Cafe Bar,1,0,0,747.891472,55.876879,-4.344608,5.0,1179f511143ae9898181016b37d85b5a
5,BBD1UiXhcUpvKmQo8jZrog,Old Black Bull,1,0,0,636.954617,55.85207,-4.200545,5.0,0a106cbc148d5a0c2535b51c1dbe3b4d
127,0YLxdiz3jyMu7dxjdBjndA,The Old Ship Bank,1,0,0,620.957353,55.854646,-4.245874,5.0,5542f3a29121984f8bab4d281d63bedd
164,dBKFw6F8aFoo9Efm7tI7Jg,Minnesota Fats,0,0,1,187.988323,55.826894,-4.258136,5.0,5dc194cece6b94f85ccf2c373802e7cc
189,FWKGKP2fvYRTbgvfzjKGaQ,Exhale Lounge,0,0,1,225.473062,55.848257,-4.317684,5.0,65a1fded1cfdaf39b8c7ea2cc561144b
10,yodGx21keWc0JNk5YWgqDA,The Drum,1,0,0,960.919549,55.851746,-4.168507,5.0,0cc8c09950e1435ee7782478ed292fef
15,8AFvRh2DCwd7bRD1F0wdTA,Velvet Lounge,0,0,1,657.8547,55.846979,-4.264903,5.0,0e94d2ad012bff0cf23497963ff5fd77
17,3fp-hYuuPTjxJT2eAjKf5w,Stanley Bar,1,0,0,748.600216,55.852032,-4.280034,5.0,0e94d2ad012bff0cf23497963ff5fd77
