In [1]:
# imports
import pandas as pd
import os # use this to access your environment variables
import requests # this will be used to call the APIs
import json

In [2]:
# From city bikes workbook
# The default below is the original author's local path. Set the
# CITY_BIKES_CSV environment variable to point at the file on any other
# machine instead of editing this cell.
path = os.environ.get(
    'CITY_BIKES_CSV',
    '/Users/parkerharalds/Documents/Lighthouse/Statistical_Modelling_Project/data/city_bikes.csv',
)
df = pd.read_csv(path)

In [14]:
# (rows, columns) of the loaded city bikes table
df.shape

(258, 7)

In [3]:
# Two-row slice of the city bikes data, used for trying things out cheaply
df2 = df.iloc[:2]
df2

Unnamed: 0,city,station_id,station_name,latitude,longitude,free_bikes,empty_slots
0,Vancouver,00fa94ad698dc4a9e4d708d6fd32f294,Chilco & Barclay,49.291909,-123.140713,11,7
1,Vancouver,012d3e06901cc222b1c2cf0a2ace3a29,St George & Broadway,49.262321,-123.09306,1,13


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

**Function to make API calls based on latitude and longitude from city_bikes.csv:**

In [7]:
# Foursquare settings shared by every call made through get_nearby_places.
# os.environ[...] raises KeyError when FS_API_KEY is not set.
api_key = os.environ['FS_API_KEY']
url = 'https://api.foursquare.com/v3/places/search?'
headers = {
    "accept": "application/json",
    'Authorization': api_key,
}

def get_nearby_places(lat, lon, radius=1000, categories=13032, limit=50):
    """Search Foursquare for places around one coordinate.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the search centre; they are joined into the
        single ``ll`` query parameter.
    radius : int, default 1000
        Search radius in metres.
    categories : int or str, default 13032
        Foursquare category code(s); 13032 is the code for coffee shops and cafes.
    limit : int, default 50
        Maximum number of places requested per call.

    Returns
    -------
    pandas.DataFrame or None
        The ``results`` entries of the response flattened with
        ``pd.json_normalize`` (empty if there are none), or ``None`` when the
        request fails or the API answers with a status other than 200.
    """
    filters = {
        'll': f'{lat},{lon}',        # lat and lon in the same string
        'radius': radius,
        'categories': categories,
        'limit': limit,
    }
    try:
        # A timeout keeps one stalled connection from hanging a long batch of calls.
        response = requests.get(url, headers=headers, params=filters, timeout=10)
    except requests.RequestException as exc:
        # Report and return None (same contract as a non-200 answer) so the
        # caller can skip this coordinate instead of losing everything collected.
        print(f'Error: Foursquare request for ({lat}, {lon}) failed: {exc}')
        return None

    if response.status_code != 200:
        print(f'Error: Foursquare returned HTTP {response.status_code} for ({lat}, {lon})')
        return None

    # If the API call was successful, return the results as a flat table.
    results = response.json().get('results', [])
    return pd.json_normalize(results)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [8]:
# Iterate through the DataFrame and make API calls to foursquare api
# One call per row of df. Each result table is tagged with the row's station_id
# and collected in a list, then concatenated once at the end; calling pd.concat
# inside the loop re-copies every previously collected row on each iteration.
station_frames = []
for index, row in df.iterrows():
    lat, lon = row['latitude'], row['longitude']
    df_this_station = get_nearby_places(lat, lon)

    # None means the call for this row failed; skip it and carry on.
    if df_this_station is not None:
        df_this_station['station'] = row['station_id']
        station_frames.append(df_this_station)

# Row labels are kept as they come from each per-station table (so they repeat
# across stations); use reset_index(drop=True) where unique labels are needed.
df_all_stations = pd.concat(station_frames, axis=0) if station_frames else pd.DataFrame()

# df_all_stations

In [9]:
# Shorter name for the combined Foursquare results. This is a plain assignment,
# so fsq_df and df_all_stations refer to the same DataFrame object (not a copy).
fsq_df = df_all_stations

In [13]:
# (rows, columns) of the combined Foursquare results
fsq_df.shape

(10660, 30)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

Test api call:

In [155]:
# Yelp credentials come from the environment; the key is sent as a Bearer token.
yelp_api_key = os.environ['yelp_api']
y_headers = {"accept": "application/json",
           'Authorization': f'Bearer {yelp_api_key}'
}
# Single test search around one point. The longitude is negative (west of
# Greenwich), like the -123.x longitudes in the city bikes data; a positive
# value would send the query to the eastern hemisphere.
filters = {     'latitude': 48.4,
                'longitude': -123.4,
                'radius': 1000
            }
y_url = 'https://api.yelp.com/v3/businesses/search?'
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
y_response = requests.get(y_url, headers=y_headers, params=filters, timeout=10)

In [156]:
# Decode the JSON body of the test response and look at its 'businesses' entry.
testing = y_response.json()
testing['businesses'] # this is the part get_yelp_places passes to pd.json_normalize

Yelp API call (limit 300 calls per day):

It should be possible to run the entire city bikes DataFrame in one go, as it only has 258 rows (one API call per row).

In [15]:
# Yelp settings shared by every call made through get_yelp_places.
# os.environ[...] raises KeyError when yelp_api is not set.
yelp_api_key = os.environ['yelp_api']
y_url = 'https://api.yelp.com/v3/businesses/search?'
y_headers = {
    "accept": "application/json",
    'Authorization': 'Bearer ' + yelp_api_key,
}

def get_yelp_places(lat, lon, radius=1000, categories='cafes'):
    """Search Yelp for businesses around one coordinate.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the search centre.
    radius : int, default 1000
        Search radius in metres.
    categories : str, default 'cafes'
        Yelp category alias(es) to filter on.

    Returns
    -------
    pandas.DataFrame or None
        The ``businesses`` entries of the response flattened with
        ``pd.json_normalize`` (empty if there are none), or ``None`` when the
        request fails or the API answers with a status other than 200.

    Notes
    -----
    No ``limit`` is sent, so Yelp's own default page size applies
    (20 according to the Yelp docs -- TODO confirm).
    """
    filters = {
        'latitude': lat,
        'longitude': lon,
        'radius': radius,
        'categories': categories,
    }
    try:
        # A timeout keeps one stalled connection from hanging a long batch of calls.
        y_response = requests.get(y_url, headers=y_headers, params=filters, timeout=10)
    except requests.RequestException as exc:
        # Report and return None (same contract as a non-200 answer) so the
        # caller can skip this coordinate instead of losing everything collected.
        print(f'Error: Yelp request for ({lat}, {lon}) failed: {exc}')
        return None

    if y_response.status_code != 200:
        print(f'Error: Yelp returned HTTP {y_response.status_code} for ({lat}, {lon})')
        return None

    results = y_response.json().get('businesses', [])
    return pd.json_normalize(results)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [16]:
# Iterate through the DataFrame and make API calls to yelp fusion api
# One call per row of df. Each result table is tagged with the row's station_id
# and collected in a list, then concatenated once at the end; calling pd.concat
# inside the loop re-copies every previously collected row on each iteration.
yelp_frames = []
for index, row in df.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    df_this_yelp = get_yelp_places(lat, lon)

    # None means the call for this row failed; skip it and carry on.
    if df_this_yelp is not None:
        df_this_yelp['station'] = row['station_id']
        yelp_frames.append(df_this_yelp)

# ignore_index=True gives a fresh 0..n-1 index, the same result as
# concatenating and then calling reset_index(drop=True).
df_yelp_stations = (
    pd.concat(yelp_frames, axis=0, ignore_index=True) if yelp_frames else pd.DataFrame()
)

# df_yelp_stations

In [None]:
# Shorter name for the combined Yelp results. This is a plain assignment, so
# yelp_df and df_yelp_stations refer to the same DataFrame object (not a copy).
yelp_df = df_yelp_stations

# Preview the first rows.
yelp_df.head()

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

**Yelp:**

In [34]:
# Overview of the Yelp results: dimensions first, then describe() statistics.
# Note that the "info" label is followed by describe(), not DataFrame.info().
yelp_shape = yelp_df.shape
yelp_summary = yelp_df.describe()
print('The shape is:\n', yelp_shape, '\nThe info is:\n', yelp_summary)

The shape is:
 (3864, 30) 
The info is:
        review_count       rating     distance  coordinates.latitude  \
count   3864.000000  3864.000000  3864.000000           3864.000000   
mean      83.256470     3.824896   688.751284             49.273342   
std      141.790533     0.995486   308.308144              0.010627   
min        0.000000     0.000000     6.421876             49.234423   
25%        9.000000     3.700000   454.295845             49.264230   
50%       30.000000     4.000000   714.947514             49.274920   
75%       90.000000     4.400000   935.765177             49.281896   
max      999.000000     5.000000  1396.264483             49.301243   

       coordinates.longitude  
count            3864.000000  
mean             -123.116981  
std                 0.029571  
min              -123.255143  
25%              -123.128757  
50%              -123.117107  
75%              -123.100880  
max              -123.049625  


**FourSquare:**

Get the top 10 restaurants according to their rating