In [57]:
# imports
import requests
import json
import pandas as pd
import numpy as np
import os

# API credentials are read from the environment; each is None when its variable is unset
FOURSQUARE_KEY = os.environ.get('FOURSQUARE_KEY')
YELP_KEY = os.environ.get('YELP_KEY')

In [58]:
# import the bike station data from the csv file written in part 1
# os.path.join picks the right separator; a literal '..\\data\\...' only resolves on Windows
avaiable_bike_stations = pd.read_csv(os.path.join('..', 'data', 'available_bikes.csv'))

In [59]:
avaiable_bike_stations.head()

Unnamed: 0,station_id,name,lon,lat,address,num_bikes_available,num_bikes_disabled,last_reported
0,hub_540,Sanders at Hollywood,-79.926481,43.259903,"26, Sanders Boulevard, University Gardens, Ham...",27,0,2024-03-09 15:06:26
1,hub_545,Emerson at Whitney,-79.922216,43.252142,"202, Emerson Street, University Gardens, Hamil...",16,0,2024-03-09 15:06:26
2,hub_557,Dundurn at Aberdeen,-79.892656,43.251458,"330-346 Dundurn Street South, Hamilton",13,0,2024-03-09 15:06:26
3,hub_571,Oxford at York,-79.877794,43.264564,"40-48 Oxford Street, Hamilton",13,0,2024-03-09 15:06:26
4,hub_554,Macklin at King,-79.896498,43.262167,"768, King Street West, McMaster Innovation Par...",12,0,2024-03-09 15:06:26


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [60]:
#use the method from course work to get the data from the FS API
def get_venues_fs(latitude, longitude, radius, api_key, categories=None, timeout=30):
    """
    Get venues from Foursquare around a coordinate pair.

    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        radius (int): search radius around the coordinates, sent as the "radius" parameter
        api_key (str): Foursquare API key, sent as the Authorization header
        categories (str, optional): Foursquare-recognized place type ids, separated by
            commas. When None, no category filter is sent.
        timeout (float, optional): seconds to wait for the server before requests
            raises a timeout error. Defaults to 30.

    Returns:
        response: response object from the requests library.
    """
    # create the URL
    url = "https://api.foursquare.com/v3/places/search"
    # create the parameters
    params = {
        "ll": f"{latitude},{longitude}",
        "radius": radius,
    }
    # only send a category filter when the caller asked for one
    if categories is not None:
        params["categories"] = categories
    # create the headers
    headers = {
        "accept": "application/json",
        "Authorization": api_key,
    }
    # make the request; without a timeout a stalled connection would block forever
    return requests.get(url=url, params=params, headers=headers, timeout=timeout)


In [61]:
# query Foursquare for venues within 1000 m of every bike station
# work on a copy: a plain assignment would alias the frame loaded from csv, so the
# 'venues' column would be written onto (and later overwritten in) the shared frame
bike_stations = avaiable_bike_stations.copy()

fs_payloads = []
for _, row in bike_stations.iterrows():
    # get the response
    res = get_venues_fs(latitude=row['lat'], longitude=row['lon'], radius=1000,
                        api_key=FOURSQUARE_KEY, categories=None)
    # fail here with the HTTP status rather than later with a KeyError on an error body
    res.raise_for_status()
    # keep the payload as a JSON string; it is parsed again when the venues are flattened
    fs_payloads.append(json.dumps(res.json()))

# add all responses to the dataframe in one assignment instead of one .loc write per row
bike_stations['venues'] = fs_payloads

    

In [62]:
bike_stations

Unnamed: 0,station_id,name,lon,lat,address,num_bikes_available,num_bikes_disabled,last_reported,venues
0,hub_540,Sanders at Hollywood,-79.926481,43.259903,"26, Sanders Boulevard, University Gardens, Ham...",27,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""4c405e7a520fa593a3e6c..."
1,hub_545,Emerson at Whitney,-79.922216,43.252142,"202, Emerson Street, University Gardens, Hamil...",16,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""4c05f704517d0f475957f..."
2,hub_557,Dundurn at Aberdeen,-79.892656,43.251458,"330-346 Dundurn Street South, Hamilton",13,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""534c6197498e2a0a588f0..."
3,hub_571,Oxford at York,-79.877794,43.264564,"40-48 Oxford Street, Hamilton",13,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""52e4081c498e41df675c6..."
4,hub_554,Macklin at King,-79.896498,43.262167,"768, King Street West, McMaster Innovation Par...",12,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""4ee1174949010b299235f..."
...,...,...,...,...,...,...,...,...,...
133,hub_605,Wentworth at King,-79.849104,43.251990,"16 Wentworth Street South, Hamilton",1,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""4cb353ef0cdc721eba892..."
134,hub_5838,King at Sanford,-79.846358,43.251884,"735, King Street East, Hamilton, Ontario",1,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""4e7ce02ccc2169aa4d38e..."
135,hub_894,Forsyth at Sterling,-79.915806,43.262714,"88 Forsyth Avenue North, Hamilton",1,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""4d21085e4c4f60fcb765e..."
136,hub_5846,Cannon at Steven,-79.849981,43.256461,"400-408 Cannon Street East, Hamilton",1,0,2024-03-09 15:06:26,"{""results"": [{""fsq_id"": ""4c83c848e602b1f7a1a4a..."


In [63]:
# 'venues' holds one nested JSON document per station, so flatten it for analysis:
# one row per venue, tagged with the station_id of the station it was found near
fs_frames = [
    pd.json_normalize(json.loads(payload)['results']).assign(station_id=station_id)
    for station_id, payload in zip(bike_stations['station_id'], bike_stations['venues'])
]
# concatenate once; growing the frame with pd.concat inside the loop re-copies it every pass
nearby_POIs = pd.concat(fs_frames, ignore_index=True) if fs_frames else pd.DataFrame()


In [64]:
# expand each POI's 'categories' entry into positional columns (0, 1, 2, ...)
# and attach the matching station_id so every row can be traced back to its station
categories = pd.json_normalize(nearby_POIs['categories']).assign(
    station_id=nearby_POIs['station_id']
)
categories

Unnamed: 0,0,1,2,3,station_id
0,"{'id': 13049, 'name': 'Diner', 'short_name': '...",,,,hub_540
1,"{'id': 13018, 'name': 'Pub', 'short_name': 'Pu...","{'id': 13031, 'name': 'Burger Joint', 'short_n...",,,hub_540
2,"{'id': 13099, 'name': 'Chinese Restaurant', 's...","{'id': 13145, 'name': 'Fast Food Restaurant', ...","{'id': 13177, 'name': 'Greek Restaurant', 'sho...",,hub_540
3,"{'id': 13064, 'name': 'Pizzeria', 'short_name'...",,,,hub_540
4,"{'id': 13029, 'name': 'Brewery', 'short_name':...",,,,hub_540
...,...,...,...,...,...
1370,"{'id': 13035, 'name': 'Coffee Shop', 'short_na...",,,,hub_539
1371,"{'id': 13049, 'name': 'Diner', 'short_name': '...","{'id': 13236, 'name': 'Italian Restaurant', 's...","{'id': 13383, 'name': 'Steakhouse', 'short_nam...",,hub_539
1372,"{'id': 13031, 'name': 'Burger Joint', 'short_n...",,,,hub_539
1373,"{'id': 13065, 'name': 'Restaurant', 'short_nam...",,,,hub_539


In [65]:
# extract the POI category name from the first listed category (column 0)
# select 'name' explicitly rather than dropping a hard-coded list of the other keys:
# the drop raises KeyError if one of those keys is absent and keeps any key not listed
# maintain the station id for reference
POI_category = pd.json_normalize(categories[0])[['name']].assign(
    station_id=categories['station_id']
)
POI_category

Unnamed: 0,name,station_id
0,Diner,hub_540
1,Pub,hub_540
2,Chinese Restaurant,hub_540
3,Pizzeria,hub_540
4,Brewery,hub_540
...,...,...
1370,Coffee Shop,hub_539
1371,Diner,hub_539
1372,Burger Joint,hub_539
1373,Restaurant,hub_539


In [66]:
# overwrite the nested 'categories' column with the plain category name from POI_category
nearby_POIs = nearby_POIs.assign(categories=POI_category['name'])
nearby_POIs.head()

Unnamed: 0,fsq_id,categories,chains,closed_bucket,distance,link,name,timezone,geocodes.main.latitude,geocodes.main.longitude,...,location.address_extended,related_places.children,geocodes.drop_off.latitude,geocodes.drop_off.longitude,geocodes.front_door.latitude,geocodes.front_door.longitude,station_id,related_places.parent.fsq_id,related_places.parent.categories,related_places.parent.name
0,4c405e7a520fa593a3e6c7ac,Diner,[],LikelyOpen,348,/v3/places/4c405e7a520fa593a3e6c7ac,Maple Leaf Pancake House,America/Toronto,43.257771,-79.923534,...,,,,,,,hub_540,,,
1,4b6b5d23f964a520a7032ce3,Pub,[],LikelyOpen,547,/v3/places/4b6b5d23f964a520a7032ce3,Phoenix Bar & Grill,America/Toronto,43.262878,-79.921158,...,,,,,,,hub_540,,,
2,4ba9734cf964a520f3273ae3,Chinese Restaurant,[],VeryLikelyOpen,315,/v3/places/4ba9734cf964a520f3273ae3,Tally Ho Restaurants,America/Toronto,43.257728,-79.924083,...,,,,,,,hub_540,,,
3,4bb832a87421a5937ca4c140,Pizzeria,[{'id': 'd5737460-d890-0132-61d3-7a163eb2a6fc'...,VeryLikelyOpen,308,/v3/places/4bb832a87421a5937ca4c140,Boston Pizza,America/Toronto,43.257231,-79.927615,...,Cottrill St,,,,,,hub_540,,,
4,59035559061b51746eba7df9,Brewery,[],VeryLikelyOpen,546,/v3/places/59035559061b51746eba7df9,Grain & Grit Beer Co,America/Toronto,43.256697,-79.931604,...,,,,,,,hub_540,,,


In [67]:
# drop the columns that are not needed
fs_unused_columns = [
    'fsq_id', 'chains', 'link', 'timezone',
    'related_places.parent.fsq_id', 'related_places.parent.categories',
    'location.address_extended', 'related_places.children',
    'geocodes.drop_off.latitude', 'geocodes.drop_off.longitude',
]
# errors='ignore': json_normalize only creates a column when some venue carries that key,
# so the sparse ones can be absent for other data, and a re-run of this cell
# (after the columns are already gone) no longer raises KeyError
nearby_POIs = nearby_POIs.drop(columns=fs_unused_columns, errors='ignore')

In [68]:
nearby_POIs.head()

Unnamed: 0,categories,closed_bucket,distance,name,geocodes.main.latitude,geocodes.main.longitude,geocodes.roof.latitude,geocodes.roof.longitude,location.address,location.country,location.formatted_address,location.locality,location.postcode,location.region,location.cross_street,geocodes.front_door.latitude,geocodes.front_door.longitude,station_id,related_places.parent.name
0,Diner,LikelyOpen,348,Maple Leaf Pancake House,43.257771,-79.923534,43.257771,-79.923534,1520 Main St W,CA,"1520 Main St W, Hamilton ON L8S 1E3",Hamilton,L8S 1E3,ON,,,,hub_540,
1,Pub,LikelyOpen,547,Phoenix Bar & Grill,43.262878,-79.921158,43.262878,-79.921158,1280 Main St W,CA,"1280 Main St W (in The Refectory), Hamilton ON...",Hamilton,L8S 4L8,ON,in The Refectory,,,hub_540,
2,Chinese Restaurant,VeryLikelyOpen,315,Tally Ho Restaurants,43.257728,-79.924083,43.257728,-79.924083,1536 Main St W,CA,"1536 Main St W, Hamilton ON L8S 1E4",Hamilton,L8S 1E4,ON,,,,hub_540,
3,Pizzeria,VeryLikelyOpen,308,Boston Pizza,43.257231,-79.927615,43.257231,-79.927615,1563 Main St W,CA,"1563 Main St W (Cottrill St.), Hamilton ON L8S...",Hamilton,L8S 1E6,ON,Cottrill St.,,,hub_540,
4,Brewery,VeryLikelyOpen,546,Grain & Grit Beer Co,43.256697,-79.931604,43.256697,-79.931604,11 Ewen Rd,CA,"11 Ewen Rd, Hamilton ON L8S 3C3",Hamilton,L8S 3C3,ON,,,,hub_540,


In [69]:
# drop more unneeded columns: the main geocodes, the individual address parts
# (location.formatted_address is kept), the parent place name and the front-door geocodes
fs_more_unused_columns = [
    'geocodes.main.latitude', 'geocodes.main.longitude',
    'location.address', 'location.country', 'location.cross_street',
    'related_places.parent.name', 'location.locality',
    'location.postcode', 'location.region',
    'geocodes.front_door.latitude', 'geocodes.front_door.longitude',
]
# errors='ignore' keeps the cell re-runnable and tolerant of sparse columns being absent
nearby_POIs = nearby_POIs.drop(columns=fs_more_unused_columns, errors='ignore')


Put your parsed results into a DataFrame

In [70]:
# save the data to a csv file
# os.path.join picks the right separator; a literal '..\\data\\...' only resolves on Windows
nearby_POIs.to_csv(os.path.join('..', 'data', 'FS_nearby_POIs.csv'), index=False)
# a bare expression is only displayed when it is the last line of the cell
nearby_POIs.head()

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [71]:
#adapt the method used for the FS API
def get_venues_yelp(latitude, longitude, radius, api_key, limit=20, timeout=30):
    """
    Get businesses from Yelp around a coordinate pair, sorted by best match.

    Args:
        latitude (float): latitude for query
        longitude (float): longitude for query
        radius (int): search radius around the coordinates, sent as the "radius" parameter
        api_key (str): Yelp API key, sent as a Bearer token in the Authorization header
        limit (int, optional): maximum number of businesses to request. Defaults to 20.
        timeout (float, optional): seconds to wait for the server before requests
            raises a timeout error. Defaults to 30.

    Returns:
        response: response object from the requests library.
    """
    # create the URL
    url = "https://api.yelp.com/v3/businesses/search"
    # create the parameters; requests builds and encodes the query string
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "sort_by": "best_match",
        "limit": limit,
    }
    # create the headers
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    # make the request; without a timeout a stalled connection would block forever
    return requests.get(url=url, params=params, headers=headers, timeout=timeout)

In [72]:
# query Yelp for businesses within 1000 m of every bike station
# work on a copy: a plain assignment would alias the frame loaded from csv, and writing
# 'venues' here would overwrite the column in every other name bound to that frame
bike_stations_yelp = avaiable_bike_stations.copy()

yelp_payloads = []
for _, row in bike_stations_yelp.iterrows():
    # get the response
    res = get_venues_yelp(latitude=row['lat'], longitude=row['lon'], radius=1000, api_key=YELP_KEY)
    # fail here with the HTTP status rather than later with a KeyError on an error body
    res.raise_for_status()
    # keep the payload as a JSON string; it is parsed again when the businesses are flattened
    yelp_payloads.append(json.dumps(res.json()))

# add all responses to the dataframe in one assignment instead of one .loc write per row
bike_stations_yelp['venues'] = yelp_payloads

In [73]:
bike_stations_yelp['venues']

0      {"businesses": [{"id": "6q9ODD89p-EntewcOD8J-Q...
1      {"businesses": [{"id": "VUHvOv0gV4S4TbfHK3AmtQ...
2      {"businesses": [{"id": "6F_TjwC8F3OG_q1lGxgMFg...
3      {"businesses": [{"id": "752Fv2jKafftvoS3Twkqyg...
4      {"businesses": [{"id": "o_CBe1ZQs6kj-Aet2qmL5A...
                             ...                        
133    {"businesses": [{"id": "BqkJd5ZI96-kpX0RHx717w...
134    {"businesses": [{"id": "BqkJd5ZI96-kpX0RHx717w...
135    {"businesses": [{"id": "jmcXDIVvsFEq7_OlzUi8qQ...
136    {"businesses": [{"id": "R3vynTtGXFYmsYEHEVQHOQ...
137    {"businesses": [{"id": "6q9ODD89p-EntewcOD8J-Q...
Name: venues, Length: 138, dtype: object

In [74]:
# flatten every JSON document in the venues column into one row per business,
# tagged with the station_id of the station it was found near
yelp_frames = [
    pd.json_normalize(json.loads(payload)['businesses']).assign(station_id=station_id)
    for station_id, payload in zip(bike_stations_yelp['station_id'], bike_stations_yelp['venues'])
]
# concatenate once; growing the frame with pd.concat inside the loop re-copies it every pass
nearby_POIs_yelp = pd.concat(yelp_frames, ignore_index=True) if yelp_frames else pd.DataFrame()

In [75]:
nearby_POIs_yelp.columns

Index(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'transactions', 'price', 'phone',
       'display_phone', 'distance', 'coordinates.latitude',
       'coordinates.longitude', 'location.address1', 'location.address2',
       'location.address3', 'location.city', 'location.zip_code',
       'location.country', 'location.state', 'location.display_address',
       'station_id'],
      dtype='object')

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [76]:
# drop unneeded columns: identifiers, links, the individual address parts
# (location.display_address is kept), transactions and price
yelp_unused_columns = [
    'id', 'alias', 'image_url', 'url',
    'location.address1', 'location.address2', 'location.address3',
    'location.city', 'location.zip_code', 'location.country', 'location.state',
    'transactions', 'price',
]
# errors='ignore': json_normalize only creates a column when some business carries that
# key, and a re-run of this cell (after the columns are gone) no longer raises KeyError
nearby_POIs_yelp = nearby_POIs_yelp.drop(columns=yelp_unused_columns, errors='ignore')

In [77]:
# drop more unneeded columns: phone (display_phone is kept)
# errors='ignore' lets the cell be re-run after 'phone' has already been removed
nearby_POIs_yelp = nearby_POIs_yelp.drop(columns=['phone'], errors='ignore')

In [78]:
# expand each business's 'categories' entry into positional columns (0, 1, 2, ...)
# and attach the matching station_id so every row can be traced back to its station
# NOTE(review): categories_yelp is not referenced by any later cell visible here — confirm it is still needed
categories_yelp = pd.json_normalize(nearby_POIs_yelp['categories']).assign(
    station_id=nearby_POIs_yelp['station_id']
)
categories_yelp

Unnamed: 0,0,1,2,station_id
0,"{'alias': 'pizza', 'title': 'Pizza'}","{'alias': 'italian', 'title': 'Italian'}","{'alias': 'chicken_wings', 'title': 'Chicken W...",hub_540
1,"{'alias': 'chinese', 'title': 'Chinese'}",,,hub_540
2,"{'alias': 'pizza', 'title': 'Pizza'}","{'alias': 'italian', 'title': 'Italian'}",,hub_540
3,"{'alias': 'italian', 'title': 'Italian'}",,,hub_540
4,"{'alias': 'burgers', 'title': 'Burgers'}","{'alias': 'hotdogs', 'title': 'Fast Food'}","{'alias': 'sandwiches', 'title': 'Sandwiches'}",hub_540
...,...,...,...,...
2691,"{'alias': 'chicken_wings', 'title': 'Chicken W...",,,hub_539
2692,"{'alias': 'lebanese', 'title': 'Lebanese'}","{'alias': 'mediterranean', 'title': 'Mediterra...",,hub_539
2693,"{'alias': 'icecream', 'title': 'Ice Cream & Fr...","{'alias': 'customcakes', 'title': 'Custom Cakes'}",,hub_539
2694,"{'alias': 'breakfast_brunch', 'title': 'Breakf...",,,hub_539


Put your parsed results into a DataFrame

In [81]:
# save the data to a csv file
# os.path.join picks the right separator; a literal '..\\data\\...' only resolves on Windows
nearby_POIs_yelp.to_csv(os.path.join('..', 'data', 'Yelp_nearby_POIs.csv'), index=False)
# a bare expression is only displayed when it is the last line of the cell
nearby_POIs_yelp.head()

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

* Yelp has more consumer-friendly data, such as ratings, the number of ratings, and a true/false flag for whether the business is closed.
* Yelp also provides phone numbers, whereas Foursquare does not.
* Yelp also returns more results per bike station hub.
* Foursquare lists more public areas as POIs, whereas Yelp lists more consumer services.
* Even for these public areas, Yelp still displays more information.

Get the top 10 restaurants according to their rating

In [80]:
# display the top 10 entries from nearby_POIs_yelp by rating
# nearby_POIs_yelp holds one row per (station, business) pair, so a business in range of
# several stations appears several times; de-duplicate on name + coordinates before ranking
top_10_yelp = (
    nearby_POIs_yelp
    .drop_duplicates(subset=['name', 'coordinates.latitude', 'coordinates.longitude'])
    .sort_values(by='rating', ascending=False)
    .head(10)  # the sort alone returned every row, not the top 10
)
top_10_yelp