In [1]:
# imports the require libraries
import os
import json
import requests
import pandas as pd
from pandas.io.json import json_normalize

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
fsq_api_key = os.environ["FSQ_API_KEY"]

In [4]:
# Function to get nearby places using Foursquare API
def get_foursquare_location(latitude, longitude):
    gps = f"{latitude},{longitude}"
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    url = 'https://api.foursquare.com/v3/places/search?ll='+ gps +'&radius=1000&fields='+ fields +'&limit=20'

    headers = {
        "Accept": "application/json",
        "Authorization": fsq_api_key
    }

    response = requests.get(url, headers=headers)
    data = response.json()
    return data

In [5]:
# Read bike station data from CSV into a DataFrame
bike_stations_df = pd.read_csv("../data/hamilton_bike_stations.csv")

# Create a dictionary to store the results for each bike station
results_dict = {}

# Iterate through each row in the DataFrame
for index, row in bike_stations_df.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    
    # Call the get_Foursquare_location function to retrieve nearby places
    nearby_places = get_foursquare_location(latitude, longitude)
    
    # Store the location data in the results dictionary
    results_dict[row['name']] = nearby_places

# Save the results dictionary as a JSON file
with open('../data/nearby_foursquare_results.json', 'w') as json_file:
    json.dump(results_dict, json_file, indent=4)

print("Results saved to nearby_foursquare_results.json")

Results saved to nearby_foursquare_results.json


In [6]:
# Specify the path to the JSON file
json_file_path = '../data/nearby_foursquare_results.json'

# Open and read the JSON file
with open(json_file_path, 'r') as json_file:
    foursquare_response = json.load(json_file)

# Print the contents of the JSON file
#print(json.dumps(data, indent=4)) 
print(type(foursquare_response))

<class 'dict'>


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [7]:
# Function that get POI information from the result of foursquare API call for each bike stations
def extract_foursquare_poi(data):
    list_of_dict = []

    for key, value in data.items():
        # extract relevant information and add it into a data dictionary
        for item in value['results']:
            name = item.get('name', 'N/A')

            # Check if 'categories' key exists in the 'item' dictionary
            categories = None
            if 'categories' in item and item['categories']:
                categories = item['categories'][0].get('name', 'N/A')

            latitude = item.get('geocodes', {}).get('main', {}).get('latitude', None)
            longitude = item.get('geocodes', {}).get('main', {}).get('longitude', None)
            distance = item.get('distance', None)
            formatted_address = item.get('location', {}).get('formatted_address', [])
            rating = item.get('rating', None)
            stats = item.get('stats', {})
            total_photos = stats.get('total_photos', None)
            total_ratings = stats.get('total_ratings', None)
            total_tips = stats.get('total_tips', None)

            poi_dict = {
                "bike_station": key,
                "name": name,
                "categories": categories,
                "address": formatted_address,
                "distance": distance,
                "poi_latitude": latitude,
                "poi_longitude": longitude,
                "ratings": rating,
                "total_ratings": total_ratings,
                "total_tips": total_tips,
                "total_photos": total_photos
            }
            # add poi dictionary into our list of dicts
            list_of_dict.append(poi_dict)

    return list_of_dict


Put your parsed results into a DataFrame

In [24]:
foursquare_poi_result = extract_foursquare_poi(foursquare_response)
foursquare_poi_df = pd.DataFrame(foursquare_poi_result)
foursquare_poi_df

Unnamed: 0,bike_station,name,categories,address,distance,poi_latitude,poi_longitude,ratings,total_ratings,total_tips,total_photos
0,Hess at king,Coop Wicked Chicken Hamilton,Fried Chicken Joint,"274 King St W (Hess St), Hamilton ON L8P 1J6",19,43.259271,-79.877373,8.0,9.0,2.0,6.0
1,Hess at king,Coop Hamilton,Fried Chicken Joint,"274 King St W, Hamilton ON L8P 1J6",22,43.259153,-79.877446,7.8,13.0,2.0,7.0
2,Hess at king,Ark + Anchor,Coffee Shop,"300 King St W, Hamilton ON L8P 1B1",81,43.259332,-79.878032,8.1,11.0,2.0,13.0
3,Hess at king,La Luna Restaurant Downtown,Lebanese Restaurant,"306 King St W (at Queen St), Hamilton ON L8P 1B1",111,43.259414,-79.878417,8.6,35.0,15.0,8.0
4,Hess at king,Hess Village,Neighborhood,Hamilton ON,77,43.258514,-79.877755,7.9,18.0,3.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...
2767,Cannon at Ottawa,Rankin's Bar & Grill,Bar,"1342 Main St E, Hamilton ON L8K 1B5",890,43.240773,-79.812412,8.1,16.0,8.0,28.0
2768,Cannon at Ottawa,Tim Hortons,Restaurant,"954 Barton St E, Hamilton ON L8L 3C5",667,43.252714,-79.822252,6.3,0.0,0.0,1.0
2769,Cannon at Ottawa,Pizza Pizza,Pizzeria,"1296 King St E, Hamilton ON L8M 1G8",686,43.243909,-79.824758,6.2,1.0,0.0,2.0
2770,Cannon at Ottawa,Buddys Roadhouse,Restaurant,"1360 King St E, Hamilton ON L8M 1H5",805,43.240965,-79.822316,6.9,9.0,2.0,17.0


In [25]:
foursquare_poi_df['ratings'] = round((foursquare_poi_df['ratings'] / 10) * 5, 1)
foursquare_poi_df

Unnamed: 0,bike_station,name,categories,address,distance,poi_latitude,poi_longitude,ratings,total_ratings,total_tips,total_photos
0,Hess at king,Coop Wicked Chicken Hamilton,Fried Chicken Joint,"274 King St W (Hess St), Hamilton ON L8P 1J6",19,43.259271,-79.877373,4.0,9.0,2.0,6.0
1,Hess at king,Coop Hamilton,Fried Chicken Joint,"274 King St W, Hamilton ON L8P 1J6",22,43.259153,-79.877446,3.9,13.0,2.0,7.0
2,Hess at king,Ark + Anchor,Coffee Shop,"300 King St W, Hamilton ON L8P 1B1",81,43.259332,-79.878032,4.0,11.0,2.0,13.0
3,Hess at king,La Luna Restaurant Downtown,Lebanese Restaurant,"306 King St W (at Queen St), Hamilton ON L8P 1B1",111,43.259414,-79.878417,4.3,35.0,15.0,8.0
4,Hess at king,Hess Village,Neighborhood,Hamilton ON,77,43.258514,-79.877755,4.0,18.0,3.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...
2767,Cannon at Ottawa,Rankin's Bar & Grill,Bar,"1342 Main St E, Hamilton ON L8K 1B5",890,43.240773,-79.812412,4.0,16.0,8.0,28.0
2768,Cannon at Ottawa,Tim Hortons,Restaurant,"954 Barton St E, Hamilton ON L8L 3C5",667,43.252714,-79.822252,3.2,0.0,0.0,1.0
2769,Cannon at Ottawa,Pizza Pizza,Pizzeria,"1296 King St E, Hamilton ON L8M 1G8",686,43.243909,-79.824758,3.1,1.0,0.0,2.0
2770,Cannon at Ottawa,Buddys Roadhouse,Restaurant,"1360 King St E, Hamilton ON L8M 1H5",805,43.240965,-79.822316,3.4,9.0,2.0,17.0


In [26]:
# persist yelp DataFrame to a CSV file on the local disk.
foursquare_poi_df.to_csv('../data/foursquare_poi.csv', index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [12]:
yelp_api_key = os.environ["MY_YELP_API_KEY"]

In [35]:
# Function to get nearby places using Yelp API
def get_yelp_locations(latitude, longitude):
    gps_yelp = f"latitude={latitude}&longitude={longitude}"
    radius = 1000  # 1000 meters radius
    url = "https://api.yelp.com/v3/businesses/search?"+ gps_yelp +f"&radius={radius}&categories=&sort_by=best_match&limit=20"

    headers = {
        "accept": "application/json",
        "authorization": f"Bearer {yelp_api_key}"
    }

    response = requests.get(url, headers=headers)
    data = response.json()
    return data

In [36]:
# A dictionary to store the results for each bike station
results_dict = {}

# Iterate through each row in the DataFrame
for index, row in bike_stations_df.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    
    # Call the get_location function to retrieve nearby places
    nearby_places = get_yelp_locations(latitude, longitude)
    
    # Store the location data in the results dictionary
    results_dict[row['name']] = nearby_places

# Save the results dictionary as a JSON file
with open('../data/nearby_yelp_results.json', 'w') as json_file:
    json.dump(results_dict, json_file, indent=4)

print("Results saved to nearby_yelp_results.json")

Results saved to nearby_yelp_results.json


In [37]:
# Specify the path to the JSON file
json_file_path = '../data/nearby_yelp_results.json'

# Open and read the JSON file
with open(json_file_path, 'r') as json_file:
    yelp_response = json.load(json_file)

# Print the contents of the JSON file
#print(json.dumps(yelp_response, indent=4)) 
print(type(yelp_response))

<class 'dict'>


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [38]:
# Function that POI information from the result of yelp API call for each bike stations
def yelp_poi(data):
    list_of_dict = []

    for key, value in data.items():
        # extract relevant information and add it into a data dictionary
        for item in value['businesses']:
            name = item['name']
            # Check if 'categories' key exists in the 'item' dictionary
            if "categories" in item and item['categories']:
                categories = item['categories'][0]['title']
            else:
                categories = None
            latitude = item['coordinates']['latitude']
            longitude = item['coordinates']['longitude']
            distance = item['distance']
            display_address = item['location']['display_address']
            if "rating" in item:
                rating = item['rating']
            else:
                rating = None
            # Check if 'stats' key exists in the 'item' dictionary
            if "review_count" in item:
                review_count = item['review_count']
            else:
                review_count = None

            yelp_poi_dict = {
                    "bike_station": key,
                    "name": name,
                    "categories": categories,
                    "address": display_address,
                    "distance": distance,
                    "poi_latitude": latitude,
                    "poi_longitude": longitude,
                    "ratings": rating,
                    "review_count": review_count,
            }
            # add poi dictionary into our list of dicts
            list_of_dict.append(yelp_poi_dict)
            
    return list_of_dict

In [39]:
# Function that POI information from the result of yelp API call for each bike stations
def extract_yelp_poi_details(data):
    list_of_dict = []

    for key, value in data.items():
        # extract relevant information and add it into a data dictionary
        for item in value['businesses']:
            name = item.get('name', 'N/A')
            
            # Check if 'categories' key exists in the 'item' dictionary
            categories = None
            if 'categories' in item and item['categories']:
                categories = item['categories'][0].get('title', 'N/A')
            
            latitude = item['coordinates'].get('latitude', None)
            longitude = item['coordinates'].get('longitude', None)
            distance = item.get('distance', None)
            display_address = ','.join(item['location'].get('display_address', []))
            rating = item.get('rating', None)
            review_count = item.get('review_count', None)

            yelp_poi_dict = {
                    "bike_station": key,
                    "name": name,
                    "categories": categories,
                    "address": display_address,
                    "distance": distance,
                    "poi_latitude": latitude,
                    "poi_longitude": longitude,
                    "ratings": rating,
                    "review_count": review_count,
            }
            # add poi dictionary into our list of dicts
            list_of_dict.append(yelp_poi_dict)
            
    return list_of_dict


Put your parsed results into a DataFrame

In [40]:
yelp_poi_details = extract_yelp_poi_details(yelp_response)
yelp_poi_df = pd.DataFrame(yelp_poi_details)
yelp_poi_df

Unnamed: 0,bike_station,name,categories,address,distance,poi_latitude,poi_longitude,ratings,review_count
0,Hess at king,La Luna,Middle Eastern,"306 King Street W,Hamilton, ON L8P 1B1,Canada",108.424550,43.259422,-79.878488,4.0,63
1,Hess at king,Hambrgr,Burgers,"49 King William Street,Hamilton, ON L8R 1A2,Ca...",858.672096,43.257210,-79.866900,4.5,202
2,Hess at king,Earth To Table : Bread Bar,Pizza,"258 Locke Street S,Hamilton, ON L8P 4B9,Canada",1052.141521,43.252840,-79.887020,4.0,293
3,Hess at king,The Ship,Seafood,"23 Augusta Street,Hamilton, ON L8N 1P6,Canada",970.855528,43.252150,-79.870000,4.0,208
4,Hess at king,Berkeley North,Bars,"31 King William Street,Hamilton, ON L8R 1A1,Ca...",792.544458,43.257405,-79.867715,4.5,43
...,...,...,...,...,...,...,...,...,...
2801,Cannon at Ottawa,Mr Beast Burger,Burgers,"224 Ottawa Street N,Hamilton, ON L8H 3Z6,Canada",149.392720,43.248686,-79.817039,4.0,1
2802,Cannon at Ottawa,Bernie’s Tavern,Modern European,"1101-1103 Cannon St E,Hamilton, ON L8L 2J5,Canada",293.018582,43.248570,-79.821395,3.5,3
2803,Cannon at Ottawa,The Hearty Hooligan,Cafes,"292 Ottawa Street N,Hamilton, ON L8H 3Z9,Canada",324.046798,43.250241,-79.816466,4.5,7
2804,Cannon at Ottawa,Simply Italian Bakery,Bakeries,"212 Ottawa Street N,Hamilton, ON L8H 3Z6,Canada",116.158933,43.248448,-79.817282,4.0,1


In [41]:
# persist yelp DataFrame to a CSV file on the local disk.
yelp_poi_df.to_csv('../data/yelp_poi.csv', index=False)

# Comparing Results

#### Which API provided you with more complete data? Provide an explanation. 

* Number of POIs: Compared the number of POIs returned by each API for the city of Hamilton. A higher number of POIs might indicate better coverage in terms of the variety of places available in the area.

* Depth of Information: Assessed the completeness of the information provided for each POI. This includes details like name, address, categories, ratings, and user-generated content such as reviews and photos. A more comprehensive set of data for each POI can be an indicator of better quality.

* User Reviews: Analyzed the number and quality of user reviews for each POI. A larger number of reviews and higher ratings might suggest that one API has more engaged users and better coverage in terms of user-generated content.

In [42]:
# Calculate the number of unique POIs
unique_yelp_poi_count = yelp_poi_df['name'].nunique()
unique_fsq_poi_count = foursquare_poi_df['name'].nunique()

print(f"Number of POIs returned by Yelp API: {unique_yelp_poi_count}")
print(f"Number of POIs returned by Foursquare API: {unique_fsq_poi_count}")

Number of POIs returned by Yelp API: 327
Number of POIs returned by Foursquare API: 420


### Depth of Information:
Assessed the completeness of the information provided for each POI. This includes details like name, address, categories, ratings, and user-generated content such as reviews and photos. A more comprehensive set of data for each POI can be an indicator of better quality.

In [43]:
# Check for NaN values in POIs by Yelp
nan_counts = yelp_poi_df.isna().sum()

# Print the counts of NaN values in each column
print("NaN counts in each column:")
print(nan_counts)

NaN counts in each column:
bike_station     0
name             0
categories       0
address          0
distance         0
poi_latitude     0
poi_longitude    0
ratings          0
review_count     0
dtype: int64


In [44]:
# Check for NaN values in POIs by Foursquare
nan_counts = foursquare_poi_df.isna().sum()

# Print the counts of NaN values in each column
print("NaN counts in each column:")
print(nan_counts)

NaN counts in each column:
bike_station       0
name               0
categories        28
address            0
distance           0
poi_latitude       0
poi_longitude      0
ratings          740
total_ratings    740
total_tips       842
total_photos     837
dtype: int64


### User Reviews: 
Analyzed the number and quality of user reviews for each POI. A larger number of reviews and higher ratings might suggest that one API has more engaged users and better coverage in terms of user-generated content.

In [45]:
# Group by 'name' and calculate summary statistics for each POI
yelp_poi_stats = yelp_poi_df.groupby('name').agg({
    'ratings': ['mean', 'max', 'min'],
    'review_count': 'sum'
}).reset_index()

# Rename columns for clarity
yelp_poi_stats.columns = ['name', 'average_rating', 'highest_rating', 'lowest_rating', 'total_review_count']

# Sort by total_review_count in descending order
yelp_poi_stats = yelp_poi_stats.sort_values(by='total_review_count', ascending=False)

# Print the summary statistics for each POI
print("Summary statistics for each POI:")
print(yelp_poi_stats)

Summary statistics for each POI:
                           name  average_rating  highest_rating  \
128                     Hambrgr             4.5             4.5   
287                    The Mule             4.0             4.0   
292                    The Ship             4.0             4.0   
93   Earth To Table : Bread Bar             4.0             4.0   
12                     August 8             4.0             4.0   
..                          ...             ...             ...   
70           Confederation Park             4.0             4.0   
137                  Hargitai's             5.0             5.0   
78                Das Schnitzel             5.0             5.0   
134  Hammerheads Fresh On Locke             5.0             5.0   
217                Pizza Fiamma             5.0             5.0   

     lowest_rating  total_review_count  
128            4.5                8888  
287            4.0                7965  
292            4.0                7696 

In [46]:
# Group by 'name' and calculate summary statistics for each POI
foursquare_poi_stats = foursquare_poi_df.groupby('name').agg({
    'ratings': ['mean', 'max', 'min'],
    'total_ratings': 'sum'
}).reset_index()

# Rename columns for clarity
foursquare_poi_stats.columns = ['name', 'average_rating', 'highest_rating', 'lowest_rating', 'total_ratings_count']

# Sort by total_review_count in descending order
foursquare_poi_stats = foursquare_poi_stats.sort_values(by='total_ratings_count', ascending=False)

# Print the summary statistics for each POI
print("Summary statistics for each POI:")
print(foursquare_poi_stats)

Summary statistics for each POI:
                               name  average_rating  highest_rating  \
130        Earth to Table Bread Bar        4.300000             4.3   
312     Sapporo Japanese Restaurant        3.900000             3.9   
251     Mulberry Street Coffeehouse        4.300000             4.3   
174        Hamilton Farmers' Market        4.300000             4.3   
374                     Tim Hortons        3.468794             4.0   
..                              ...             ...             ...   
66                     Canna Cabana             NaN             NaN   
213                   Laking Garden             NaN             NaN   
212  Lakeside Insurance & Financial             NaN             NaN   
211                    Lake Ontario             NaN             NaN   
419                       pt Health             NaN             NaN   

     lowest_rating  total_ratings_count  
130            4.3               3111.0  
312            3.9            

In [47]:
# Check for NaN values in POIs by Foursquare
nan_counts = foursquare_poi_stats.isna().sum()

# Print the counts of NaN values in each column
print("NaN counts in each column:")
print(nan_counts)

NaN counts in each column:
name                     0
average_rating         270
highest_rating         270
lowest_rating          270
total_ratings_count      0
dtype: int64


## Get the top 10 restaurants according to their rating

In [48]:
# Filter rows where 'categories' contain the word 'Restaurant'
restaurant_df = foursquare_poi_df[foursquare_poi_df['categories'].str.contains('Restaurant', case=False, na=False)]

# Group the DataFrame by 'categories', calculate the mean rating for each category, and sort by rating in descending order
top_10_by_category = restaurant_df.groupby('categories')['ratings'].mean().reset_index().sort_values(by='ratings', ascending=False).head(10)

# Display the top 10 restaurants by category
print(top_10_by_category)

                  categories   ratings
18           Taco Restaurant  4.400000
11       Lebanese Restaurant  4.300000
1         Burrito Restaurant  4.200000
4         Dim Sum Restaurant  4.200000
9          Indian Restaurant  4.200000
0        American Restaurant  4.134783
13  Mediterranean Restaurant  4.000000
8          German Restaurant  4.000000
5       Ethiopian Restaurant  3.900000
20     Vietnamese Restaurant  3.900000


In [51]:
# Filter rows where 'categories' contain the word 'Restaurant'
restaurant_df = yelp_poi_df[yelp_poi_df['categories'].str.contains('Restaurant', case=False, na=False)]

# Group the DataFrame by 'categories', calculate the mean rating for each category, and sort by rating in descending order
top_10_by_category = restaurant_df.groupby('categories')['ratings'].mean().reset_index().sort_values(by='ratings', ascending=False).head(10)

# Display the top 10 restaurants by category
print(top_10_by_category)

Empty DataFrame
Columns: [categories, ratings]
Index: []
