In [1]:
# imports the require libraries
import os
import json
import requests
import pandas as pd
from pandas.io.json import json_normalize

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
fsq_api_key = os.environ["MY_FOURSQUARE_API_KEY"]

In [3]:
# Function to get nearby places using Foursquare API
def get_location(latitude, longitude):
    gps = f"{latitude},{longitude}"
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    url = 'https://api.foursquare.com/v3/places/search?ll='+ gps +'&radius=1000&fields='+ fields +'&limit=25'

    headers = {
        "Accept": "application/json",
        "Authorization": fsq_api_key
    }

    response = requests.get(url, headers=headers)
    data = response.json()
    return data

In [10]:
# Testing my Foursquare API request
results = get_location(43.259126, -79.877212)

#print(results['results'])
#results_norm = pd.json_normalize(results)
#results_norm

In [11]:
len(results['results'])   

50

In [4]:
# Read bike station data from CSV into a DataFrame
bike_stations_df = pd.read_csv("../data/hamilton_bike_stations.csv")
bike_stations_df

Unnamed: 0,name,latitude,longitude,number_of_bikes
0,Hess at king,43.259126,-79.877212,12
1,Bayfront Park,43.269288,-79.871327,35
2,Bay at Strachan,43.267859,-79.867923,25
3,Bay at Mulberry,43.263198,-79.871803,13
4,City Hall,43.256132,-79.874499,17
...,...,...,...,...
140,Van Wagner's Beach,43.260008,-79.766388,11
141,King William at James,43.257635,-79.868487,6
142,Westdale Village - Paisley,43.261712,-79.905901,11
143,Westdale Aviary,43.264869,-79.907480,8


In [5]:
# Create a dictionary to store the results for each bike station
results_dict = {}

# Iterate through each row in the DataFrame
for index, row in bike_stations_df.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    
    # Call the get_location function to retrieve nearby places
    location_data = get_location(latitude, longitude)
    
    # Store the location data in the results dictionary
    results_dict[row['name']] = location_data

# Save the results dictionary as a JSON file
with open('../data/nearby_location_results.json', 'w') as json_file:
    json.dump(results_dict, json_file, indent=4)

print("Results saved to nearby_location_results.json")

Results saved to nearby_location_results.json


In [6]:
# Specify the path to the JSON file
json_file_path = '../data/nearby_location_results.json'

# Open and read the JSON file
with open(json_file_path, 'r') as json_file:
    fourSquare_response = json.load(json_file)

# Print the contents of the JSON file
#print(json.dumps(data, indent=4)) 
print(type(fourSquare_response))

<class 'dict'>


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [7]:
# Function that POI information from the result of foursquare API call for each bike stations
def get_poi_info(data):
    list_of_dict = []

    for key, value in data.items():
        # extract relevant information and add it into a data dictionary
        for item in value['results']:
            name = item['name']
            # Check if 'categories' key exists in the 'item' dictionary
            if "categories" in item and item['categories']:
                categories = item['categories'][0]['name']
            else:
                categories = None
            latitude = item['geocodes']['main']['latitude']
            longitude = item['geocodes']['main']['longitude']
            distance = item['distance']
            formatted_address = item['location']['formatted_address']
            if "rating" in item:
                rating = item['rating']
            else:
                rating = None
            # Check if 'stats' key exists in the 'item' dictionary
            if 'stats' in item:
                total_photos = item['stats'].get('total_photos', None)
                total_ratings = item['stats'].get('total_ratings', None)
                total_tips = item['stats'].get('total_tips', None)
            else:
                total_photos = None
                total_ratings = None
                total_tips = None

            poi_dict = {
                "bike_station": key,
                "name": name,
                "categories": categories,
                "address": formatted_address,
                "distance": distance,
                "poi_latitude": latitude,
                "poi_longitude": longitude,
                "ratings": rating,
                "total_ratings": total_ratings,
                "total_tips": total_tips,
                "total_photos": total_photos
            }
            # add poi dictionary into our list of dicts
            list_of_dict.append(poi_dict)

    return list_of_dict


Put your parsed results into a DataFrame

In [8]:
poi_result = get_poi_info(fourSquare_response)
poi_result_df = pd.DataFrame(poi_result)
poi_result_df

Unnamed: 0,bike_station,name,categories,address,distance,poi_latitude,poi_longitude,ratings,total_ratings,total_tips,total_photos
0,Hess at king,Coop Wicked Chicken Hamilton,Fried Chicken Joint,"274 King St W (Hess St), Hamilton ON L8P 1J6",19,43.259271,-79.877373,8.0,9.0,2.0,6.0
1,Hess at king,Coop Hamilton,Fried Chicken Joint,"274 King St W, Hamilton ON L8P 1J6",22,43.259153,-79.877446,7.8,13.0,2.0,7.0
2,Hess at king,Ark + Anchor,Coffee Shop,"300 King St W, Hamilton ON L8P 1B1",81,43.259332,-79.878032,8.1,11.0,2.0,13.0
3,Hess at king,La Luna Restaurant Downtown,Lebanese Restaurant,"306 King St W (at Queen St), Hamilton ON L8P 1B1",111,43.259414,-79.878417,8.6,35.0,15.0,8.0
4,Hess at king,Hess Village,Neighborhood,Hamilton ON,77,43.258514,-79.877755,7.9,18.0,3.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...
3355,Cannon at Ottawa,A l Choppers,Automotive Service,"1119 Main St E, Hamilton ON L8M 1N7",616,43.243484,-79.823018,,,,
3356,Cannon at Ottawa,Hi-Line Centre Pizza & Wings,Pizzeria,"1150 King St E (Gage), Hamilton ON L8M 1E8",859,43.247646,-79.828538,6.9,8.0,4.0,4.0
3357,Cannon at Ottawa,Triple H Restorations Inc,Home Improvement Service,"970 Barton St E (Belview), Hamilton ON L8L 3C7",645,43.252521,-79.821848,,,,
3358,Cannon at Ottawa,Michaels,Arts and Crafts Store,"1267 Barton St E (Kenilworth Ave. N.), Hamilto...",914,43.252288,-79.809096,7.1,12.0,2.0,6.0


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [19]:
yelp_api_key = os.environ["YELP_API_KEY"]

In [36]:
# Function to get nearby places using Yelp API
def get_Yelp_POIs(latitude, longitude):
    gps_yelp = f"latitude={latitude}&longitude={longitude}"
    radius = 1000  # 1000 meters radius
    url = "https://api.yelp.com/v3/businesses/search?"+ gps_yelp +f"&radius={radius}&categories=&sort_by=best_match&limit=25"

    headers = {
        "accept": "application/json",
        "authorization": f"Bearer {yelp_api_key}"
    }

    response = requests.get(url, headers=headers)
    data = response.json()
    return data

In [37]:
# Create a dictionary to store the results for each bike station
results_dict = {}

# Iterate through each row in the DataFrame
for index, row in bike_stations_df.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    
    # Call the get_location function to retrieve nearby places
    location_data = get_Yelp_POIs(latitude, longitude)
    
    # Store the location data in the results dictionary
    results_dict[row['name']] = location_data

# Save the results dictionary as a JSON file
with open('../data/nearby_yelp_results.json', 'w') as json_file:
    json.dump(results_dict, json_file, indent=4)

print("Results saved to nearby_location_results.json")

Results saved to nearby_location_results.json


In [38]:
# Specify the path to the JSON file
json_file_path = '../data/nearby_yelp_results.json'

# Open and read the JSON file
with open(json_file_path, 'r') as json_file:
    yelp_response = json.load(json_file)

# Print the contents of the JSON file
#print(json.dumps(yelp_response, indent=4)) 
print(type(yelp_response))

<class 'dict'>


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [44]:
# Function that POI information from the result of yelp API call for each bike stations
def get_yelp_poi(data):
    list_of_dict = []

    for key, value in data.items():
        # extract relevant information and add it into a data dictionary
        for item in value['businesses']:
            name = item['name']
            # Check if 'categories' key exists in the 'item' dictionary
            if "categories" in item and item['categories']:
                categories = item['categories'][0]['title']
            else:
                categories = None
            latitude = item['coordinates']['latitude']
            longitude = item['coordinates']['longitude']
            distance = item['distance']
            display_address = item['location']['display_address']
            if "rating" in item:
                rating = item['rating']
            else:
                rating = None
            # Check if 'stats' key exists in the 'item' dictionary
            if "review_count" in item:
                review_count = item['review_count']
            else:
                review_count = None

            yelp_poi_dict = {
                    "bike_station": key,
                    "name": name,
                    "categories": categories,
                    "address": display_address,
                    "distance": distance,
                    "poi_latitude": latitude,
                    "poi_longitude": longitude,
                    "ratings": rating,
                    "review_count": review_count,
                }
            # add poi dictionary into our list of dicts
            list_of_dict.append(yelp_poi_dict)

    return list_of_dict

Put your parsed results into a DataFrame

In [45]:
yelp_poi_result = get_yelp_poi(yelp_response)
yelp_poi_result_df = pd.DataFrame(yelp_poi_result)
yelp_poi_result_df

Unnamed: 0,bike_station,name,categories,address,distance,poi_latitude,poi_longitude,ratings,review_count
0,Hess at king,La Luna,Middle Eastern,"[306 King Street W, Hamilton, ON L8P 1B1, Canada]",108.424550,43.259422,-79.878488,4.0,63
1,Hess at king,Earth To Table : Bread Bar,Pizza,"[258 Locke Street S, Hamilton, ON L8P 4B9, Can...",1052.141521,43.252840,-79.887020,4.0,293
2,Hess at king,Hambrgr,Burgers,"[49 King William Street, Hamilton, ON L8R 1A2,...",858.672096,43.257210,-79.866900,4.5,202
3,Hess at king,The Ship,Seafood,"[23 Augusta Street, Hamilton, ON L8N 1P6, Canada]",970.855528,43.252150,-79.870000,4.0,208
4,Hess at king,The Coop,Sandwiches,"[274-274 King Street W, Hamilton, ON L8P 1J6, ...",18.431317,43.259134,-79.877439,4.0,43
...,...,...,...,...,...,...,...,...,...
3474,Cannon at Ottawa,Bernie’s Tavern,Modern European,"[1101-1103 Cannon St E, Hamilton, ON L8L 2J5, ...",293.018582,43.248570,-79.821395,3.5,3
3475,Cannon at Ottawa,Food Box,Burgers,"[180 Kenilworth Avenue N, Hamilton, ON L8H 4S1...",893.883812,43.245670,-79.807325,4.0,10
3476,Cannon at Ottawa,Quality Bakery,Bakeries,"[150 Ottawa Street N, Hamilton, ON L8H 3Z3, Ca...",99.913882,43.246710,-79.817730,1.0,2
3477,Cannon at Ottawa,Gage Park Diner,Diners,"[975 Main Street E, Hamilton, ON L8M 1N2, Canada]",951.839988,43.245060,-79.829290,4.0,7


# Comparing Results

## Which API provided you with more complete data? Provide an explanation. 

* Number of POIs: Compared the number of POIs returned by each API for the city of Hamilton. A higher number of POIs might indicate better coverage in terms of the variety of places available in the area.

* Depth of Information: Assessed the completeness of the information provided for each POI. This includes details like name, address, categories, ratings, and user-generated content such as reviews and photos. A more comprehensive set of data for each POI can be an indicator of better quality.

* User Reviews: Analyzed the number and quality of user reviews for each POI. A larger number of reviews and higher ratings might suggest that one API has more engaged users and better coverage in terms of user-generated content.

In [48]:
# Calculate the number of unique POIs
unique_yelp_poi_count = yelp_poi_result_df['name'].nunique()
unique_fsq_poi_count = poi_result_df['name'].nunique()

print(f"Number of POIs returned by Yelp API: {unique_yelp_poi_count}")
print(f"Number of POIs returned by Foursquare API: {unique_fsq_poi_count}")

Number of POIs returned by Yelp API: 369
Number of POIs returned by Foursquare API: 502


### Depth of Information:
Assessed the completeness of the information provided for each POI. This includes details like name, address, categories, ratings, and user-generated content such as reviews and photos. A more comprehensive set of data for each POI can be an indicator of better quality.

In [50]:
# Define the list of required attributes for completeness assessment
required_attributes = ["name", "categories", "address", "distance", "poi_latitude", "poi_longitude", "ratings", "review_count"]

# Assess the completeness for each row in the DataFrame
completeness_scores = []

for index, row in yelp_poi_result_df.iterrows():
    completeness_score = sum(1 for attr in required_attributes if row[attr] is not None)
    completeness_scores.append(completeness_score)

# Add a completeness score column to the DataFrame
yelp_poi_result_df['completeness_score'] = completeness_scores

# Print the DataFrame with completeness scores
print(yelp_poi_result_df)

          bike_station                        name          categories  \
0         Hess at king                     La Luna      Middle Eastern   
1         Hess at king  Earth To Table : Bread Bar               Pizza   
2         Hess at king                     Hambrgr             Burgers   
3         Hess at king                    The Ship             Seafood   
4         Hess at king                    The Coop          Sandwiches   
...                ...                         ...                 ...   
3474  Cannon at Ottawa             Bernie’s Tavern     Modern European   
3475  Cannon at Ottawa                    Food Box             Burgers   
3476  Cannon at Ottawa              Quality Bakery            Bakeries   
3477  Cannon at Ottawa             Gage Park Diner              Diners   
3478  Cannon at Ottawa                Sunset Grill  Breakfast & Brunch   

                                                address     distance  \
0     [306 King Street W, Hamilton, ON 

In [55]:
# Define the list of required attributes for completeness assessment
required_attributes = ["name", "categories", "address", "distance", "poi_latitude", "poi_longitude", "ratings", "total_ratings", "total_tips", "total_photos"]

# Assess the completeness for each row in the DataFrame
completeness_scores = []

for index, row in poi_result_df.iterrows():
    completeness_score = sum(1 for attr in required_attributes if row[attr] is not None)
    completeness_scores.append(completeness_score)

# Add a completeness score column to the DataFrame
poi_result_df['completeness_score'] = completeness_scores

# Print the DataFrame with completeness scores
print(poi_result_df)

          bike_station                          name  \
0         Hess at king  Coop Wicked Chicken Hamilton   
1         Hess at king                 Coop Hamilton   
2         Hess at king                  Ark + Anchor   
3         Hess at king   La Luna Restaurant Downtown   
4         Hess at king                  Hess Village   
...                ...                           ...   
3355  Cannon at Ottawa                  A l Choppers   
3356  Cannon at Ottawa  Hi-Line Centre Pizza & Wings   
3357  Cannon at Ottawa     Triple H Restorations Inc   
3358  Cannon at Ottawa                      Michaels   
3359  Cannon at Ottawa                    McDonald's   

                    categories  \
0          Fried Chicken Joint   
1          Fried Chicken Joint   
2                  Coffee Shop   
3          Lebanese Restaurant   
4                 Neighborhood   
...                        ...   
3355        Automotive Service   
3356                  Pizzeria   
3357  Home Improvemen

In [54]:
# Check for NaN values in POIs by Yelp
nan_counts = yelp_poi_result_df.isna().sum()

# Print the counts of NaN values in each column
print("NaN counts in each column:")
print(nan_counts)

NaN counts in each column:
bike_station          0
name                  0
categories            0
address               0
distance              0
poi_latitude          0
poi_longitude         0
ratings               0
review_count          0
completeness_score    0
dtype: int64


In [56]:
# Check for NaN values in POIs by Foursquare
nan_counts = poi_result_df.isna().sum()

# Print the counts of NaN values in each column
print("NaN counts in each column:")
print(nan_counts)

NaN counts in each column:
bike_station             0
name                     0
categories              20
address                  0
distance                 0
poi_latitude             0
poi_longitude            0
ratings               1124
total_ratings         1124
total_tips            1223
total_photos          1247
completeness_score       0
dtype: int64


### User Reviews: 
Analyzed the number and quality of user reviews for each POI. A larger number of reviews and higher ratings might suggest that one API has more engaged users and better coverage in terms of user-generated content.

In [58]:
# Group by 'name' and calculate summary statistics for each POI
yelp_poi_stats = yelp_poi_result_df.groupby('name').agg({
    'ratings': ['mean', 'max', 'min'],
    'review_count': 'sum'
}).reset_index()

# Rename columns for clarity
yelp_poi_stats.columns = ['name', 'average_rating', 'highest_rating', 'lowest_rating', 'total_review_count']

# Sort by total_review_count in descending order
yelp_poi_stats = yelp_poi_stats.sort_values(by='total_review_count', ascending=False)

# Print the summary statistics for each POI
print("Summary statistics for each POI:")
print(yelp_poi_stats)

Summary statistics for each POI:
                           name  average_rating  highest_rating  \
137                     Hambrgr             4.5             4.5   
326                    The Mule             4.0             4.0   
331                    The Ship             4.0             4.0   
99   Earth To Table : Bread Bar             4.0             4.0   
15                     August 8             4.0             4.0   
..                          ...             ...             ...   
182                 Lucky’s BBQ             5.0             5.0   
269          Rock Garden Bistro             4.0             4.0   
348           Van Wagners Beach             4.0             4.0   
325              The Mighty Cob             5.0             5.0   
38            Botanical Gardens             5.0             5.0   

     lowest_rating  total_review_count  
137            4.5                8888  
326            4.0                7965  
331            4.0                7696 

In [59]:
# Group by 'name' and calculate summary statistics for each POI
fourSquare_poi_stats = poi_result_df.groupby('name').agg({
    'ratings': ['mean', 'max', 'min'],
    'total_ratings': 'sum'
}).reset_index()

# Rename columns for clarity
fourSquare_poi_stats.columns = ['name', 'average_rating', 'highest_rating', 'lowest_rating', 'total_ratings_count']

# Sort by total_review_count in descending order
fourSquare_poi_stats = fourSquare_poi_stats.sort_values(by='total_ratings_count', ascending=False)

# Print the summary statistics for each POI
print("Summary statistics for each POI:")
print(fourSquare_poi_stats)

Summary statistics for each POI:
                            name  average_rating  highest_rating  \
381  Sapporo Japanese Restaurant             7.8             7.8   
314  Mulberry Street Coffeehouse             8.3             8.3   
162     Earth to Table Bread Bar             8.6             8.6   
216     Hamilton Farmers' Market             8.8             8.8   
445                     The Ship             8.3             8.3   
..                           ...             ...             ...   
194        Genuine's Bread House             NaN             NaN   
192                  Galley Pump             NaN             NaN   
190     Friendly Stranger Dundas             NaN             NaN   
189                      Freshii             NaN             NaN   
501                    pt Health             NaN             NaN   

     lowest_rating  total_ratings_count  
381            7.8               3286.0  
314            8.3               3146.0  
162            8.6      

In [60]:
# Check for NaN values in POIs by Foursquare
nan_counts = fourSquare_poi_stats.isna().sum()

# Print the counts of NaN values in each column
print("NaN counts in each column:")
print(nan_counts)

NaN counts in each column:
name                     0
average_rating         350
highest_rating         350
lowest_rating          350
total_ratings_count      0
dtype: int64


## Get the top 10 restaurants according to their rating

In [63]:
# For POIs by Foursquare

# Sort the DataFrame by 'ratings' column in descending order and get the top 10 restaurants
top_10_restaurants = poi_result_df.sort_values(by='ratings', ascending=False).head(10)

# Print the top 10 restaurants
print("Top 10 restaurants according to their rating:")
#print(top_10_restaurants)
print(top_10_restaurants.to_string(index=False))

Top 10 restaurants according to their rating:
             bike_station                    name   categories                                                      address  distance  poi_latitude  poi_longitude  ratings  total_ratings  total_tips  total_photos  completeness_score
      Cootes Drive Dundas Heirlooms Bridal Shoppe Bridal Store              19 King St W (at Ogilvie St), Dundas ON L9H 1T5       529     43.266273     -79.954526      9.1           35.0        20.0          10.0                  10
  Cootes at York and King Heirlooms Bridal Shoppe Bridal Store              19 King St W (at Ogilvie St), Dundas ON L9H 1T5       368     43.266273     -79.954526      9.1           35.0        20.0          10.0                  10
          Discovery Drive Collective Arts Brewing      Brewery 207 Burlington St E (Wellington St. N.), Hamilton ON L8L 4H2       863     43.270881     -79.852995      9.1           56.0        13.0         232.0                  10
           Eastwood Ar

In [64]:
# For POIs by Foursquare

# Sort the DataFrame by 'ratings' column in descending order and get the top 10 restaurants
top_10_restaurants = yelp_poi_result_df.sort_values(by='ratings', ascending=False).head(10)

# Print the top 10 restaurants
print("Top 10 restaurants according to their rating:")
#print(top_10_restaurants)
print(top_10_restaurants.to_string(index=False))

Top 10 restaurants according to their rating:
       bike_station                name   categories                                                    address     distance  poi_latitude  poi_longitude  ratings  review_count  completeness_score
Sterling at Whitton       Pho Excellent   Vietnamese         [1120 Main Street W, Hamilton, ON L8S 1C1, Canada]   479.997890     43.258204     -79.910846      5.0             1                   8
    Main at Dundurn               Tomah       Syrian         [132 Queen Street S, Hamilton, ON L8P 3S2, Canada]   838.771294     43.255460     -79.880640      5.0             7                   8
        270 Sherman Burger Bus Hamilton      Burgers       [126 Sanford Avenue N, Hamilton, ON L8L 5Z3, Canada]   975.508773     43.255527     -79.844512      5.0             1                   8
        270 Sherman  Mikey's Cream Pies     Bakeries     [775 Barton Street East, Hamilton, ON L8L 3A9, Canada]   599.443153     43.255447     -79.831588      5.0    