In [1]:
# Import libaries
import pandas as pd
import numpy as np
import requests
import os
import pprint

# Set option to view max 500 columns
pd.set_option('display.max_columns', 500)

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Recall the bike stations latitude and logitude from our saved 'montreal_stations_coordinate.csv' file
montreal_stations_coordinate = pd.read_csv('../data/montreal_stations_coordinate.csv')
montreal_stations_coordinate

Unnamed: 0,cb_latitude,cb_longitude
0,45.617500,-73.606011
1,45.516926,-73.564257
2,45.506176,-73.711186
3,45.512994,-73.682498
4,45.514734,-73.691449
...,...,...
792,45.492913,-73.564688
793,45.545188,-73.576443
794,45.512470,-73.532122
795,45.499666,-73.630291


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
# As Yelp API has a daily limit of 500 per day, we'll take split into two sets of the coordinates at 400 rows for Yelp
montreal_stations_coordinate1 = montreal_stations_coordinate[:400]
montreal_stations_coordinate2 = montreal_stations_coordinate[400:]

In [4]:
# The new Montreal station coordinates dataframes to iterate and make Yelp API call is now 'montreal_stations_coordinate1' and 'montreal_stations_coordinate2'
# Scheduled to run 2 sub-groups in two different days to bypass the Yelp API limit
print("Length of the first set of montreal bike stations coordinates :", len(montreal_stations_coordinate1))
print("Length of the first set of montreal bike stations coordinates :", len(montreal_stations_coordinate2))

Length of the first set of montreal bike stations coordinates : 400
Length of the first set of montreal bike stations coordinates : 397


In [5]:
YELP_KEY1 = os.getenv('yelp_key1')

In [6]:
# Due to the 5000 limit per day, let's test out a small sample of 5 coordinates to figure out the response data structure from Yelp API
montreal_stations_coordinate_s = montreal_stations_coordinate[:5]

# Create a test function to get sample businesses from Yelp API with limit 
def get_yelp_business_sample(latitude, longitude, radius, api_key, categories, limit):

    # Form the url with a limit per coordinate
    yelp_url_limit = 'https://api.yelp.com/v3/businesses/search?' + f"latitude={latitude}&longitude={longitude}" + f"&radius={radius}" + f"&categories={categories}" + f"&limit={limit}"
    
    # Create dictionary for headers, add key with our API KEY
    headers = {"Accept": "application/json",
              "Authorization": f"Bearer {api_key}"}
    
    # Get the response
    yelp_result_sample = requests.get(yelp_url_limit, headers=headers)
    
    return yelp_result_sample.json()

In [7]:
# Create an empty DataFrame to store the sample results from Yelp for bars with 1000m radius from Montreal bike stations, limit=1 for each coordinate
yelp_bars_df_s = pd.DataFrame()

for index, row in montreal_stations_coordinate_s.iterrows():
    latitude = row['cb_latitude']
    longitude = row['cb_longitude']
    yelp_bar_data_sample = get_yelp_business_sample(latitude=latitude, longitude=longitude, radius=1000, api_key=YELP_KEY1, categories="bars", limit=1)

    # Extract the 'businesses' key from the JSON response and create a DataFrame from it
    yelp_businesses_sample = yelp_bar_data_sample.get('businesses', [])
    yelp_bar_details_sample = pd.DataFrame(yelp_businesses_sample)

    # Add latitude and longitude to yelp_bar_details_sample DataFrame
    yelp_bar_details_sample['yelp_latitude'] = latitude
    yelp_bar_details_sample['yelp_longitude'] = longitude

    # Append it to 'yelp_bars_df_s'
    yelp_bars_df_s = pd.concat([yelp_bars_df_s, yelp_bar_details_sample], ignore_index=True)

In [8]:
yelp_bars_df_s.head()

Unnamed: 0,yelp_latitude,yelp_longitude,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,45.516926,-73.564257,3jKUbhGSjFTv5jZ0wnW0xA,les-deux-gamins-montréal-3,Les Deux Gamins,https://s3-media4.fl.yelpcdn.com/bphoto/MthdGD...,False,https://www.yelp.com/biz/les-deux-gamins-montr...,595.0,"[{'alias': 'french', 'title': 'French'}, {'ali...",4.0,"{'latitude': 45.51545, 'longitude': -73.57095}",[restaurant_reservation],$$,"{'address1': '170 Rue Prince Arthur E', 'addre...",15142883389,+1 514-288-3389,546.848942
1,45.512994,-73.682498,raDMB9FD8yVRT8qXKFMiuA,district-961-saint-laurent,District 961,https://s3-media2.fl.yelpcdn.com/bphoto/uzPrvW...,False,https://www.yelp.com/biz/district-961-saint-la...,9.0,"[{'alias': 'mideastern', 'title': 'Middle East...",4.0,"{'latitude': 45.51040614472581, 'longitude': -...",[],,{'address1': '1814 Boulevard de la Côte-Vertu'...,15143377961,+1 514-337-7961,441.195324
2,45.514734,-73.691449,raDMB9FD8yVRT8qXKFMiuA,district-961-saint-laurent,District 961,https://s3-media2.fl.yelpcdn.com/bphoto/uzPrvW...,False,https://www.yelp.com/biz/district-961-saint-la...,9.0,"[{'alias': 'mideastern', 'title': 'Middle East...",4.0,"{'latitude': 45.51040614472581, 'longitude': -...",[],,{'address1': '1814 Boulevard de la Côte-Vertu'...,15143377961,+1 514-337-7961,602.803151


In [9]:
# The five sample coordinate API call works with the 'businesses' key
# Let's apply it to the two sets of the coordinates at 400 row with an official function

# Create a function to get businesses from Yelp API
def get_yelp_business(latitude, longitude, radius, api_key, categories):

    """
    Get business information from Yelp API based on latitude, longitude, radius, and categories.

    Parameters:
        latitude (float): The latitude coordinate of the location.
        longitude (float): The longitude coordinate of the location.
        radius (int): The search radius in meters.
        api_key (str): Your Yelp API key for authentication.
        categories (str): A comma-separated list of venue categories.
        
    Returns:
        json: JSON data containing the response data from the Yelp API.

    Example:
        response_data = get_yelp_business(latitude=40.7128, longitude=-74.0060, radius=1000, "YOUR_API_KEY", categories="bars")
        # Returns JSON data with business information based on the provided parameters.
    """

    # Form the url with a limit as Yelp API has a daily limit of 500 per day
    yelp_url = 'https://api.yelp.com/v3/businesses/search?' + f"latitude={latitude}&longitude={longitude}" + f"&radius={radius}" + f"&categories={categories}"
    
    # Create dictionary for headers, add key with our API KEY
    headers = {"Accept": "application/json",
              "Authorization": f"Bearer {api_key}"}
    
    # Get the response, check status
    yelp_result = requests.get(yelp_url, headers=headers)
    
    return yelp_result.json()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [10]:
# Start with the first 400 station coordinates from 'montreal_stations_coordinate1'

# Create an empty DataFrame to store the results from Yelp for bars with 1000m radius from Montreal bike stations
yelp_bars_df1 = pd.DataFrame()

for index, row in montreal_stations_coordinate1.iterrows():
    latitude = row['cb_latitude']
    longitude = row['cb_longitude']
    yelp_bar_data1 = get_yelp_business(latitude=latitude, longitude=longitude, radius=1000, api_key=YELP_KEY1, categories="bars")
    
    # Extract the 'businesses' key from the JSON response and create a DataFrame from it
    yelp_businesses1 = yelp_bar_data1.get('businesses', [])
    yelp_bar_details1 = pd.DataFrame(yelp_businesses1)

    # Add latitude and longitude to yelp_bar_details1 DataFrame
    yelp_bar_details1['yelp_latitude'] = latitude
    yelp_bar_details1['yelp_longitude'] = longitude

    # Append it to 'yelp_bars_df1'
    yelp_bars_df1 = pd.concat([yelp_bars_df1, yelp_bar_details1], ignore_index=True)

In [11]:
yelp_bars_df1.head()

Unnamed: 0,yelp_latitude,yelp_longitude,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,45.516926,-73.564257,3jKUbhGSjFTv5jZ0wnW0xA,les-deux-gamins-montréal-3,Les Deux Gamins,https://s3-media4.fl.yelpcdn.com/bphoto/MthdGD...,False,https://www.yelp.com/biz/les-deux-gamins-montr...,595.0,"[{'alias': 'french', 'title': 'French'}, {'ali...",4.0,"{'latitude': 45.51545, 'longitude': -73.57095}",[restaurant_reservation],$$,"{'address1': '170 Rue Prince Arthur E', 'addre...",15142883389,+1 514-288-3389,546.848942
1,45.516926,-73.564257,RO32EAy8IfnWAm04vCPmzA,le-4e-mur-montréal,Le 4e Mur,https://s3-media2.fl.yelpcdn.com/bphoto/5FUAUP...,False,https://www.yelp.com/biz/le-4e-mur-montr%C3%A9...,99.0,"[{'alias': 'speakeasies', 'title': 'Speakeasie...",4.5,"{'latitude': 45.5161779, 'longitude': -73.5657...",[],$$,"{'address1': '2021 Rue Saint-Denis', 'address2...",14383968947,+1 438-396-8947,145.030626
2,45.516926,-73.564257,XQDWifQPvL2R_c3ptJ8D2g,randolph-pub-ludique-montréal-6,Randolph Pub Ludique,https://s3-media1.fl.yelpcdn.com/bphoto/z5_6vy...,False,https://www.yelp.com/biz/randolph-pub-ludique-...,96.0,"[{'alias': 'pubs', 'title': 'Pubs'}, {'alias':...",4.5,"{'latitude': 45.5164298, 'longitude': -73.5660...",[],$$,"{'address1': '2041 Rue Saint-Denis', 'address2...",15144195001,+1 514-419-5001,146.938745
3,45.516926,-73.564257,TgjgHz-QRrv60QKYZ-qmMg,les-3-brasseurs-montreal-13,Les 3 Brasseurs,https://s3-media3.fl.yelpcdn.com/bphoto/tPnOF-...,False,https://www.yelp.com/biz/les-3-brasseurs-montr...,101.0,"[{'alias': 'pubs', 'title': 'Pubs'}, {'alias':...",3.5,"{'latitude': 45.5149237507194, 'longitude': -7...",[],$$,"{'address1': '1658 St-Denis', 'address2': '', ...",15148451660,+1 514-845-1660,230.449057
4,45.516926,-73.564257,Adq9HEkjkM5qMppiwa7OLg,cadet-montréal,Cadet,https://s3-media2.fl.yelpcdn.com/bphoto/T60o6H...,False,https://www.yelp.com/biz/cadet-montr%C3%A9al?a...,113.0,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.5,"{'latitude': 45.5103, 'longitude': -73.564418}",[],$$$,"{'address1': '1431 Boulevard Saint-Laurent', '...",15149031631,+1 514-903-1631,736.906512


In [12]:
# Save the dataframe 'yelp_bars_df1' as 'yelp_bars_df1.csv' to archive the first API call data for safety
yelp_bars_df1.to_csv('../data/yelp_bars_df1.csv')

In [13]:
# Due to Yelp's limit of 500 requests per day, we can get creative with another API key for the rest of Montreal station coordinates
YELP_KEY2 = os.getenv('yelp_key2')

In [14]:
# Continue with rest of station coordinates with 'montreal_stations_coordinate2' (398 rows)

# Create an empty DataFrame to store the results from Yelp for bars with 1000m radius from Montreal bike stations
yelp_bars_df2 = pd.DataFrame()

for index, row in montreal_stations_coordinate2.iterrows():
    latitude = row['cb_latitude']
    longitude = row['cb_longitude']
    yelp_bar_data2 = get_yelp_business(latitude=latitude, longitude=longitude, radius=1000, api_key=YELP_KEY2, categories="bars")
    
    # Extract the 'businesses' key from the JSON response and create a DataFrame from it
    yelp_businesses2 = yelp_bar_data2.get('businesses', [])
    yelp_bar_details2 = pd.DataFrame(yelp_businesses2)

    # Add latitude and longitude to yelp_bar_details1 DataFrame
    yelp_bar_details2['yelp_latitude'] = latitude
    yelp_bar_details2['yelp_longitude'] = longitude

    # Append it to 'yelp_bars_df2'
    yelp_bars_df2 = pd.concat([yelp_bars_df2, yelp_bar_details2], ignore_index=True)

In [15]:
yelp_bars_df2.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance,yelp_latitude,yelp_longitude
0,XI3iNUgChOfJk5ppjKVEQQ,barroco-montréal,Barroco,https://s3-media1.fl.yelpcdn.com/bphoto/zhCqnO...,False,https://www.yelp.com/biz/barroco-montr%C3%A9al...,297.0,"[{'alias': 'spanish', 'title': 'Spanish'}, {'a...",4.5,"{'latitude': 45.50138, 'longitude': -73.55592}",[],$$$,"{'address1': '312 Rue Saint-Paul O', 'address2...",15145445800,+1 514-544-5800,248.531957,45.500876,-73.559006
1,FRuuIDOd21pTJHvEGIt1Gg,holder-montréal-2,Holder,https://s3-media2.fl.yelpcdn.com/bphoto/D3rim-...,False,https://www.yelp.com/biz/holder-montr%C3%A9al-...,370.0,"[{'alias': 'french', 'title': 'French'}, {'ali...",4.0,"{'latitude': 45.50047, 'longitude': -73.55734}",[],$$$,"{'address1': '407 Rue McGill', 'address2': '',...",15148490333,+1 514-849-0333,131.494516,45.500876,-73.559006
2,uH4-Esv5-b31q2y89AqyAw,place-carmin-montréal,Place Carmin,https://s3-media2.fl.yelpcdn.com/bphoto/nrOtTW...,False,https://www.yelp.com/biz/place-carmin-montr%C3...,29.0,"[{'alias': 'french', 'title': 'French'}, {'ali...",5.0,"{'latitude': 45.49820816147015, 'longitude': -...",[],,"{'address1': '740 Rue William', 'address2': ''...",15144390740,+1 514-439-0740,316.103787,45.500876,-73.559006
3,t3Rr8xk_9dSdoZm8sfwCXw,pub-saint-pierre-montréal,Pub Saint Pierre,https://s3-media2.fl.yelpcdn.com/bphoto/pSyKAQ...,False,https://www.yelp.com/biz/pub-saint-pierre-mont...,101.0,"[{'alias': 'pubs', 'title': 'Pubs'}]",4.5,"{'latitude': 45.50129, 'longitude': -73.55669}",[],$$,"{'address1': '410 Rue Saint-Pierre', 'address2...",15149855204,+1 514-985-5204,188.127676,45.500876,-73.559006
4,8xCiibfgqy-8xpCWEaguqg,the-keg-steakhouse-bar-montréal-8,The Keg Steakhouse + Bar,https://s3-media4.fl.yelpcdn.com/bphoto/GtlIJS...,False,https://www.yelp.com/biz/the-keg-steakhouse-ba...,310.0,"[{'alias': 'steak', 'title': 'Steakhouses'}, {...",4.0,"{'latitude': 45.5007523, 'longitude': -73.5694...",[],$$$,"{'address1': '5 Place Ville Marie', 'address2'...",15148681999,+1 514-868-1999,817.773376,45.500876,-73.559006


In [16]:
# Save the dataframe 'yelp_bars_df2' as 'yelp_bars_df2.csv' to archive the second API call data for safety
yelp_bars_df2.to_csv('../data/yelp_bars_df2.csv')

In [17]:
# Check the shape of the two API results before merging into one dataframe
print(yelp_bars_df1.shape)
print(yelp_bars_df2.shape)

(4257, 18)
(4804, 18)


In [18]:
# The columns of 'yelp_bars_df1'
yelp_bars_df1.columns

Index(['yelp_latitude', 'yelp_longitude', 'id', 'alias', 'name', 'image_url',
       'is_closed', 'url', 'review_count', 'categories', 'rating',
       'coordinates', 'transactions', 'price', 'location', 'phone',
       'display_phone', 'distance'],
      dtype='object')

In [19]:
# The columns of 'yelp_bars_df2' doesn't have the same order as yelp_bars_df2
yelp_bars_df2.columns

Index(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'coordinates', 'transactions', 'price',
       'location', 'phone', 'display_phone', 'distance', 'yelp_latitude',
       'yelp_longitude'],
      dtype='object')

In [20]:
# Change the order of columns of 'yelp_bars_df1' to match with yelp_bars_df2
new_column_order = ['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
                    'categories', 'rating', 'coordinates', 'transactions', 'price',
                    'location', 'phone', 'display_phone', 'distance', 'yelp_latitude','yelp_longitude']

yelp_bars_df1 = yelp_bars_df1[new_column_order]

# Check again if columns orders of the two dataframe match

yelp_bars_df1.columns == yelp_bars_df2.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True])

In [21]:
# Concatenate the two results into the 'merged_yelp_bars_df' dataframe
merged_yelp_bars_df = pd.concat([yelp_bars_df1, yelp_bars_df2], ignore_index=True)
merged_yelp_bars_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9061 entries, 0 to 9060
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              9061 non-null   object 
 1   alias           9061 non-null   object 
 2   name            9061 non-null   object 
 3   image_url       9061 non-null   object 
 4   is_closed       9061 non-null   object 
 5   url             9061 non-null   object 
 6   review_count    9061 non-null   float64
 7   categories      9061 non-null   object 
 8   rating          9061 non-null   float64
 9   coordinates     9061 non-null   object 
 10  transactions    9061 non-null   object 
 11  price           6480 non-null   object 
 12  location        9061 non-null   object 
 13  phone           9061 non-null   object 
 14  display_phone   9061 non-null   object 
 15  distance        9061 non-null   float64
 16  yelp_latitude   9061 non-null   float64
 17  yelp_longitude  9061 non-null   f

In [22]:
# Convert into 'merged_yelp_bars_df.csv' file for saving
merged_yelp_bars_df.to_csv('../data/merged_yelp_bars_df.csv')

In [23]:
# This master Yelp dataframe provides rating for bars whereas Foursquare doesn't
# Check if bar 'id' is duplicated
yelp_dup = merged_yelp_bars_df[merged_yelp_bars_df[['id']].duplicated() == True].sort_values('id')
yelp_dup

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance,yelp_latitude,yelp_longitude
2482,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27.0,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,632.565041,45.532265,-73.611063
4831,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27.0,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,1312.110957,45.543102,-73.616075
2584,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27.0,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,658.414955,45.534099,-73.595535
2649,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27.0,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,232.910698,45.533348,-73.605834
8355,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27.0,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,1218.778274,45.528092,-73.591691
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4453,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26.0,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,323.396473,45.505274,-73.567479
788,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26.0,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,287.477583,45.502060,-73.562950
5294,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26.0,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,287.111988,45.502940,-73.560640
5614,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26.0,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,315.857762,45.507078,-73.561395


Those duplicated id and name rows can't be eliminated yet. While the majority of columns look the same, we used coordinates of the bike stations in Montreal to look up bars nearby. We'll revisit this issue in part 3.

Put your parsed results into a DataFrame

In [24]:
merged_yelp_bars_df.columns

Index(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'coordinates', 'transactions', 'price',
       'location', 'phone', 'display_phone', 'distance', 'yelp_latitude',
       'yelp_longitude'],
      dtype='object')

In [25]:
# Initialize lists to only select data from some meaningful columns: name, zip_code, category, review_count, rating, price, distance
name = []
zip_code = []
category = []
review_count = []
rating = []
price = []
distance = []
latitude = []
longitude =[]

# Extract data from 'result_df'
for index, record in merged_yelp_bars_df.iterrows():
    name.append(record['name'])

    location = record['location']
    zip_code.append(location['zip_code'])

    # Extract the category alias with 'Bars' from the categories title. Assign 'Bars' in general if not found
    categories = record['categories']
    bar_categories = [category_info['title'] for category_info in categories if 'Bars' in category_info['title']]
    category.append(bar_categories[0] if bar_categories else 'Bars')

    review_count.append(record['review_count'])
    rating.append(record['rating'])
    price.append(record['price'])

    distance.append(record['distance'])
    latitude.append(record['yelp_latitude'])
    longitude.append(record['yelp_longitude'])

# Create a DataFrame from the extracted data
yelp_bar_features_df = pd.DataFrame({
    'name': name,
    'postcode': zip_code,
    'category': category,
    'review_count': review_count,
    'rating': rating,
    'price': price,
    'distance': distance,
    'latitude': latitude,
    'longitude': longitude
})

# Print the head of the DataFrame
yelp_bar_features_df.head(5)

Unnamed: 0,name,postcode,category,review_count,rating,price,distance,latitude,longitude
0,Les Deux Gamins,H2X 1B7,Wine Bars,595.0,4.0,$$,546.848942,45.516926,-73.564257
1,Le 4e Mur,H2X 3K8,Bars,99.0,4.5,$$,145.030626,45.516926,-73.564257
2,Randolph Pub Ludique,H2X 3K8,Bars,96.0,4.5,$$,146.938745,45.516926,-73.564257
3,Les 3 Brasseurs,H2X 3K6,Bars,101.0,3.5,$$,230.449057,45.516926,-73.564257
4,Cadet,H2X 2S8,Bars,113.0,4.5,$$$,736.906512,45.516926,-73.564257


In [26]:
merged_yelp_bars_df.shape

(9061, 18)

In [27]:
# Save the Yelp's Montreal bar result with desired columns to 'yelp_bar_features.csv'
yelp_bar_features_df.to_csv('../data/yelp_bar_features.csv', index=False)

# Comparing API Results

**Which API provided you with more complete data? Provide an explanation.**

Comparing responses from Foursquare and Yelp API, Yelp has a slightly better edge in having more coverage and more data; regarding Montreal bar places within a kilometer away from the shared bike stations:

* **Foursquare**: 5897 entries with 10 columns initially received from API with modest information about the place (we added 2 extra columns: latitude and longitude). But they have geographical indicators specific to the place (like geocodes, timezone), as well as connected places (chains, related places - but mostly empty in this dataset).
* **Yelp**: 9061 entries with 16 columns initially received from API (we added 2 extra columns: latitude and longitude) with incredible indicators such as rating, review counts, prices compared to Foursquare's.

However, in terms of data quality: The catgory classification of Foursquare is more complete. We noticed that distance values over 1000 meters from both Foursquare and Yelp API responses.

**Get the top 10 restaurants or bars according to their rating**

In [28]:
yelp_bar_features_df.sort_values(by='rating', ascending=False).head(10)

Unnamed: 0,name,postcode,category,review_count,rating,price,distance,latitude,longitude
7345,Chez Ernest,H2S 2M3,Bars,2.0,5.0,,726.839188,45.54151,-73.598421
6170,Tabac Villeray,H2E 1G6,Bars,1.0,5.0,,1141.83346,45.540951,-73.626375
4922,Caffè Roma,H2E 1H5,Bars,3.0,5.0,$,847.091418,45.544819,-73.62151
4924,Tabac Villeray,H2E 1G6,Bars,1.0,5.0,,568.70829,45.544819,-73.62151
3680,Cafe Del Monko,H4C 2C7,Bars,1.0,5.0,$,1245.435046,45.477605,-73.573775
3677,Shushuto,H3K 1B9,Tapas Bars,2.0,5.0,,263.046936,45.477605,-73.573775
3676,La Canting,H3K 0B3,Wine Bars,8.0,5.0,,1138.584784,45.477605,-73.573775
4926,Le Système,H2S 2N1,Bars,1.0,5.0,,846.762294,45.544819,-73.62151
6168,Cafécoquetel,H2R 1L4,Wine Bars,1.0,5.0,,510.610362,45.540951,-73.626375
2510,Le Système,H2S 2N1,Bars,1.0,5.0,,1316.016073,45.549121,-73.600874
