In [1]:
# Import libraries
import pandas as pd
import numpy as np
import requests
import os
import pprint

# Show up to 500 columns when a wide DataFrame is displayed
pd.set_option('display.max_columns', 500)

In [2]:
# Load the bike stations' latitude and longitude from our saved 'montreal_stations_coordinate.csv' file
montreal_stations_coordinate = pd.read_csv('../data/montreal_stations_coordinate.csv')
montreal_stations_coordinate

Unnamed: 0,station_latitude,station_longitude
0,45.617500,-73.606011
1,45.516926,-73.564257
2,45.506176,-73.711186
3,45.512994,-73.682498
4,45.514734,-73.691449
...,...,...
793,45.533815,-73.578905
794,45.509911,-73.563806
795,45.497605,-73.555350
796,45.501188,-73.570455


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
# Yelp's API has a daily limit of 500 requests, so the station coordinates are split
# into two batches: the first 400 rows, and everything after them
montreal_stations_coordinate1 = montreal_stations_coordinate.iloc[:400]
montreal_stations_coordinate2 = montreal_stations_coordinate.iloc[400:]

In [4]:
# 'montreal_stations_coordinate1' and 'montreal_stations_coordinate2' are the two station-coordinate
# batches to iterate over when calling the Yelp API.
# The two batches are scheduled to run on two different days to stay within the Yelp API limit.
print("Length of the first set of montreal bike stations coordinates :", len(montreal_stations_coordinate1))
# The label below previously said "first set" even though it reports the second batch
print("Length of the second set of montreal bike stations coordinates :", len(montreal_stations_coordinate2))

Length of the first set of montreal bike stations coordinates : 400
Length of the first set of montreal bike stations coordinates : 398


In [5]:
# Read the Yelp API key from the 'yelp_key2' environment variable
# (os.getenv returns None when the variable is not set)
YELP_KEY = os.getenv('yelp_key2')

In [6]:
# Due to the daily request limit, let's test a small sample of 5 coordinates first to figure out the response data structure from Yelp API
# NOTE(review): this comment originally said 5000 per day while the other cells say 500 per day — confirm the actual quota
montreal_stations_coordinate_s = montreal_stations_coordinate[:5]

# Create a test function to get sample businesses from Yelp API with limit 
def get_yelp_business_sample(latitude, longitude, radius, api_key, categories, limit, timeout=30):
    """
    Get a limited number of businesses from the Yelp API search endpoint for one coordinate.

    Parameters:
        latitude (float): The latitude coordinate of the location.
        longitude (float): The longitude coordinate of the location.
        radius (int): The search radius in meters.
        api_key (str): Your Yelp API key for authentication.
        categories (str): A comma-separated list of venue categories.
        limit (int): Maximum number of businesses to return for this coordinate.
        timeout (float): Seconds to wait for the HTTP request before giving up (default 30).

    Returns:
        dict: The decoded JSON body of the Yelp API response.

    Raises:
        requests.HTTPError: If Yelp answers with a 4xx/5xx status (for example when the
            request quota is exhausted), instead of silently returning the error body.
    """

    # Endpoint only; the query string is passed via `params` so values are URL-encoded by requests
    yelp_url_limit = 'https://api.yelp.com/v3/businesses/search'
    query = {
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "categories": categories,
        "limit": limit,
    }

    # Create dictionary for headers, add key with our API KEY
    headers = {"Accept": "application/json",
               "Authorization": f"Bearer {api_key}"}

    # Get the response; a timeout keeps one stalled request from hanging the whole run
    yelp_result_sample = requests.get(yelp_url_limit, headers=headers, params=query, timeout=timeout)

    # Fail loudly on HTTP errors so a rejected call is not mistaken for "no businesses found"
    yelp_result_sample.raise_for_status()

    return yelp_result_sample.json()

In [8]:
# Collect the sample results from Yelp for bars within a 1000m radius of the sample Montreal
# bike stations (limit=1 for each coordinate); the per-station frames are combined into
# 'yelp_bars_df_s' once after the loop instead of re-concatenating on every iteration.
# NOTE(review): the station CSV displayed earlier shows 'station_latitude'/'station_longitude'
# columns while this loop reads 'cb_latitude'/'cb_longitude' — confirm the CSV's column names.
yelp_bar_frames_s = []

for index, row in montreal_stations_coordinate_s.iterrows():
    latitude = row['cb_latitude']
    longitude = row['cb_longitude']
    yelp_bar_data_sample = get_yelp_business_sample(latitude=latitude, longitude=longitude, radius=1000, api_key=YELP_KEY, categories="bars", limit=1)

    # Extract the 'businesses' key from the JSON response and create a DataFrame from it
    yelp_businesses_sample = yelp_bar_data_sample.get('businesses', [])
    yelp_bar_details_sample = pd.DataFrame(yelp_businesses_sample)

    # Tag every returned business with the station coordinate that was used for the search
    yelp_bar_details_sample['yelp_latitude'] = latitude
    yelp_bar_details_sample['yelp_longitude'] = longitude

    yelp_bar_frames_s.append(yelp_bar_details_sample)

# pd.concat raises on an empty list, so fall back to an empty DataFrame when nothing was collected
yelp_bars_df_s = pd.concat(yelp_bar_frames_s, ignore_index=True) if yelp_bar_frames_s else pd.DataFrame()

In [9]:
# Preview the first rows of the sample results
yelp_bars_df_s.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,RO32EAy8IfnWAm04vCPmzA,le-4e-mur-montréal,Le 4e Mur,https://s3-media2.fl.yelpcdn.com/bphoto/5FUAUP...,False,https://www.yelp.com/biz/le-4e-mur-montr%C3%A9...,99,"[{'alias': 'speakeasies', 'title': 'Speakeasie...",4.5,"{'latitude': 45.5161779, 'longitude': -73.5657...",[],$$,"{'address1': '2021 Rue Saint-Denis', 'address2...",14383968947,+1 438-396-8947,145.030626
1,raDMB9FD8yVRT8qXKFMiuA,district-961-saint-laurent,District 961,https://s3-media2.fl.yelpcdn.com/bphoto/uzPrvW...,False,https://www.yelp.com/biz/district-961-saint-la...,9,"[{'alias': 'mideastern', 'title': 'Middle East...",4.0,"{'latitude': 45.51040614472581, 'longitude': -...",[],,{'address1': '1814 Boulevard de la Côte-Vertu'...,15143377961,+1 514-337-7961,441.195324
2,raDMB9FD8yVRT8qXKFMiuA,district-961-saint-laurent,District 961,https://s3-media2.fl.yelpcdn.com/bphoto/uzPrvW...,False,https://www.yelp.com/biz/district-961-saint-la...,9,"[{'alias': 'mideastern', 'title': 'Middle East...",4.0,"{'latitude': 45.51040614472581, 'longitude': -...",[],,{'address1': '1814 Boulevard de la Côte-Vertu'...,15143377961,+1 514-337-7961,602.803151


In [10]:
# The five sample coordinate API call works with the 'businesses' key
# Let's apply it to the two sets of the coordinates at 400 row with an official function

# Create a function to get businesses from Yelp API
def get_yelp_business(latitude, longitude, radius, api_key, categories, timeout=30):

    """
    Get business information from Yelp API based on latitude, longitude, radius, and categories.

    Parameters:
        latitude (float): The latitude coordinate of the location.
        longitude (float): The longitude coordinate of the location.
        radius (int): The search radius in meters.
        api_key (str): Your Yelp API key for authentication.
        categories (str): A comma-separated list of venue categories.
        timeout (float): Seconds to wait for the HTTP request before giving up (default 30).

    Returns:
        dict: The decoded JSON body of the Yelp API response.

    Raises:
        requests.HTTPError: If Yelp answers with a 4xx/5xx status (for example when the
            request quota is exhausted), instead of silently returning the error body.

    Example:
        response_data = get_yelp_business(latitude=40.7128, longitude=-74.0060, radius=1000, api_key="YOUR_API_KEY", categories="bars")
        # Returns JSON data with business information based on the provided parameters.
    """

    # Endpoint only; the query string is passed via `params` so values are URL-encoded by requests.
    # No 'limit' is sent, so Yelp's default page size applies.
    yelp_url = 'https://api.yelp.com/v3/businesses/search'
    query = {
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "categories": categories,
    }

    # Create dictionary for headers, add key with our API KEY
    headers = {"Accept": "application/json",
               "Authorization": f"Bearer {api_key}"}

    # Get the response; a timeout keeps one stalled request from hanging the whole run
    yelp_result = requests.get(yelp_url, headers=headers, params=query, timeout=timeout)

    # Check status: fail loudly so a rejected call is not mistaken for "no businesses found"
    yelp_result.raise_for_status()

    return yelp_result.json()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [11]:
# Start with the first 400 station coordinates from 'montreal_stations_coordinate1'

# Collect the results from Yelp for bars within a 1000m radius of each Montreal bike station;
# the per-station frames are combined into 'yelp_bars_df1' once after the loop instead of
# re-concatenating the growing frame on every iteration.
# NOTE(review): the station CSV displayed earlier shows 'station_latitude'/'station_longitude'
# columns while this loop reads 'cb_latitude'/'cb_longitude' — confirm the CSV's column names.
yelp_bar_frames1 = []

for index, row in montreal_stations_coordinate1.iterrows():
    latitude = row['cb_latitude']
    longitude = row['cb_longitude']
    yelp_bar_data1 = get_yelp_business(latitude=latitude, longitude=longitude, radius=1000, api_key=YELP_KEY, categories="bars")

    # Extract the 'businesses' key from the JSON response and create a DataFrame from it
    yelp_businesses1 = yelp_bar_data1.get('businesses', [])
    yelp_bar_details1 = pd.DataFrame(yelp_businesses1)

    # Tag every returned business with the station coordinate that was used for the search
    yelp_bar_details1['yelp_latitude'] = latitude
    yelp_bar_details1['yelp_longitude'] = longitude

    yelp_bar_frames1.append(yelp_bar_details1)

# pd.concat raises on an empty list, so fall back to an empty DataFrame when nothing was collected
yelp_bars_df1 = pd.concat(yelp_bar_frames1, ignore_index=True) if yelp_bar_frames1 else pd.DataFrame()

In [12]:
# Preview the first rows of the first batch of results
yelp_bars_df1.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,RO32EAy8IfnWAm04vCPmzA,le-4e-mur-montréal,Le 4e Mur,https://s3-media2.fl.yelpcdn.com/bphoto/5FUAUP...,False,https://www.yelp.com/biz/le-4e-mur-montr%C3%A9...,99,"[{'alias': 'speakeasies', 'title': 'Speakeasie...",4.5,"{'latitude': 45.5161779, 'longitude': -73.5657...",[],$$,"{'address1': '2021 Rue Saint-Denis', 'address2...",14383968947,+1 438-396-8947,145.030626
1,3jKUbhGSjFTv5jZ0wnW0xA,les-deux-gamins-montréal-3,Les Deux Gamins,https://s3-media4.fl.yelpcdn.com/bphoto/MthdGD...,False,https://www.yelp.com/biz/les-deux-gamins-montr...,595,"[{'alias': 'french', 'title': 'French'}, {'ali...",4.0,"{'latitude': 45.51545, 'longitude': -73.57095}",[restaurant_reservation],$$,"{'address1': '170 Rue Prince Arthur E', 'addre...",15142883389,+1 514-288-3389,546.848942
2,EQCd_CUFrK_gSARL7s01fw,la-distillerie-no-1-montréal-7,La Distillerie No.1,https://s3-media2.fl.yelpcdn.com/bphoto/HEHvlX...,False,https://www.yelp.com/biz/la-distillerie-no-1-m...,314,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.5,"{'latitude': 45.514822, 'longitude': -73.565401}",[],$$,"{'address1': '300 Rue Ontario E', 'address2': ...",15144482461,+1 514-448-2461,250.371652
3,XQDWifQPvL2R_c3ptJ8D2g,randolph-pub-ludique-montréal-6,Randolph Pub Ludique,https://s3-media1.fl.yelpcdn.com/bphoto/z5_6vy...,False,https://www.yelp.com/biz/randolph-pub-ludique-...,96,"[{'alias': 'pubs', 'title': 'Pubs'}, {'alias':...",4.5,"{'latitude': 45.5164298, 'longitude': -73.5660...",[],$$,"{'address1': '2041 Rue Saint-Denis', 'address2...",15144195001,+1 514-419-5001,146.938745
4,TgjgHz-QRrv60QKYZ-qmMg,les-3-brasseurs-montreal-13,Les 3 Brasseurs,https://s3-media3.fl.yelpcdn.com/bphoto/tPnOF-...,False,https://www.yelp.com/biz/les-3-brasseurs-montr...,101,"[{'alias': 'pubs', 'title': 'Pubs'}, {'alias':...",3.5,"{'latitude': 45.5149237507194, 'longitude': -7...",[],$$,"{'address1': '1658 St-Denis', 'address2': '', ...",15148451660,+1 514-845-1660,230.449057


In [13]:
# Save the dataframe 'yelp_bars_df1' as 'yelp_bars_df1.csv' to archive the first API call data for safety
# (to_csv also writes the row index as the first column by default)
yelp_bars_df1.to_csv('../data/yelp_bars_df1.csv')

In [14]:
# Due to Yelp's limit of 500 requests per day, a second API key is used for the rest of the Montreal station coordinates
# Read from the 'yelp_key3' environment variable (os.getenv returns None when the variable is not set)
YELP_KEY2 = os.getenv('yelp_key3')

In [16]:
# Continue with the rest of the station coordinates in 'montreal_stations_coordinate2' (398 rows)

# Collect the results from Yelp for bars within a 1000m radius of each Montreal bike station;
# the per-station frames are combined into 'yelp_bars_df2' once after the loop instead of
# re-concatenating the growing frame on every iteration.
# NOTE(review): the station CSV displayed earlier shows 'station_latitude'/'station_longitude'
# columns while this loop reads 'cb_latitude'/'cb_longitude' — confirm the CSV's column names.
yelp_bar_frames2 = []

for index, row in montreal_stations_coordinate2.iterrows():
    latitude = row['cb_latitude']
    longitude = row['cb_longitude']
    yelp_bar_data2 = get_yelp_business(latitude=latitude, longitude=longitude, radius=1000, api_key=YELP_KEY2, categories="bars")

    # Extract the 'businesses' key from the JSON response and create a DataFrame from it
    yelp_businesses2 = yelp_bar_data2.get('businesses', [])
    yelp_bar_details2 = pd.DataFrame(yelp_businesses2)

    # Tag every returned business in yelp_bar_details2 with the station coordinate used for the search
    yelp_bar_details2['yelp_latitude'] = latitude
    yelp_bar_details2['yelp_longitude'] = longitude

    yelp_bar_frames2.append(yelp_bar_details2)

# pd.concat raises on an empty list, so fall back to an empty DataFrame when nothing was collected
yelp_bars_df2 = pd.concat(yelp_bar_frames2, ignore_index=True) if yelp_bar_frames2 else pd.DataFrame()

In [17]:
# Preview the first rows of the second batch of results
yelp_bars_df2.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,XI3iNUgChOfJk5ppjKVEQQ,barroco-montréal,Barroco,https://s3-media1.fl.yelpcdn.com/bphoto/zhCqnO...,False,https://www.yelp.com/biz/barroco-montr%C3%A9al...,297,"[{'alias': 'spanish', 'title': 'Spanish'}, {'a...",4.5,"{'latitude': 45.50138, 'longitude': -73.55592}",[],$$$,"{'address1': '312 Rue Saint-Paul O', 'address2...",15145445800,+1 514-544-5800,248.531957
1,FRuuIDOd21pTJHvEGIt1Gg,holder-montréal-2,Holder,https://s3-media2.fl.yelpcdn.com/bphoto/D3rim-...,False,https://www.yelp.com/biz/holder-montr%C3%A9al-...,369,"[{'alias': 'french', 'title': 'French'}, {'ali...",4.0,"{'latitude': 45.50047, 'longitude': -73.55734}",[],$$$,"{'address1': '407 Rue McGill', 'address2': '',...",15148490333,+1 514-849-0333,131.494516
2,t3Rr8xk_9dSdoZm8sfwCXw,pub-saint-pierre-montréal,Pub Saint Pierre,https://s3-media2.fl.yelpcdn.com/bphoto/pSyKAQ...,False,https://www.yelp.com/biz/pub-saint-pierre-mont...,100,"[{'alias': 'pubs', 'title': 'Pubs'}]",4.5,"{'latitude': 45.50129, 'longitude': -73.55669}",[],$$,"{'address1': '410 Rue Saint-Pierre', 'address2...",15149855204,+1 514-985-5204,188.127676
3,uH4-Esv5-b31q2y89AqyAw,place-carmin-montréal,Place Carmin,https://s3-media2.fl.yelpcdn.com/bphoto/nrOtTW...,False,https://www.yelp.com/biz/place-carmin-montr%C3...,29,"[{'alias': 'french', 'title': 'French'}, {'ali...",5.0,"{'latitude': 45.49820816147015, 'longitude': -...",[],,"{'address1': '740 Rue William', 'address2': ''...",15144390740,+1 514-439-0740,316.103787
4,ii9kqE9s4456Jn9asz-BPQ,bevo-bar-pizzeria-montréal,BEVO Bar + Pizzeria,https://s3-media1.fl.yelpcdn.com/bphoto/X70_yH...,False,https://www.yelp.com/biz/bevo-bar-pizzeria-mon...,388,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,"{'latitude': 45.50696, 'longitude': -73.55324}",[],$$,"{'address1': '410 Rue Saint Vincent', 'address...",15148615039,+1 514-861-5039,817.247623


In [18]:
# Save the dataframe 'yelp_bars_df2' as 'yelp_bars_df2.csv' to archive the second API call data for safety
# (to_csv also writes the row index as the first column by default)
yelp_bars_df2.to_csv('../data/yelp_bars_df2.csv')

In [20]:
# Check the shape of the two API results before merging into one dataframe
# NOTE(review): the recorded output shows far fewer rows for the first batch (390) than for the
# second (4830) despite similar station counts — confirm the first batch was not cut short by API errors
print(yelp_bars_df1.shape)
print(yelp_bars_df2.shape)

(390, 16)
(4830, 16)


In [27]:
# Compare the column labels of the two results position by position;
# an all-True array means the columns of the two results match
yelp_bars_df1.columns == yelp_bars_df2.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])

In [28]:
# Stack the two batches into the 'merged_yelp_bars_df' dataframe with a fresh 0..n-1 index,
# then summarise its columns, non-null counts and dtypes
merged_yelp_bars_df = pd.concat([yelp_bars_df1, yelp_bars_df2], ignore_index=True)
merged_yelp_bars_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5220 entries, 0 to 5219
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             5220 non-null   object 
 1   alias          5220 non-null   object 
 2   name           5220 non-null   object 
 3   image_url      5220 non-null   object 
 4   is_closed      5220 non-null   bool   
 5   url            5220 non-null   object 
 6   review_count   5220 non-null   int64  
 7   categories     5220 non-null   object 
 8   rating         5220 non-null   float64
 9   coordinates    5220 non-null   object 
 10  transactions   5220 non-null   object 
 11  price          3798 non-null   object 
 12  location       5220 non-null   object 
 13  phone          5220 non-null   object 
 14  display_phone  5220 non-null   object 
 15  distance       5220 non-null   float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 616.9+ KB


In [29]:
# Save the merged results to 'merged_yelp_bars_df.csv' for archival
# (to_csv also writes the row index as the first column by default)
merged_yelp_bars_df.to_csv('../data/merged_yelp_bars_df.csv')

In [35]:
# Unlike Foursquare, this master Yelp dataframe carries a rating for each bar.
# List the rows whose bar 'id' repeats an earlier row (the first occurrence of each id is
# not flagged), ordered by 'id' — repeated bars might be useful for imputation.
repeated_id_mask = merged_yelp_bars_df.duplicated(subset='id')
yelp_dup = merged_yelp_bars_df[repeated_id_mask].sort_values('id')
yelp_dup

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
2283,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,1214.732044
3322,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,919.364911
458,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,879.869165
3513,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,991.384712
1915,-Bus-TnZSfw20gnHHpwSlg,restaurant-el-chalateco-montréal,Restaurant El Chalateco,https://s3-media2.fl.yelpcdn.com/bphoto/t0nkra...,False,https://www.yelp.com/biz/restaurant-el-chalate...,27,"[{'alias': 'salvadoran', 'title': 'Salvadoran'...",4.0,"{'latitude': 45.53494597689491, 'longitude': -...",[],$$,"{'address1': '520 Rue Beaubien E', 'address2':...",+15142725585,+1 514-272-5585,1030.590089
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1685,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,352.247706
281,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,1143.586293
3145,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,1205.032414
5176,zySx88a8wu-mpPIkg5ijcQ,club-pelicano-montréal,Club Pelicano,https://s3-media3.fl.yelpcdn.com/bphoto/QagUDW...,False,https://www.yelp.com/biz/club-pelicano-montr%C...,26,"[{'alias': 'tapasmallplates', 'title': 'Tapas/...",4.5,"{'latitude': 45.504623, 'longitude': -73.563434}",[],,"{'address1': '1076 Rue de Bleury', 'address2':...",+15148611515,+1 514-861-1515,1002.942296


Those duplicated rows can't be eliminated yet. While the majority of columns look the same, we used coordinates of the bike stations in Montreal to look up bars nearby. We'll revisit this issue in part 3.

Put your parsed results into a DataFrame

In [65]:
# List the available columns before choosing which ones to keep
merged_yelp_bars_df.columns

Index(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'coordinates', 'transactions', 'price',
       'location', 'phone', 'display_phone', 'distance'],
      dtype='object')

In [67]:
# Initialize lists to only select data from some meaningful columns:
# name, is_closed, review_count, rating, price, zip_code, distance
name = []
is_closed = []
zip_code = []
category = []
review_count = []
rating = []
price = []
distance = []

# Extract data from 'merged_yelp_bars_df'
for index, record in merged_yelp_bars_df.iterrows():
    name.append(record['name'])
    is_closed.append(record['is_closed'])

    # Read this record's own 'location' before using it (it was previously read one step late,
    # which fails on the first row and pairs every later row with the previous row's zip code)
    location = record['location']
    zip_code.append(location.get('zip_code'))

    # Keep the first category alias containing "bar"; append None when there is no match
    # so that 'category' stays aligned row-for-row with the other lists
    categories = record['categories']
    category.append(next((entry['alias'] for entry in categories if 'bar' in entry['alias']), None))

    review_count.append(record['review_count'])
    rating.append(record['rating'])
    price.append(record['price'])
    distance.append(record['distance'])

# Create a DataFrame from the extracted data
# ('category' is collected above but is not part of the exported feature columns)
yelp_bar_features_df = pd.DataFrame({
    'name': name,
    'is_closed': is_closed,
    'review_count': review_count,
    'rating': rating,
    'price': price,
    'postcode': zip_code,
    'distance': distance
})

# Show the head of the DataFrame
yelp_bar_features_df.head(5)

Unnamed: 0,name,is_closed,review_count,rating,price,postcode,distance
0,Le 4e Mur,False,99,4.5,$$,H2X 3K8,145.030626
1,Les Deux Gamins,False,595,4.0,$$,H2X 1B7,546.848942
2,La Distillerie No.1,False,314,4.5,$$,H2X 1H6,250.371652
3,Randolph Pub Ludique,False,96,4.5,$$,H2X 3K8,146.938745
4,Les 3 Brasseurs,False,101,3.5,$$,H2X 3K6,230.449057


In [68]:
# Save Yelp's Montreal bar results with the desired columns to 'yelp_bar_features.csv'
# (to_csv also writes the row index as the first column by default)
yelp_bar_features_df.to_csv('../data/yelp_bar_features.csv')

# Comparing Results

**Which API provided you with more complete data? Provide an explanation.**

Comparing both the raw and the cleaned-up responses (cleanup mostly removed duplicated ids) from the Foursquare and Yelp APIs, Yelp has a slight edge in data completeness for Montreal bars within a kilometer of the shared bike stations.

* **Foursquare**: 5907 entries with 10 columns initially received from API - 482 unique bar places returned after cleanup with more modest information about the place. But they have geographical indicators specific to the place (like geocodes, timezone) as well as connected places (chains, related places)
* **Yelp**: 5220 entries with 16 columns initially received from API - 482 unique bar places returned after cleanup with incredible indicators such as rating, review counts, prices and all.

Given that both API sources return the same number of unique bars, we can further explore whether these bars are actually the same places. If they are, there is no clear conclusion as to which API has more complete data in terms of quantity.

**Get the top 10 restaurants or bars according to their rating**

In [71]:
# Ten highest-rated rows of 'yelp_bar_features_df' (sorted by 'rating' only, descending;
# 'review_count' is not considered, and rows tied on rating come in no guaranteed order)
yelp_bar_features_df.sort_values(by='rating', ascending=False).head(10)

Unnamed: 0,name,is_closed,review_count,rating,price,postcode,distance
292,Le Petit Secret,False,1,5.0,$$,H2S 1Z6,847.611221
330,Name’s On The Way,False,1,5.0,$$,H2J 2H2,17.947504
369,Bar Métro Pub,False,1,5.0,$$,H2K 1W6,766.596271
110,Caffè Roma,False,3,5.0,$,H2E 1H5,989.886164
447,Le Food Klüb,False,1,5.0,$$,H2T 1S1,1068.116543
375,Resto Cabana,False,1,5.0,$$,H7V 2V9,1057.682793
152,Place Carmin,False,29,5.0,$$,H3C 1P1,177.455686
174,Crispo's Lounge,False,1,5.0,$$,H1R,427.987992
32,Supernat,False,1,5.0,$$,H1V 1X9,526.382549
439,Karisma,False,5,5.0,$$,H2T 2N7,382.70458
