In [1]:
# imports
import requests
import os
import pandas as pd
import numpy as np
import pprint
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
fs_key = os.getenv('FOURSQUARE_API_KEY')

In [3]:
''' Categories
10000	Arts and Entertainment
12009	Community and Government > Education
13003	Dining and Drinking > Bar
13065	Dining and Drinking > Restaurant
16032	Landmarks and Outdoors > Park
16019	Landmarks and Outdoors > Hiking Trail
17114	Retail > Shopping Mall
18000	Sports and Recreation
19030	Travel and Transportation > Transport Hub
'''
categories = '10000,12009,13003,13065,16032,16019,17114,18000,19030'

In [4]:
def get_venues_fs(latitude, longitude, radius, api_key, categories):
    
    url = f'https://api.foursquare.com/v3/places/search?ll={latitude}%2C{longitude}&radius={radius}&categories={categories}'

    headers = {'accept': 'application/json'}
    headers['Authorization'] = api_key
        
    response = requests.get(url, headers=headers)
    response_data = response.json()
    return response_data

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [5]:
test_fs = get_venues_fs(latitude=43.685924, longitude=-79.376304, radius=1000, api_key=fs_key, categories=categories)
pprint.pp(test_fs)

{'results': [{'fsq_id': '4ae48f90f964a520ad9b21e3',
              'categories': [{'id': 16032,
                              'name': 'Park',
                              'short_name': 'Park',
                              'plural_name': 'Parks',
                              'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
                                       'suffix': '.png'}}],
              'chains': [],
              'closed_bucket': 'LikelyOpen',
              'distance': 470,
              'geocodes': {'main': {'latitude': 43.687377,
                                    'longitude': -79.370803},
                           'roof': {'latitude': 43.687377,
                                    'longitude': -79.370803}},
              'link': '/v3/places/4ae48f90f964a520ad9b21e3',
              'location': {'address': '250 Douglas Dr',
                           'country': 'CA',
                           'cross_street': 'Glenn Rd.(Summerhill Av.)',
 

Put your parsed results into a DataFrame

In [6]:
# We need the bike_stations df so that can we search for venues close to each station
bike_stations = pd.read_csv('../data/toronto_bike_stations.csv', index_col=0)

In [None]:
fs_df = pd.DataFrame()
for index, row in bike_stations.iterrows():
    fs_venues = get_venues_fs(latitude=row['latitude'], longitude=row['longitude'], radius=1000, api_key=fs_key, categories=categories)
    temp_df = pd.json_normalize(fs_venues, record_path=['results'])
    temp_df['station_id'] = row['id']
    fs_df = pd.concat([fs_df, temp_df], ignore_index=True)

In [7]:
# I created a pickle of the dataframe while working on the code so that I could restart if I made a mistake, without having to call the API again
fs_df = pd.read_pickle('../data/foursquare.pkl')

+ confirming that the dataframe was created correctly

In [8]:
print(fs_df.shape)
print(fs_df.columns)

(8640, 30)
Index(['fsq_id', 'categories', 'chains', 'closed_bucket', 'distance', 'link',
       'name', 'timezone', 'geocodes.main.latitude', 'geocodes.main.longitude',
       'geocodes.roof.latitude', 'geocodes.roof.longitude', 'location.address',
       'location.country', 'location.cross_street',
       'location.formatted_address', 'location.locality', 'location.postcode',
       'location.region', 'related_places.children',
       'geocodes.drop_off.latitude', 'geocodes.drop_off.longitude',
       'station_id', 'related_places.parent.fsq_id',
       'related_places.parent.categories', 'related_places.parent.name',
       'location.address_extended', 'location.po_box',
       'geocodes.front_door.latitude', 'geocodes.front_door.longitude'],
      dtype='object')


In [9]:
fs_df

Unnamed: 0,fsq_id,categories,chains,closed_bucket,distance,link,name,timezone,geocodes.main.latitude,geocodes.main.longitude,...,geocodes.drop_off.latitude,geocodes.drop_off.longitude,station_id,related_places.parent.fsq_id,related_places.parent.categories,related_places.parent.name,location.address_extended,location.po_box,geocodes.front_door.latitude,geocodes.front_door.longitude
0,4ae48f90f964a520ad9b21e3,"[{'id': 16032, 'name': 'Park', 'short_name': '...",[],LikelyOpen,470,/v3/places/4ae48f90f964a520ad9b21e3,Chorley Park,America/Toronto,43.687377,-79.370803,...,,,009f180cf35ae1285733d98ccf058313,,,,,,,
1,4dfe2ca5185009bba4df0608,"[{'id': 16032, 'name': 'Park', 'short_name': '...",[],LikelyOpen,905,/v3/places/4dfe2ca5185009bba4df0608,The Don Valley Brick Works Park,America/Toronto,43.684526,-79.365204,...,,,009f180cf35ae1285733d98ccf058313,,,,,,,
2,4bf031dcf831c928eab401f2,"[{'id': 16032, 'name': 'Park', 'short_name': '...",[],VeryLikelyOpen,960,/v3/places/4bf031dcf831c928eab401f2,David A. Balfour Park,America/Toronto,43.685607,-79.388253,...,,,009f180cf35ae1285733d98ccf058313,,,,,,,
3,4dfe2d6d185009bba4df0672,"[{'id': 16032, 'name': 'Park', 'short_name': '...",[],VeryLikelyOpen,906,/v3/places/4dfe2d6d185009bba4df0672,Moore Park Ravine,America/Toronto,43.693691,-79.379762,...,,,009f180cf35ae1285733d98ccf058313,,,,,,,
4,4bc0d290920eb713dda1192c,"[{'id': 16032, 'name': 'Park', 'short_name': '...",[],LikelyOpen,984,/v3/places/4bc0d290920eb713dda1192c,Rosehill Reservoir,America/Toronto,43.686260,-79.388544,...,,,009f180cf35ae1285733d98ccf058313,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8635,4ad4c061f964a520b3f720e3,"[{'id': 10037, 'name': 'Concert Hall', 'short_...",[],LikelyOpen,389,/v3/places/4ad4c061f964a520b3f720e3,Massey Hall,America/Toronto,43.654096,-79.379167,...,,,ffeaa2aaedeb18fc11f708bcd8323149,,,,at Shuter St,,,
8636,4adc9148f964a520512d21e3,"[{'id': 13145, 'name': 'Fast Food Restaurant',...",[{'id': 'ab4c58e0-d68a-012e-5619-003048cad9da'...,VeryLikelyOpen,264,/v3/places/4adc9148f964a520512d21e3,Chipotle Mexican Grill,America/Toronto,43.656829,-79.380958,...,,,ffeaa2aaedeb18fc11f708bcd8323149,4adcaacff964a520882e21e3,"[{'id': 17000, 'name': 'Retail', 'short_name':...",10 Dundas East,Unit 114,,,
8637,55a9bbf9498e00ffd7f4c71f,"[{'id': 13305, 'name': 'Burrito Restaurant', '...",[{'id': '2d108fae-d80a-482b-a105-57ab79c2581e'...,VeryLikelyOpen,410,/v3/places/55a9bbf9498e00ffd7f4c71f,Burrito Boyz,America/Toronto,43.656331,-79.378541,...,,,ffeaa2aaedeb18fc11f708bcd8323149,,,,,,,
8638,4adbbae6f964a520402a21e3,"[{'id': 10024, 'name': 'Movie Theater', 'short...",[],LikelyOpen,261,/v3/places/4adbbae6f964a520402a21e3,Cineplex Odeon Yonge & Dundas Cinemas,America/Toronto,43.656582,-79.380498,...,,,ffeaa2aaedeb18fc11f708bcd8323149,4adcaacff964a520882e21e3,"[{'id': 17000, 'name': 'Retail', 'short_name':...",10 Dundas East,Suite 402,,,


+ dropping unnecessary columns

In [10]:
columns_to_drop_fs = [
    'chains','closed_bucket','link','timezone', 'location.formatted_address', 'related_places.children', 'geocodes.drop_off.latitude', 'geocodes.drop_off.longitude',
    'geocodes.roof.latitude', 'geocodes.roof.longitude', 'location.country', 'location.cross_street',	'location.locality', 'location.region',
    'related_places.parent.fsq_id',	'related_places.parent.categories',	'related_places.parent.name', 'location.address_extended', 'location.po_box',
    'geocodes.front_door.latitude','geocodes.front_door.longitude'
]

fs_df = fs_df.drop(columns=columns_to_drop_fs)

+ this code was to parse the list of dictionaries in `categories`

In [18]:
j=0
for row in fs_df['categories']:
    for i in range(len(row)):
        fs_df.at[j,f'category{i}'] = row[i]
    j+=1

+ this extracted the names of the categories from the separated dictionaries

In [19]:
j=0
for row in fs_df.iterrows():
    fs_df.at[j,'category0_name'] = fs_df.at[j,'category0']['name']
    if type(fs_df.at[j,'category1']) != float: fs_df.at[j,'category1_name'] = fs_df.at[j,'category1']['name']
    if type(fs_df.at[j,'category2']) != float: fs_df.at[j,'category2_name'] = fs_df.at[j,'category2']['name']
    if type(fs_df.at[j,'category3']) != float: fs_df.at[j,'category3_name'] = fs_df.at[j,'category3']['name']
    if type(fs_df.at[j,'category4']) != float: fs_df.at[j,'category4_name'] = fs_df.at[j,'category4']['name']
    if type(fs_df.at[j,'category5']) != float: fs_df.at[j,'category5_name'] = fs_df.at[j,'category5']['name']
    j+=1

+ This code was to extract the category IDs. I ended up not using it for the final dataframe since it wasn't needed, but left it for posterity's sake

In [22]:
j=0
for row in fs_df.iterrows():
    fs_df.at[j,'category0_id'] = fs_df.at[j,'category0']['id']
    if type(fs_df.at[j,'category1']) != float: fs_df.at[j,'category1_id'] = fs_df.at[j,'category1']['id']
    if type(fs_df.at[j,'category2']) != float: fs_df.at[j,'category2_id'] = fs_df.at[j,'category2']['id']
    if type(fs_df.at[j,'category3']) != float: fs_df.at[j,'category3_id'] = fs_df.at[j,'category3']['id']
    if type(fs_df.at[j,'category4']) != float: fs_df.at[j,'category4_id'] = fs_df.at[j,'category4']['id']
    if type(fs_df.at[j,'category5']) != float: fs_df.at[j,'category5_id'] = fs_df.at[j,'category5']['id']
    j+=1

+ dropping the category columns that are no longer needed

In [20]:
drop_categories_fs = ['categories', 'category0', 'category1', 'category2', 'category3', 'category4', 'category5']
fs_df = fs_df.drop(columns=drop_categories_fs)

In [22]:
na_columns_fs={
    'category0_name':'N/A', 'category1_name':'N/A', 'category2_name':'N/A', 'category3_name':'N/A', 'category4_name':'N/A', 'category5_name':'N/A'
    #'category0_id':'N/A', 'category1_id':'N/A', 'category2_id':'N/A', 'category3_id':'N/A', 'category4_id':'N/A', 'category5_id':'N/A'
}
fs_df = fs_df.fillna(value=na_columns_fs)

In [23]:
to_cat_fs = list(na_columns_fs.keys())

for col in to_cat_fs:
    fs_df[col] = fs_df[col].astype('category')

In [24]:
rename_columns_fs = {
    'geocodes.main.latitude':'latitude', 'geocodes.main.longitude':'longitude','location.address':'address',
    'location.postcode':'postal_code', 'category0_name':'category0','category1_name':'category1','category2_name':'category2',
    'category3_name':'category3','category4_name':'category4','category5_name':'category5',
}

fs_df = fs_df.rename(columns=rename_columns_fs)

In [25]:
# creating a pickle to store the dataframe 
fs_df.to_pickle('../data/fs_final_data.pkl')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [26]:
yelp_key = os.getenv('YELP_API_KEY')

In [27]:
categories_yelp = 'active,arts,education,nightlife,restaurants,shoppingcenters'

In [28]:
def get_venues_yelp(latitude, longitude, radius, api_key, categories):
    
    url = f"https://api.yelp.com/v3/businesses/search?latitude={latitude}&longitude={longitude}&radius={radius}&categories={categories}&sort_by=best_match&limit=10"

    headers = {
        "accept": "application/json",
        "authorization": f"Bearer {api_key}"
    }
        
    response = requests.get(url, headers=headers)
    response_data = response.json()
    return response_data

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [29]:
test_yelp = get_venues_yelp(latitude=43.685924, longitude=-79.376304, radius=1000, api_key=yelp_key, categories=categories_yelp)
pprint.pp(test_yelp)

{'businesses': [{'id': 'GO3d6vQBfByJN93Hco4dOQ',
                 'alias': 'megumi-mazesoba-toronto',
                 'name': 'Megumi Mazesoba',
                 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/GU87KJbFHYQLvM22NbTQKw/o.jpg',
                 'is_closed': False,
                 'url': 'https://www.yelp.com/biz/megumi-mazesoba-toronto?adjust_creative=9GLA4RlrtiBodQwJet7Qrg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9GLA4RlrtiBodQwJet7Qrg',
                 'review_count': 37,
                 'categories': [{'alias': 'japanese', 'title': 'Japanese'}],
                 'rating': 4.6,
                 'coordinates': {'latitude': 43.68876, 'longitude': -79.39196},
                 'transactions': [],
                 'price': '$$',
                 'location': {'address1': '46 Saint Clair Avenue E',
                              'address2': None,
                              'address3': '',
                              'city': 'Toronto',


Put your parsed results into a DataFrame

In [None]:
yelp_df = pd.DataFrame()
for index, row in bike_stations.iterrows():
    yelp_venues = get_venues_yelp(latitude=row['latitude'], longitude=row['longitude'], radius=1000, api_key=yelp_key, categories=categories_yelp)
    temp_df = pd.json_normalize(yelp_venues, record_path=['businesses'])
    temp_df['station_id'] = row['id']
    yelp_df = pd.concat([yelp_df, temp_df], ignore_index=True)

In [30]:
yelp_df = pd.read_pickle('../data/yelp.pkl')

In [31]:
print(yelp_df.shape)
print(yelp_df.columns)

(8640, 30)
Index(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'transactions', 'price', 'phone',
       'display_phone', 'distance', 'business_hours', 'coordinates.latitude',
       'coordinates.longitude', 'location.address1', 'location.address2',
       'location.address3', 'location.city', 'location.zip_code',
       'location.country', 'location.state', 'location.display_address',
       'attributes.business_temp_closed', 'attributes.menu_url',
       'attributes.open24_hours', 'attributes.waitlist_reservation',
       'station_id'],
      dtype='object')


In [32]:
yelp_df

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,transactions,...,location.city,location.zip_code,location.country,location.state,location.display_address,attributes.business_temp_closed,attributes.menu_url,attributes.open24_hours,attributes.waitlist_reservation,station_id
0,GO3d6vQBfByJN93Hco4dOQ,megumi-mazesoba-toronto,Megumi Mazesoba,https://s3-media4.fl.yelpcdn.com/bphoto/GU87KJ...,False,https://www.yelp.com/biz/megumi-mazesoba-toron...,37,"[{'alias': 'japanese', 'title': 'Japanese'}]",4.6,[],...,Toronto,M4T 1M9,CA,ON,"[46 Saint Clair Avenue E, Toronto, ON M4T 1M9,...",,,,,009f180cf35ae1285733d98ccf058313
1,uGLAEGqeJa4jtP1Io0sOrg,mineral-toronto,Mineral,https://s3-media3.fl.yelpcdn.com/bphoto/3RgKw8...,False,https://www.yelp.com/biz/mineral-toronto?adjus...,35,"[{'alias': 'noodles', 'title': 'Noodles'}, {'a...",4.5,[],...,Toronto,M4W 2K9,CA,ON,"[1027 Yonge Street, Toronto, ON M4W 2K9, Canada]",,,,,009f180cf35ae1285733d98ccf058313
2,lDpVyjENOz_qbxbRoyBi8w,black-camel-toronto-2,Black Camel,https://s3-media2.fl.yelpcdn.com/bphoto/M5V1lm...,False,https://www.yelp.com/biz/black-camel-toronto-2...,399,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",4.4,[],...,Toronto,M4W 1S9,CA,ON,"[4 Crescent Road, Toronto, ON M4W 1S9, Canada]",,,,,009f180cf35ae1285733d98ccf058313
3,RqOgBMpKYdsEOmBVmFXe3g,ryus-noodle-bar-toronto-7,RYUS Noodle Bar,https://s3-media2.fl.yelpcdn.com/bphoto/ZzsTo2...,False,https://www.yelp.com/biz/ryus-noodle-bar-toron...,411,"[{'alias': 'ramen', 'title': 'Ramen'}]",4.0,[],...,Toronto,M4K 2P7,CA,ON,"[786 Broadview Avenue, Toronto, ON M4K 2P7, Ca...",,https://ryusnoodlebar.com/#Menu,,,009f180cf35ae1285733d98ccf058313
4,v1hfGBkWhDHIdRIfMJtp5A,dolce-bakery-toronto,Dolce Bakery,https://s3-media1.fl.yelpcdn.com/bphoto/91ntLa...,False,https://www.yelp.com/biz/dolce-bakery-toronto?...,10,"[{'alias': 'bakeries', 'title': 'Bakeries'}, {...",4.3,[],...,Toronto,M4W 2E4,CA,ON,"[420 Summerhill Avenue, Toronto, ON M4W 2E4, C...",,https://www.dolcebakery.ca/menus,,,009f180cf35ae1285733d98ccf058313
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8635,6eRSdODyj9GS0w6zlhrWkg,adega-restaurante-toronto,Adega Restaurante,https://s3-media4.fl.yelpcdn.com/bphoto/U-L0Qj...,False,https://www.yelp.com/biz/adega-restaurante-tor...,170,"[{'alias': 'portuguese', 'title': 'Portuguese'...",4.1,[],...,Toronto,M5G 1H1,CA,ON,"[33 Elm St, Toronto, ON M5G 1H1, Canada]",,,,,ffeaa2aaedeb18fc11f708bcd8323149
8636,7ODXq--HE7QpzvWwgk5rMA,barberians-steak-house-toronto,Barberian's Steak House,https://s3-media1.fl.yelpcdn.com/bphoto/kWOWiD...,False,https://www.yelp.com/biz/barberians-steak-hous...,362,"[{'alias': 'steak', 'title': 'Steakhouses'}]",3.9,[],...,Toronto,M5G 1H1,CA,ON,"[7 Elm Street, Toronto, ON M5G 1H1, Canada]",,http://barberians.com/menu/,,,ffeaa2aaedeb18fc11f708bcd8323149
8637,wpQsmMvdhefqIlxvRt_Jbg,dondon-izakaya-toronto-3,DonDon Izakaya,https://s3-media2.fl.yelpcdn.com/bphoto/KrH_2X...,False,https://www.yelp.com/biz/dondon-izakaya-toront...,361,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",3.1,[],...,Toronto,M5G 1C3,CA,ON,"[130 Dundas Street West, Toronto, ON M5G 1C3, ...",,http://www.dondonizakaya.com/menu,,,ffeaa2aaedeb18fc11f708bcd8323149
8638,mZRKH9ngRY92bI_irrHq6w,richmond-station-toronto,Richmond Station,https://s3-media3.fl.yelpcdn.com/bphoto/agCQCt...,False,https://www.yelp.com/biz/richmond-station-toro...,962,"[{'alias': 'newcanadian', 'title': 'Canadian (...",4.4,[],...,Toronto,M5H 3W4,CA,ON,"[1 Richmond Street W, Toronto, ON M5H 3W4, Can...",,https://richmondstation.ca/menu/,,,ffeaa2aaedeb18fc11f708bcd8323149


In [33]:
columns_to_drop_yelp = [
    'alias', 'image_url', 'is_closed', 'url', 'transactions', 'phone', 'display_phone','business_hours',
    'location.address2', 'location.address3', 'location.city', 'location.country', 'location.state',
    'location.display_address','attributes.business_temp_closed', 'attributes.menu_url', 'attributes.open24_hours', 
    'attributes.waitlist_reservation'
]

yelp_df = yelp_df.drop(columns=columns_to_drop_yelp)

+ just like for FourSquare, this was to extract the category names

In [39]:
j=0
for row in yelp_df['categories']:
    for i in range(len(row)):
        yelp_df.at[j,f'category{i}'] = row[i]
    j+=1

In [40]:
j=0
for row in yelp_df.iterrows():
    yelp_df.at[j,'category0_name'] = yelp_df.at[j,'category0']['title']
    if type(yelp_df.at[j,'category1']) != float: yelp_df.at[j,'category1_name'] = yelp_df.at[j,'category1']['title']
    if type(yelp_df.at[j,'category2']) != float: yelp_df.at[j,'category2_name'] = yelp_df.at[j,'category2']['title']
    if type(yelp_df.at[j,'category3']) != float: yelp_df.at[j,'category3_name'] = yelp_df.at[j,'category3']['title']
    j+=1

In [41]:
drop_categories_yelp = ['categories', 'category0', 'category1', 'category2', 'category3']
yelp_df = yelp_df.drop(columns=drop_categories_yelp)

In [42]:
na_columns_yelp={
    'category0_name':'N/A', 'category1_name':'N/A', 'category2_name':'N/A', 'category3_name':'N/A', 'price':'N/A'
}

yelp_df = yelp_df.fillna(value=na_columns_yelp)

In [43]:
to_cat_yelp = list(na_columns_yelp.keys())

for col in to_cat_yelp:
    yelp_df[col] = yelp_df[col].astype('category')

In [44]:
rename_columns_yelp = {
    'id':'yelp_id', 'coordinates.latitude':'latitude', 'coordinates.longitude':'longitude', 'location.address1':'address',
    'location.zip_code':'postal_code','category0_name':'category0', 'category1_name':'category1', 'category2_name':'category2',
    'category3_name':'category3'
}

yelp_df = yelp_df.rename(columns=rename_columns_yelp)

In [45]:
yelp_df.to_pickle('../data/yelp_final_data.pkl')

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

For the purposes of this project, the Yelp API provided more complete data. When calling the API for a list of businesses, they both provide basic business details. FourSquare extends on this by providing an exhaustive list of geolocation information, whereas Yelp provides information on ratings which is more useful to us.

If we were to call both APIs for business details then they both provide a wealth of information, though FourSquare includes social media links and quite a few more attributes. This wasn't done for this project to limit the amount of calls used.

A strange quirk of the Yelp API is that it doesn't strictly adhere to the radius provided and instead uses it as a suggestion, whereas Foursquare respected the radius value.

The categories between the APIs aren't directly comparable making it difficult to draw any conclusions based off of the amount of unique categories, though I still included the totals.

In [46]:
print('FourSquare Data')
fs_df.head()

FourSquare Data


Unnamed: 0,fsq_id,distance,name,latitude,longitude,address,postal_code,station_id,category0,category1,category2,category3,category4,category5
0,4ae48f90f964a520ad9b21e3,470,Chorley Park,43.687377,-79.370803,250 Douglas Dr,M4W 2C1,009f180cf35ae1285733d98ccf058313,Park,,,,,
1,4dfe2ca5185009bba4df0608,905,The Don Valley Brick Works Park,43.684526,-79.365204,550 Bayview Ave,M4W 3X8,009f180cf35ae1285733d98ccf058313,Park,,,,,
2,4bf031dcf831c928eab401f2,960,David A. Balfour Park,43.685607,-79.388253,200 Mount Pleasant Rd,M4T 2C4,009f180cf35ae1285733d98ccf058313,Park,Sports and Recreation,,,,
3,4dfe2d6d185009bba4df0672,906,Moore Park Ravine,43.693691,-79.379762,205 Moore Ave,M4T 2K7,009f180cf35ae1285733d98ccf058313,Park,Sports and Recreation,,,,
4,4bc0d290920eb713dda1192c,984,Rosehill Reservoir,43.68626,-79.388544,75 Rosehill Ave,,009f180cf35ae1285733d98ccf058313,Park,,,,,


In [47]:
print('Yelp Data')
yelp_df.head()

Yelp Data


Unnamed: 0,yelp_id,name,review_count,rating,price,distance,latitude,longitude,address,postal_code,station_id,category0,category1,category2,category3
0,GO3d6vQBfByJN93Hco4dOQ,Megumi Mazesoba,37,4.6,$$,1293.173257,43.68876,-79.39196,46 Saint Clair Avenue E,M4T 1M9,009f180cf35ae1285733d98ccf058313,Japanese,,,
1,uGLAEGqeJa4jtP1Io0sOrg,Mineral,35,4.5,,1402.327352,43.67763,-79.38947,1027 Yonge Street,M4W 2K9,009f180cf35ae1285733d98ccf058313,Noodles,Seafood,Asian Fusion,
2,lDpVyjENOz_qbxbRoyBi8w,Black Camel,399,4.4,$,1426.509564,43.677122,-79.38921,4 Crescent Road,M4W 1S9,009f180cf35ae1285733d98ccf058313,Sandwiches,,,
3,RqOgBMpKYdsEOmBVmFXe3g,RYUS Noodle Bar,411,4.0,$$,1701.034869,43.6771,-79.35905,786 Broadview Avenue,M4K 2P7,009f180cf35ae1285733d98ccf058313,Ramen,,,
4,v1hfGBkWhDHIdRIfMJtp5A,Dolce Bakery,10,4.3,,25.13882,43.68617,-79.37609,420 Summerhill Avenue,M4W 2E4,009f180cf35ae1285733d98ccf058313,Bakeries,Desserts,Pizza,


In [48]:
all_categories_fs = []

for column in list(fs_df.columns):
    if column.startswith('category') == True: all_categories_fs.extend(list(fs_df[f'{column}'].value_counts().index))

unique_categories_fs = set(all_categories_fs)
print('Unique FourSquare Categories:', len(unique_categories_fs))

all_categories_yelp = []

for column in list(yelp_df.columns):
    if column.startswith('category') == True: all_categories_yelp.extend(list(yelp_df[f'{column}'].value_counts().index))

unique_categories_yelp = set(all_categories_yelp)
print('Unique Yelp Categories:', len(unique_categories_yelp))

Unique FourSquare Categories: 311
Unique Yelp Categories: 237


+ This isn't directly comparable because how each defines their categories can be different, and because Yelp uses radius as a suggestion. I still wanted to see how manyt resulting categories I got for each

Get the top 10 restaurants according to their rating

In [50]:
yelp_df.sort_values(by='rating', ascending=False).head(10)

Unnamed: 0,yelp_id,name,review_count,rating,price,distance,latitude,longitude,address,postal_code,station_id,category0,category1,category2,category3
407,a3q0ulHNNP8OnOKc0mBWRA,Jong Park Tae Kwon Do,1,5.0,,32.167055,43.745538,-79.209955,3730 Kingston Road,M1J 3H3,0b46fa9ccc3ac1fa00d279abd7e73df2,Taekwondo,,,
5068,igaTtgKFrp-6X9-TBHxE_g,Pinochio,4,5.0,,1256.737836,43.733735,-79.532453,45 Abraham Welsh Road,M9M 0C5,97416e3d0063fa4952e8e667f7a6de08,Desserts,Cafes,Ice Cream & Frozen Yogurt,
4465,1kGDxmL3PxsJZgbdJWDChQ,Looking Glass Adventures,10,5.0,,1822.290348,43.690473,-79.292976,2944 Danforth Avenue,M4C 1M5,84e8e3827807916c9c3ac9b60404346d,Escape Games,,,
4419,Uf00W5Yrqn7Pq6W-IfGXQw,Bereket Kebab House,3,5.0,,251.76459,43.768871,-79.185653,245 Morningside Ave D2,M1E 3E6,843602f3de2dc3c06cfacd5bf0a1a0fe,Turkish,,,
5658,zXxpMUZdS8lhs6F4PMYHlA,Grapefully Yours,2,5.0,$,170.906339,43.737204,-79.21849,3467 Kingston Rd,M1M 1R4,a96febb839b2b9bb6b6faee220f3f8c1,"Beer, Wine & Spirits",Wineries,,
5691,9dV86LKzD6WNPTf3FDiT_A,Lume Kitchen and Lounge,8,5.0,,76.259847,43.624228,-79.484404,72 Park Lawn Road,M8Y 3H8,ab463b4b4fbf00bbfbe972989481ed7d,Lounges,Breakfast & Brunch,Cocktail Bars,
2109,trg8oGeN5dErZw9mfyzV8g,Ocak Basi,4,5.0,,756.555869,43.76614,-79.46773,4544 Dufferin Street,M3H 5X2,3b8388fb5baa35cdc62ea9a8f7bb5fd7,Turkish,Kebab,,
3598,gMS5zYPhK3DKWhzjKSu_sg,Bhuri Bhoj - Happy Tummy,1,5.0,,90.101609,43.74911,-79.54274,3390 Weston Rd,M9M 2X3,6bd88200486000a93cc1ca5b78bc5ba9,Indian,Caterers,,
7117,MhRpSowt0btjV4mUplTQ6g,Greelz,1,5.0,,669.529615,43.69024,-79.49899,1354 Weston Road,M6M 4R8,d310230182f10034968c97f0c799ee0b,Burgers,Barbeque,Fast Food,
8488,NwsTOsBOUWn-9le4rY7Q_w,Omni Noodle,2,5.0,,136.037651,43.770239,-79.499332,115 Haynes Avenue,M3J 0L8,fbc181887030746137034a35fb5f7cfd,Noodles,,,
