# Getting the Canadian cities food venues data

By Francisco Tosetto da Silva

### Importing necessary libraries.

In [98]:
import os
import random

import folium
import geocoder
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from IPython.core.display import HTML
from IPython.display import Image
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

### First we set up the cities that we want to collect data from and find their location (latitude and longitude) utilizing the Geopy library.

In [99]:
# Geocoder queries for the ten cities, written as "<city>, Canada".
addresses = ['Toronto, Canada', 'Montreal, Canada', 'Calgary, Canada', 'Ottawa, Canada', 'Edmonton, Canada',
             'Winnipeg, Canada', 'Vancouver, Canada', 'Quebec City, Canada', 'Halifax, Canada', 'London, Canada']
lat = []
lng = []
geolocator = Nominatim(user_agent = "my-application", timeout = 5)
for address in addresses:
    location = geolocator.geocode(address)
    # geocode() yields None when the query has no match; stop with a clear
    # message instead of an AttributeError on `location.latitude`, and never
    # let `lat`/`lng` fall out of step with `addresses`.
    if location is None:
        raise ValueError('Could not geocode address: {}'.format(address))
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinates of {} are {}, {}.'.format(address, latitude, longitude))
    lat.append(latitude)
    lng.append(longitude)

The geograpical coordinates of Toronto, Canada are 43.653963, -79.387207.
The geograpical coordinates of Montreal, Canada are 45.4972159, -73.6103642.
The geograpical coordinates of Calgary, Canada are 51.02532675, -114.049868485806.
The geograpical coordinates of Ottawa, Canada are 45.421106, -75.690308.
The geograpical coordinates of Edmonton, Canada are 53.535411, -113.507996.
The geograpical coordinates of Winnipeg, Canada are 49.884017, -97.168579.
The geograpical coordinates of Vancouver, Canada are 49.2608724, -123.1139529.
The geograpical coordinates of Quebec City, Canada are 46.8259601, -71.2352226.
The geograpical coordinates of Halifax, Canada are 44.6486237, -63.5859487.
The geograpical coordinates of London, Canada are 42.988576, -81.246643.


In [4]:
# Show the collected coordinates: all latitudes first, then all longitudes.
for coordinates in (lat, lng):
    print(coordinates)

[43.653963, 45.4972159, 51.02532675, 45.421106, 53.535411, 49.884017, 49.2608724, 46.8259601, 44.6486237, 42.988576]
[-79.387207, -73.6103642, -114.049868485806, -75.690308, -113.507996, -97.168579, -123.1139529, -71.2352226, -63.5859487, -81.246643]


In [5]:
# Display names of the ten cities.
city = [
    'Toronto', 'Montreal', 'Calgary', 'Ottawa', 'Edmonton',
    'Winnipeg', 'Vancouver', 'Quebec City', 'Halifax', 'London',
]
# Every city is in Canada, so repeat the country once per city.
country = ['Canada'] * len(city)

### Now we build a dataframe with the cities' locations

In [6]:
# One row per city: name, country and the geocoded coordinates.
column_names = ['City', 'Country', 'Latitude', 'Longitude']
d = dict(zip(column_names, [city, country, lat, lng]))
df = pd.DataFrame(d)
df

Unnamed: 0,City,Country,Latitude,Longitude
0,Toronto,Canada,43.653963,-79.387207
1,Montreal,Canada,45.497216,-73.610364
2,Calgary,Canada,51.025327,-114.049868
3,Ottawa,Canada,45.421106,-75.690308
4,Edmonton,Canada,53.535411,-113.507996
5,Winnipeg,Canada,49.884017,-97.168579
6,Vancouver,Canada,49.260872,-123.113953
7,Quebec City,Canada,46.82596,-71.235223
8,Halifax,Canada,44.648624,-63.585949
9,London,Canada,42.988576,-81.246643


In [7]:
# Save the city coordinates to disk. The row index is written too (pandas
# default), so it shows up as an unnamed first column in the CSV.
df.to_csv('10_canadian_cities.csv')

In [36]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in the notebook source or its saved outputs.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('Warning: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
          'before calling the Foursquare API.')
VERSION = '20180605'  # sent as the `v` request parameter
LIMIT = 100  # sent as the `limit` request parameter
radius = 5000  # sent as the `radius` request parameter; reassigned by the probe cell below

### Now we set up the requests to the free Foursquare API to collect the venues data.

In [37]:
# Probe request: build one explore URL around the last geocoded city so the
# response format can be inspected before looping over every city.
LIMIT = 100

radius = 10000
section = 'food'

# Take the coordinates from the collected lists instead of relying on the
# `latitude`/`longitude` loop variables left over from the geocoding cell.
probe_latitude, probe_longitude = lat[-1], lng[-1]

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&section={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    probe_latitude,
    probe_longitude,
    radius,
    section,
    LIMIT)

# Display the URL with the credentials masked so they are not stored in the
# notebook output. Empty values are skipped: replacing '' would corrupt the text.
masked_url = url
for secret in (CLIENT_ID, CLIENT_SECRET):
    if secret:
        masked_url = masked_url.replace(secret, '***')
masked_url

'https://api.foursquare.com/v2/venues/explore?&client_id=***&client_secret=***&v=20180605&ll=42.988576,-81.246643&radius=10000&section=food&limit=100'

In [38]:
# Send the probe request. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() surfaces HTTP errors (bad credentials,
# quota exceeded) here rather than as a confusing KeyError further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [39]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must provide the category list under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present in `row`.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [40]:
# Pull the venue records out of the probe response and flatten the nested JSON
# into one column per dotted path.
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)

# Keep only the name, category list and coordinates of each venue.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# Collapse each category list to the name of its first entry.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted segment of every column name.
nearby_venues.columns = [label.rsplit('.', 1)[-1] for label in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Works,Restaurant,42.98228,-81.249781
1,Prince Albert's Diner,Diner,42.989256,-81.250884
2,Edgar + Joe's Cafe,Café,42.979983,-81.243563
3,The Early Bird,Diner,42.981468,-81.251113
4,Black Walnut Bakery Cafe,Café,42.99467,-81.252631


In [41]:
# Report how many venues the probe request produced (one row per venue).
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

100 venues were returned by Foursquare.


In [43]:
def getNearbyVenues(names, latitudes, longitudes, radius=2500):
    """Collect Foursquare 'food' venues around each of the given points.

    One explore request is sent per (name, latitude, longitude) triple. The
    module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT values are used
    to build each request.

    Parameters
    ----------
    names : iterable
        Label stored in the 'City' column for every venue found near a point.
    latitudes, longitudes : iterable
        Coordinates of the search centres, aligned with `names`.
    radius : number, default 2500
        Value sent as the `radius` request parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'City', 'City Latitude',
        'City Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty (but has these columns) when
        no venue is found or no search point is given.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['City',
                    'City Latitude',
                    'City Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    rows = []
    for name, city_lat, city_lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&section=food&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            city_lat,
            city_lng,
            radius,
            LIMIT)

        # make the GET request; fail fast on a stalled connection or an HTTP
        # error instead of hitting a KeyError on the missing payload
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue may come back without any category; record None then
            categories = venue.get('categories') or []
            rows.append((
                name,
                city_lat,
                city_lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards raises a length-mismatch error.
    return pd.DataFrame(rows, columns=column_names)

### Here we get the food venues near each city's location.

In [44]:
# Query the venues around every city centre stored in `df`.
city_names = df['City']
city_latitudes = df['Latitude']
city_longitudes = df['Longitude']
cities_restaurants = getNearbyVenues(city_names, city_latitudes, city_longitudes)

In [45]:
# Sanity check: print (rows, columns) of the result and preview the first rows.
print(cities_restaurants.shape)
cities_restaurants.head()

(979, 7)


Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Toronto,43.653963,-79.387207,Japango,43.655268,-79.385165,Sushi Restaurant
1,Toronto,43.653963,-79.387207,Sansotei Ramen 三草亭,43.655157,-79.386501,Ramen Restaurant
2,Toronto,43.653963,-79.387207,Manpuku まんぷく,43.653612,-79.390613,Japanese Restaurant
3,Toronto,43.653963,-79.387207,Cafe Plenty,43.654571,-79.38945,Café
4,Toronto,43.653963,-79.387207,Eggspectation Bell Trinity Square,43.653144,-79.38198,Breakfast Spot


In [46]:
# Cache the fetched venues on disk so later sessions can skip the API calls.
# The file is written on first use: it is not created anywhere else in this
# notebook, so a fresh run would otherwise fail here. The index is saved too
# and comes back as the 'Unnamed: 0' column on reading.
restaurants_csv = 'Canadian_10_cities_restaurants.csv'
if not os.path.exists(restaurants_csv):
    cities_restaurants.to_csv(restaurants_csv)
cities_restaurants = pd.read_csv(restaurants_csv)

In [47]:
# Remove the saved index column that read_csv brings back as 'Unnamed: 0'.
# Reassigning with errors='ignore' keeps the cell safe to re-run; the in-place
# drop raised KeyError on the second execution.
cities_restaurants = cities_restaurants.drop('Unnamed: 0', axis = 1, errors = 'ignore')
cities_restaurants.head()

Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Toronto,43.653963,-79.387207,Japango,43.655268,-79.385165,Sushi Restaurant
1,Toronto,43.653963,-79.387207,Sansotei Ramen 三草亭,43.655157,-79.386501,Ramen Restaurant
2,Toronto,43.653963,-79.387207,Manpuku まんぷく,43.653612,-79.390613,Japanese Restaurant
3,Toronto,43.653963,-79.387207,Cafe Plenty,43.654571,-79.38945,Café
4,Toronto,43.653963,-79.387207,Eggspectation Bell Trinity Square,43.653144,-79.38198,Breakfast Spot


### Here we use the venues' locations as new search centres to expand our search and results.

In [48]:
# Search again, this time centred on every venue already found; each result
# keeps the city label of the venue that seeded the search.
seed_cities = cities_restaurants['City']
seed_latitudes = cities_restaurants['Venue Latitude']
seed_longitudes = cities_restaurants['Venue Longitude']
cities_restaurants_expanded = getNearbyVenues(seed_cities, seed_latitudes, seed_longitudes)

In [186]:
# Cache the expanded search on disk so later sessions can skip the API calls.
# The file is written on first use: it is not created anywhere else in this
# notebook, so a fresh run would otherwise fail here. The index is saved too
# and comes back as the 'Unnamed: 0' column on reading.
expanded_csv = 'Canadian10expanded.csv'
if not os.path.exists(expanded_csv):
    cities_restaurants_expanded.to_csv(expanded_csv)
cities_restaurants_expanded = pd.read_csv(expanded_csv)

In [187]:
# Remove the saved index column that read_csv brings back as 'Unnamed: 0'.
# Reassigning with errors='ignore' keeps the cell safe to re-run; the in-place
# drop raised KeyError on the second execution.
cities_restaurants_expanded = cities_restaurants_expanded.drop('Unnamed: 0', axis = 1, errors = 'ignore')
cities_restaurants_expanded.head()

Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Toronto,43.655268,-79.385165,The Queen and Beaver Public House,43.657472,-79.383524,Gastropub
1,Toronto,43.655268,-79.385165,Japango,43.655268,-79.385165,Sushi Restaurant
2,Toronto,43.655268,-79.385165,Eggspectation Bell Trinity Square,43.653144,-79.38198,Breakfast Spot
3,Toronto,43.655268,-79.385165,Sansotei Ramen 三草亭,43.655157,-79.386501,Ramen Restaurant
4,Toronto,43.655268,-79.385165,Banh Mi Boys,43.659188,-79.382131,Sandwich Place


In [188]:
# Keep the first row for every venue name and discard the repeats.
# NOTE(review): matching on the name alone also merges different branches of
# the same chain (possibly in different cities) - confirm this is intended.
cities_restaurants_expanded = cities_restaurants_expanded.drop_duplicates(subset = ['Venue'], keep = 'first')

### Now we check that there are no duplicates left after deleting them.

In [189]:
# Count the rows per venue name; every count should be 1 once duplicates are gone.
cities_restaurants_expanded['Venue'].value_counts()

Chop Steakhouse & Bar                 1
Fable Diner                           1
Provisions                            1
Che Restobar                          1
Montana's                             1
MexiCactus                            1
Nguyên Phi                            1
Sidewalk Citizen Bakery               1
White Star Diner                      1
Double Greeting Won Ton House         1
Elena                                 1
Buca                                  1
Cibo Wine Bar King Street             1
Tacofino                              1
Wawel Bakery                          1
Le Hobbit                             1
Monsieur Restaurant + Bar             1
Noodle Express                        1
The Mackenzie Room                    1
Zipang Provisions                     1
Pumpernickel's Deli                   1
Jackie Parker Room                    1
Little Caesars Pizza                  1
Prime Time Restaurant                 1
Café Shaika                           1


In [190]:
# (rows, columns) of the de-duplicated venue table.
cities_restaurants_expanded.shape

(2581, 7)

### Then we check the number of different food venue categories and how many times each one appears.

In [191]:
# Number of distinct venue categories (a missing value would count as one).
category_count = cities_restaurants_expanded['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(category_count))

There are 104 uniques categories.


In [192]:
# One-hot encode the venue category: one indicator column per category, named
# after the category itself (no prefix).
restaurants_onehot = pd.get_dummies(cities_restaurants_expanded[['Venue Category']], prefix="", prefix_sep="")

# Put the city of each venue in front of the indicator columns.
restaurants_onehot.insert(0, 'City', cities_restaurants_expanded['City'])

restaurants_onehot.head()

Unnamed: 0,City,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [193]:
# (rows, columns): one row per venue, one column per category plus 'City'.
restaurants_onehot.shape

(2581, 105)

### Now we remove the categories that do not have a specific origin, or are generic titles.

In [194]:
# Categories that describe a kind of venue rather than a cuisine.
generic_categories = ['Bakery', 'Restaurant', 'Café', 'Breakfast Spot', 'Deli / Bodega',
                      'Vegetarian / Vegan Restaurant', 'Sandwich Place']

# Reassign instead of dropping in place, and ignore labels that are absent:
# the cell can then be re-run, and it no longer crashes when a fresh download
# happens not to contain one of these categories.
restaurants_onehot = restaurants_onehot.drop(generic_categories, axis = 1, errors = 'ignore')

### Here is the number of venues from each city.

In [195]:
# Number of venue rows per city, largest first (value_counts default order).
restaurants_onehot['City'].value_counts()

Montreal       446
Vancouver      329
Calgary        282
Toronto        268
Winnipeg       258
Edmonton       250
Quebec City    224
Ottawa         188
Halifax        175
London         161
Name: City, dtype: int64

### Now we calculate the share (as a fraction between 0 and 1) of each type of restaurant in each city.

In [196]:
# Averaging the 0/1 indicators per city gives the fraction of that city's
# venues falling in each category; 'City' is restored as a regular column.
city_groups = restaurants_onehot.groupby('City')
restaurants_mean = city_groups.mean().reset_index()
restaurants_mean

Unnamed: 0,City,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Belgian Restaurant,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Ukrainian Restaurant,Vietnamese Restaurant,Wings Joint
0,Calgary,0.0,0.003546,0.042553,0.0,0.003546,0.024823,0.010638,0.003546,0.0,...,0.039007,0.003546,0.0,0.003546,0.003546,0.010638,0.0,0.0,0.060284,0.0
1,Edmonton,0.0,0.0,0.02,0.0,0.0,0.032,0.024,0.0,0.0,...,0.016,0.0,0.0,0.0,0.008,0.036,0.004,0.004,0.044,0.0
2,Halifax,0.0,0.0,0.022857,0.0,0.0,0.0,0.005714,0.005714,0.0,...,0.051429,0.0,0.0,0.005714,0.0,0.011429,0.011429,0.0,0.017143,0.005714
3,London,0.0,0.006211,0.037267,0.0,0.0,0.031056,0.018634,0.0,0.0,...,0.055901,0.0,0.0,0.0,0.0,0.031056,0.0,0.0,0.024845,0.0
4,Montreal,0.0,0.002242,0.004484,0.0,0.002242,0.015695,0.013453,0.011211,0.0,...,0.022422,0.0,0.004484,0.002242,0.004484,0.011211,0.0,0.0,0.026906,0.002242
5,Ottawa,0.0,0.0,0.015957,0.0,0.0,0.015957,0.021277,0.010638,0.005319,...,0.031915,0.0,0.0,0.005319,0.037234,0.010638,0.0,0.0,0.069149,0.0
6,Quebec City,0.004464,0.0,0.004464,0.0,0.0,0.022321,0.013393,0.004464,0.0,...,0.022321,0.004464,0.0,0.0,0.0,0.022321,0.0,0.0,0.008929,0.0
7,Toronto,0.003731,0.0,0.026119,0.003731,0.0,0.018657,0.007463,0.003731,0.0,...,0.029851,0.0,0.0,0.011194,0.011194,0.022388,0.0,0.0,0.007463,0.0
8,Vancouver,0.0,0.0,0.006079,0.0,0.0,0.012158,0.009119,0.009119,0.006079,...,0.06079,0.0,0.0,0.018237,0.009119,0.012158,0.0,0.0,0.036474,0.0
9,Winnipeg,0.0,0.0,0.042636,0.0,0.003876,0.062016,0.007752,0.0,0.0,...,0.065891,0.0,0.0,0.0,0.007752,0.015504,0.0,0.0,0.031008,0.0


In [197]:
# Export the per-city category fractions (the row index is written as well).
restaurants_mean.to_csv('CAN_All_Rests.csv')

In [198]:
# Display the table in reverse alphabetical order of city. The sorted copy is
# only shown, not assigned, so `restaurants_mean` itself keeps its order.
restaurants_mean.sort_values(by = 'City', ascending=False)

Unnamed: 0,City,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Belgian Restaurant,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Ukrainian Restaurant,Vietnamese Restaurant,Wings Joint
9,Winnipeg,0.0,0.0,0.042636,0.0,0.003876,0.062016,0.007752,0.0,0.0,...,0.065891,0.0,0.0,0.0,0.007752,0.015504,0.0,0.0,0.031008,0.0
8,Vancouver,0.0,0.0,0.006079,0.0,0.0,0.012158,0.009119,0.009119,0.006079,...,0.06079,0.0,0.0,0.018237,0.009119,0.012158,0.0,0.0,0.036474,0.0
7,Toronto,0.003731,0.0,0.026119,0.003731,0.0,0.018657,0.007463,0.003731,0.0,...,0.029851,0.0,0.0,0.011194,0.011194,0.022388,0.0,0.0,0.007463,0.0
6,Quebec City,0.004464,0.0,0.004464,0.0,0.0,0.022321,0.013393,0.004464,0.0,...,0.022321,0.004464,0.0,0.0,0.0,0.022321,0.0,0.0,0.008929,0.0
5,Ottawa,0.0,0.0,0.015957,0.0,0.0,0.015957,0.021277,0.010638,0.005319,...,0.031915,0.0,0.0,0.005319,0.037234,0.010638,0.0,0.0,0.069149,0.0
4,Montreal,0.0,0.002242,0.004484,0.0,0.002242,0.015695,0.013453,0.011211,0.0,...,0.022422,0.0,0.004484,0.002242,0.004484,0.011211,0.0,0.0,0.026906,0.002242
3,London,0.0,0.006211,0.037267,0.0,0.0,0.031056,0.018634,0.0,0.0,...,0.055901,0.0,0.0,0.0,0.0,0.031056,0.0,0.0,0.024845,0.0
2,Halifax,0.0,0.0,0.022857,0.0,0.0,0.0,0.005714,0.005714,0.0,...,0.051429,0.0,0.0,0.005714,0.0,0.011429,0.011429,0.0,0.017143,0.005714
1,Edmonton,0.0,0.0,0.02,0.0,0.0,0.032,0.024,0.0,0.0,...,0.016,0.0,0.0,0.0,0.008,0.036,0.004,0.004,0.044,0.0
0,Calgary,0.0,0.003546,0.042553,0.0,0.003546,0.024823,0.010638,0.003546,0.0,...,0.039007,0.003546,0.0,0.003546,0.003546,0.010638,0.0,0.0,0.060284,0.0


In [199]:
# Write the same per-city fractions to a second file.
# NOTE(review): presumably consumed by another notebook - confirm both exports are needed.
restaurants_mean.to_csv('CaML10.csv')

In [200]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of `row`.

    The first entry of `row` is skipped (it holds the row's label rather than
    a score). The remaining entries are ordered from largest to smallest and
    the index labels of the first `num_top_venues` of them are returned as an
    array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

### Now we can start to build our dataframe with the most frequent food venues in each city.

In [201]:
num_top_venues = 10


def _ordinal(n):
    """Return `n` with its English ordinal suffix, e.g. 1st, 2nd, 3rd, 4th, 11th, 21st."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# One column per rank. The suffix is computed explicitly instead of indexing a
# three-item list inside a bare `except`, which hid unrelated errors and gave
# wrong labels such as "21th" for larger values of num_top_venues.
columns = ['City'] + ['{} Most Common Food Venue'.format(_ordinal(rank))
                      for rank in range(1, num_top_venues + 1)]

# create a new dataframe with one row per city
cities_venues_sorted = pd.DataFrame(columns=columns)
cities_venues_sorted['City'] = restaurants_mean['City']

# fill every row with that city's top categories, most frequent first
for ind in range(restaurants_mean.shape[0]):
    cities_venues_sorted.iloc[ind, 1:] = return_most_common_venues(restaurants_mean.iloc[ind, :], num_top_venues)

cities_venues_sorted

Unnamed: 0,City,1st Most Common Food Venue,2nd Most Common Food Venue,3rd Most Common Food Venue,4th Most Common Food Venue,5th Most Common Food Venue,6th Most Common Food Venue,7th Most Common Food Venue,8th Most Common Food Venue,9th Most Common Food Venue,10th Most Common Food Venue
0,Calgary,Vietnamese Restaurant,Pizza Place,Italian Restaurant,American Restaurant,Sushi Restaurant,Chinese Restaurant,Mexican Restaurant,Steakhouse,Diner,Burger Joint
1,Edmonton,Chinese Restaurant,Italian Restaurant,Vietnamese Restaurant,Pizza Place,Mexican Restaurant,Thai Restaurant,Gastropub,Asian Restaurant,BBQ Joint,Korean Restaurant
2,Halifax,Pizza Place,Seafood Restaurant,Italian Restaurant,Sushi Restaurant,Chinese Restaurant,Japanese Restaurant,Burger Joint,Steakhouse,Gastropub,Diner
3,London,Pizza Place,Sushi Restaurant,Middle Eastern Restaurant,Italian Restaurant,Mexican Restaurant,American Restaurant,Indian Restaurant,Chinese Restaurant,Thai Restaurant,Asian Restaurant
4,Montreal,French Restaurant,Pizza Place,Japanese Restaurant,Vietnamese Restaurant,Indian Restaurant,Middle Eastern Restaurant,Fast Food Restaurant,Italian Restaurant,Sushi Restaurant,Chinese Restaurant
5,Ottawa,Vietnamese Restaurant,Pizza Place,Indian Restaurant,Tapas Restaurant,Italian Restaurant,Sushi Restaurant,New American Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Chinese Restaurant
6,Quebec City,French Restaurant,Pizza Place,Italian Restaurant,Gastropub,Fast Food Restaurant,Bistro,Asian Restaurant,Thai Restaurant,Sushi Restaurant,Steakhouse
7,Toronto,Italian Restaurant,Japanese Restaurant,Gastropub,Sushi Restaurant,French Restaurant,Pizza Place,American Restaurant,Mexican Restaurant,Steakhouse,Diner
8,Vancouver,Japanese Restaurant,Sushi Restaurant,Pizza Place,Seafood Restaurant,Chinese Restaurant,Vietnamese Restaurant,Indian Restaurant,Italian Restaurant,French Restaurant,Gastropub
9,Winnipeg,Sushi Restaurant,Asian Restaurant,Pizza Place,Burger Joint,Chinese Restaurant,American Restaurant,Italian Restaurant,Vietnamese Restaurant,French Restaurant,Greek Restaurant


In [202]:
# Unpack the ranking table into one plain list per column.
city = cities_venues_sorted['City'].tolist()
rank_labels = ['1st', '2nd', '3rd'] + ['{}th'.format(rank) for rank in range(4, 11)]
(st, nd, rd, th4, th5, th6, th7, th8, th9, th10) = [
    cities_venues_sorted['{} Most Common Food Venue'.format(label)].tolist()
    for label in rank_labels
]
# One 'Canada' entry per city.
country = ['Canada'] * 10

In [203]:
# Rebuild the ranking table from the per-rank lists, keyed by column title.
rank_names = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
rank_lists = [st, nd, rd, th4, th5, th6, th7, th8, th9, th10]

data_c = {'City': city}
for rank_name, venues_at_rank in zip(rank_names, rank_lists):
    data_c['{} Most Common Food Venue'.format(rank_name)] = venues_at_rank

# Sort alphabetically by city and use the city name as the index.
new_df = pd.DataFrame(data = data_c).sort_values(by = ['City']).set_index('City')

## Now we have the dataframe with the most frequent food venue categories organized, with the cities as index and sorted by city name.

In [204]:
# Export the ranking table; 'City' is the index, so it becomes the first CSV column.
new_df.to_csv('Canada10MostTable.csv')

In [205]:
# Display the final ranking table (ten cities, so the whole frame is shown).
new_df

Unnamed: 0_level_0,1st Most Common Food Venue,2nd Most Common Food Venue,3rd Most Common Food Venue,4th Most Common Food Venue,5th Most Common Food Venue,6th Most Common Food Venue,7th Most Common Food Venue,8th Most Common Food Venue,9th Most Common Food Venue,10th Most Common Food Venue
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Calgary,Vietnamese Restaurant,Pizza Place,Italian Restaurant,American Restaurant,Sushi Restaurant,Chinese Restaurant,Mexican Restaurant,Steakhouse,Diner,Burger Joint
Edmonton,Chinese Restaurant,Italian Restaurant,Vietnamese Restaurant,Pizza Place,Mexican Restaurant,Thai Restaurant,Gastropub,Asian Restaurant,BBQ Joint,Korean Restaurant
Halifax,Pizza Place,Seafood Restaurant,Italian Restaurant,Sushi Restaurant,Chinese Restaurant,Japanese Restaurant,Burger Joint,Steakhouse,Gastropub,Diner
London,Pizza Place,Sushi Restaurant,Middle Eastern Restaurant,Italian Restaurant,Mexican Restaurant,American Restaurant,Indian Restaurant,Chinese Restaurant,Thai Restaurant,Asian Restaurant
Montreal,French Restaurant,Pizza Place,Japanese Restaurant,Vietnamese Restaurant,Indian Restaurant,Middle Eastern Restaurant,Fast Food Restaurant,Italian Restaurant,Sushi Restaurant,Chinese Restaurant
Ottawa,Vietnamese Restaurant,Pizza Place,Indian Restaurant,Tapas Restaurant,Italian Restaurant,Sushi Restaurant,New American Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Chinese Restaurant
Quebec City,French Restaurant,Pizza Place,Italian Restaurant,Gastropub,Fast Food Restaurant,Bistro,Asian Restaurant,Thai Restaurant,Sushi Restaurant,Steakhouse
Toronto,Italian Restaurant,Japanese Restaurant,Gastropub,Sushi Restaurant,French Restaurant,Pizza Place,American Restaurant,Mexican Restaurant,Steakhouse,Diner
Vancouver,Japanese Restaurant,Sushi Restaurant,Pizza Place,Seafood Restaurant,Chinese Restaurant,Vietnamese Restaurant,Indian Restaurant,Italian Restaurant,French Restaurant,Gastropub
Winnipeg,Sushi Restaurant,Asian Restaurant,Pizza Place,Burger Joint,Chinese Restaurant,American Restaurant,Italian Restaurant,Vietnamese Restaurant,French Restaurant,Greek Restaurant


### Now we save it to a file to be used in the other notebooks.

In [206]:
# Export the same ranking table under the file name used by the other notebooks.
new_df.to_csv('ML10CanadaMostFoodVenues.csv')

Notebook by Francisco Tosetto da Silva, made for the Capstone Project of the IBM Data Science Professional Certificate.