## Project Title: Finding Suitable Toronto Neighborhood(s) for Ethnic Food Chain Restaurant

### Upload and Read CSV file containing Postal Codes and Boroughs for Toronto Neighborhoods into a pandas Data Frame df1_Toronto

In [1]:
import pandas as pd

df1_Toronto = pd.read_csv('TorontoZIP.csv')
df1_Toronto.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Checking for Missing Data in the Data Frame and then Provide a Head of this Missing Data. We will address this missing data a little later where we drop the columns that contain NaN

In [2]:
missing_data = df1_Toronto.isnull()

In [3]:
missing_data.shape

(180, 3)

### First we Rename the Columns of the Above Data Frame ...

In [5]:
df1_Toronto.columns = ['PostalCode','Borough','Neighborhood']
df1_Toronto.dtypes

PostalCode      object
Borough         object
Neighborhood    object
dtype: object

### Then we strip the column data of any extra whitespaces in front or back ...

In [6]:

df1_Toronto['PostalCode'] = df1_Toronto['PostalCode'].str.strip()
df1_Toronto['Borough'] = df1_Toronto['Borough'].str.strip()
df1_Toronto['Neighborhood'] = df1_Toronto['Neighborhood'].str.strip()

df1_Toronto.dtypes


PostalCode      object
Borough         object
Neighborhood    object
dtype: object

In [7]:
df1_Toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### In the Borough column we replace NOT ASSIGNED by NaN the PYTHON default for no data ...

In [8]:
import numpy as np
df1_Toronto["Borough"].replace("Not assigned",np.nan,inplace=True)

In [8]:
df1_Toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Drop rows of Data Frame that have no Boroughs

In [9]:
df1_Toronto.dropna(subset=["Borough"], axis=0, inplace=True)

df1_Toronto.reset_index(drop=True, inplace=True)

In [10]:
df1_Toronto.shape

(103, 3)

### In Neighborhood Column replace forward slash with commas for more desirable format for Neighborhoods

In [11]:
df1_Toronto[["Neighborhood"]] = df1_Toronto[["Neighborhood"]].replace(to_replace=r' / ', value=', ', regex=True)

In [12]:
df1_Toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [13]:
xdim, ydim = df1_Toronto.shape

### Check for any NOT ASSIGNED designations in Neighborhood Column

In [14]:
for i in range(xdim):
    if df1_Toronto.iloc[i,2]=='Not assigned':
        print('There is an Unassigned Neighborhood')

### Upload and Read CSV file containing Postal Codes and Boroughs for Toronto Neighborhoods into a pandas Data Frame df2_Toronto

In [2]:
import pandas as pd
df2_Toronto = pd.read_csv('TorontoGEOSPAT.csv')
df2_Toronto.head()




Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
df2_Toronto.columns = ['PostalCode','Latitude','Longitude']
df2_Toronto.dtypes

PostalCode     object
Latitude      float64
Longitude     float64
dtype: object

In [17]:
df2_Toronto['PostalCode'] = df2_Toronto['PostalCode'].str.strip()

df2_Toronto.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### The Latitude and Longitude Data is Ready to Be Merged with the Data Frame of Postal Codes, Boroughs and Neighborhoods created in the prior part.
### First we make a copy of df1_Toronto and call it df3_Toronto so as to keep the former safe for future reference as follows:


In [18]:

df3_Toronto = df1_Toronto.copy()


In [19]:
df3_Toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Now we use the Pandas DataFrame Merge Function to Merge df3_Toronto with df2_Toronto on the PostalCode Key as below. Then we name the new DataFrame df4_Toronto. Finally we check the shape of df4_Toronto. It should be (103, 5) owing to the new columns of Latitude and Longitude added ...

In [20]:
df4_Toronto = df3_Toronto.merge(df2_Toronto, left_on='PostalCode', right_on='PostalCode')

df4_Toronto.shape

(103, 5)

### Again we make a copy of df4_Toronto and call it dfs_Toronto so we can use it to find the suitable neighborhoods where we should consider opening the new restaurant ...

In [21]:

df4_Toronto.head()

#dfs_Toronto = df4_Toronto.iloc[0:100:10,:]

dfs_Toronto = df4_Toronto.copy()

dfs_Toronto.shape

(103, 5)

### We start with importing and installing the necessary packages

In [22]:
#!pip install geopy
!pip install folium
import pandas as pd

#import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
#from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Setting Up FOURSQUARE API Location Data Credentials ...

In [23]:
CLIENT_ID = 'IFDZA4FFK2YR53ZE4AKVGTABRV0HSRQGDMCDWUQGKSF2EQU2' # your Foursquare ID
CLIENT_SECRET = 'MPYD03EF42WT2Z5RR51JCQAELZAD4TPCIX35GVK020OEOXCA' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version
LIMIT = 100
radius = 1000

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: IFDZA4FFK2YR53ZE4AKVGTABRV0HSRQGDMCDWUQGKSF2EQU2
CLIENT_SECRET:MPYD03EF42WT2Z5RR51JCQAELZAD4TPCIX35GVK020OEOXCA


### Establish a Search Query to use in the FOURSQUARE API to look for restaurant venues ... 

In [24]:
search_query = 'Restaurant'
print(search_query + ' .... OK!')


Restaurant .... OK!


### Function getNearybyRestaurants that returns Data Frame with Neighborhood, Latitude, Longitude and Short Name Category for Each Neighborhood ...

In [26]:
def getNearbyRestaurants(names, latitudes, longitudes, radius):
    column_names = ['Neighborhood', 'Latitude', 'Longitude','Category'] 
    TorontoRest = pd.DataFrame(columns = column_names)
    for name, lat, lng in zip(names, latitudes, longitudes):
#        print(name,lat,lng)
#        print(name)           
# create the API request URL
        url='https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(             
        CLIENT_ID,CLIENT_SECRET,lat,lng,VERSION,search_query,radius,LIMIT)
            
        # make the GET request
        results = requests.get(url).json()['response']['venues']
        check = len(results)
#        print('Length of Results = ',check)
#        print(len(results))
#        print(results)
        
        # return only relevant information for each nearby venue
        if (check > 0):
            for v in results:
                check2 = len(v['categories'])
                if (check2 > 0):
#                    print('Length of vcategories = ',len(v['categories']))
#                    print(v['categories'])
                    x = v['categories'][0]['shortName']
                    print('The Short Name is: ',x)
#                x = v['categories'][0]['name']
#                print(v['categories'][0]['shortName'])
                    TorontoRest = TorontoRest.append({'Neighborhood':name,'Latitude':lat,'Longitude':lng,'Category':x},ignore_index=True)
#            venues_list.append([name,lat,lng,v['categories'][0]['shortName']])
#        venues_list.append([(name,lat,lng,v['categories'][0]['shortName']) for v in results])

#    Tor_Rest = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
#    Tor_Rest.columns = ['Neighborhood', 
#                  'Neighborhood Latitude', 
#                  'Neighborhood Longitude', 
#                  'Category']
    
    return(TorontoRest)

### Call The Function Above to Create DataFrame with Neighborhoods, Latitude, Longitude and Category of Restaurant.

### Call the returned data frame Tor_Rest

In [28]:
a = dfs_Toronto['Neighborhood']
b = dfs_Toronto['Latitude']
c = dfs_Toronto['Longitude']

Tor_Rest = getNearbyRestaurants(a,b,c,radius)

The Short Name is:  Caribbean
The Short Name is:  Restaurant
The Short Name is:  Indian
The Short Name is:  American
The Short Name is:  American
The Short Name is:  Ethiopian
The Short Name is:  Breakfast
The Short Name is:  African
The Short Name is:  Asian
The Short Name is:  Food
The Short Name is:  Restaurant
The Short Name is:  Breakfast
The Short Name is:  Diner
The Short Name is:  Restaurant
The Short Name is:  Lounge
The Short Name is:  Restaurant
The Short Name is:  Greek
The Short Name is:  Breakfast
The Short Name is:  Vietnamese
The Short Name is:  Spanish
The Short Name is:  Asian
The Short Name is:  Thai
The Short Name is:  Restaurant
The Short Name is:  Italian
The Short Name is:  Breakfast
The Short Name is:  Sandwiches
The Short Name is:  Restaurant
The Short Name is:  Diner
The Short Name is:  Gastropub
The Short Name is:  Beer Bar
The Short Name is:  Chinese
The Short Name is:  Vietnamese
The Short Name is:  Sushi
The Short Name is:  Breakfast
The Short Name is:  Am

### Finding Shape of Returned Data Frame ...

In [29]:
Tor_Rest.shape


(1365, 4)

## Some Exploratory Data Analytis ...
### 1 . Group Restaurants by Categories to Obtain a Count for Each ...

In [30]:

Tor_Rest.groupby(['Neighborhood']).count()
        

Unnamed: 0_level_0,Latitude,Longitude,Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Agincourt,20,20,20
"Alderwood, Long Branch",3,3,3
"Bathurst Manor, Wilson Heights, Downsview North",5,5,5
"Bedford Park, Lawrence Manor East",6,6,6
Berczy Park,50,50,50
"Birch Cliff, Cliffside West",3,3,3
"Brockton, Parkdale Village, Exhibition Place",13,13,13
Business reply mail Processing CentrE,2,2,2
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",4,4,4
Caledonia-Fairbanks,10,10,10


### 2. Find Unique Restaurant Venue Categories Using df.unique() function from Pandas ...

In [31]:
a = Tor_Rest['Category'].unique()
print("There are %d Unique Venue Categories" % len(a))

There are 101 Unique Venue Categories


### Analyze each Neighborhood for the Restaurant categories via One Hot Encoding. Here we use the pd.get_dummies function to convert categorical data such as restaurant category names to integer numeric data

In [32]:
# One Hot Encoding ...
Tor_Rest_onehot = pd.get_dummies(Tor_Rest[['Category']], prefix="", prefix_sep="")

# Add neighborhood column back to dataframe
Tor_Rest_onehot['Neighborhood'] = Tor_Rest['Neighborhood'] 

# Move neighborhood column to the first column
fixed_columns = [Tor_Rest_onehot.columns[-1]] + list(Tor_Rest_onehot.columns[:-1])
Tor_Rest_onehot = Tor_Rest_onehot[fixed_columns]

Tor_Rest_onehot.shape

Tor_Rest_onehot.head()

Unnamed: 0,Neighborhood,African,American,Argentinian,Asian,Bagels,Bar,Beer Bar,Bistro,Breakfast,Building,Burgers,Café,Cajun / Creole,Cantonese,Caribbean,Chinese,Cocktail,Coworking Space,Deli / Bodega,Department Store,Dim Sum,Diner,Dive Bar,Eastern European,English,Entertainment,Ethiopian,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Grocery Store,Gym,Hakka,Halal,Hotel,Hotpot,Hunan,Ice Cream,Indian,Indian Chinese,Italian,Japanese,Jazz Club,Juice Bar,Karaoke,Korean,Latin American,Lebanese,Lounge,Mac & Cheese,Malay,Mediterranean,Mexican,Middle Eastern,Modern European,Molecular Gastronomy,Mongolian,Moroccan,Movie Theater,Music Venue,New American,Nightclub,Nightlife,Noodles,Office,Other Event,Persian,Peruvian,Pizza,Polish,Portuguese,Pub,Restaurant,Russian,Sandwiches,Scandinavian,Seafood,Shop,South American,Souvlaki,Spanish,Steakhouse,Sushi,Szechuan,Tapas,Thai,Theme Restaurant,Tibetan,Turkish,Vegetarian / Vegan,Vietnamese,Vineyard,Wine Bar,Wings
0,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Group Rows by Neighborhoods and Take Mean ... Then Reset Index ...

In [34]:
Tor_Rest_grouped = Tor_Rest_onehot.groupby('Neighborhood').mean().reset_index()
Tor_Rest_grouped.head()

Tor_Rest_grouped.shape

#Tor_Rest_grouped.head(5)

(90, 102)

### Function to Sort Restaurant Categories in Descending Order. This is purely illustrative to show the top restaurant venues for each Neighborhood ...

In [35]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Get DataFrame with top 20 Restaurant Categories in Each Neighborhood. Again illustrative in nature.

In [36]:
num_top_venues = 20

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Tor_Rest_venues_sorted = pd.DataFrame(columns=columns)
Tor_Rest_venues_sorted['Neighborhood'] = Tor_Rest_grouped['Neighborhood']

for ind in np.arange(Tor_Rest_grouped.shape[0]):
    Tor_Rest_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Tor_Rest_grouped.iloc[ind, :], num_top_venues)

Tor_Rest_venues_sorted.head()
#neighborhoods_venues_sorted.shape

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Agincourt,Chinese,Restaurant,Filipino,Malay,American,Indian,Asian,Other Event,Korean,Sandwiches,Cantonese,Furniture / Home,Wings,French,Food,Fast Food,Falafel,Gastropub,Grocery Store,German
1,"Alderwood, Long Branch",Pizza,Korean,Shop,Wings,German,Ethiopian,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Greek,English,Grocery Store,Gym,Hakka,Halal
2,"Bathurst Manor, Wilson Heights, Downsview North",Middle Eastern,Wings,Bar,Sandwiches,Greek,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,Entertainment,Gym,Hakka,Halal,Hotel
3,"Bedford Park, Lawrence Manor East",Restaurant,Italian,Filipino,Breakfast,Sushi,German,Ethiopian,Event Space,Falafel,Fast Food,Food,French,Furniture / Home,Gastropub,Grocery Store,Greek,English,Gym,Hakka,Halal
4,Berczy Park,Restaurant,Asian,Fast Food,Diner,New American,Indian,Japanese,Sandwiches,Wine Bar,Molecular Gastronomy,Food,Deli / Bodega,Lounge,Pub,Caribbean,Mongolian,Thai,American,Spanish,Sushi


### Perform K-Means Clustering on the Data Frame Above. Note this K-Means Machine Learning Algorithm is unsupervised and unlabeled AND it is applied to the Data Frame Toronto_Rest_grouped

In [37]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

Tor_Rest_grouped_clustering = Tor_Rest_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Tor_Rest_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

kmeans.labels_

array([3, 4, 4, 2, 2, 4, 4, 4, 1, 2, 2, 4, 3, 4, 4, 3, 3, 2, 4, 2, 3, 2,
       3, 2, 4, 4, 2, 4, 2, 4, 2, 4, 2, 3, 2, 4, 3, 2, 4, 2, 1, 3, 2, 3,
       1, 0, 2, 3, 4, 4, 4, 4, 2, 4, 2, 2, 4, 3, 3, 2, 2, 4, 2, 4, 2, 4,
       4, 2, 2, 4, 3, 2, 2, 4, 4, 4, 4, 0, 3, 2, 3, 2, 2, 4, 4, 4, 4, 2,
       4, 0])

### So now we have clustered our neighborhoods based on the restaurant venues around them. Next we will use Folium to create a cluster map. To this end we need some more analysis as below.

In [38]:
# add clustering labels
Tor_Rest_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Tor_Rest_merged = dfs_Toronto.copy()

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Tor_Rest_merged = Tor_Rest_merged.join(Tor_Rest_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Tor_Rest_merged.head() 



Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,,,,,,,,,,,,,,,,,,,,,
1,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Indian,Restaurant,Caribbean,Wings,German,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Greek,Entertainment,Grocery Store,Gym,Hakka,Halal,Hotel
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2.0,Restaurant,Breakfast,Diner,American,Asian,Spanish,Lounge,Greek,Gastropub,Food,Ethiopian,Sandwiches,African,Italian,Beer Bar,Thai,Vietnamese,Hunan,Argentinian,Indian
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3.0,Vietnamese,Chinese,Sushi,Wings,German,Ethiopian,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Grocery Store,Greek,English,Gym,Hakka,Halal
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3.0,Chinese,Asian,Indian,Restaurant,American,Italian,Thai,Szechuan,New American,Caribbean,Modern European,Gastropub,Cocktail,Korean,Bistro,Breakfast,Dim Sum,Japanese,Theme Restaurant,Event Space


### K-Means Could Not Include All Neighborhoods into Clusters and hence some of the output was a NaN. Therefore we drop those rows and convert the Cluster Labels Column to Integer so that it can be mapped using Folium in the next Cell

In [39]:

Tor_Rest_merged.dropna(subset=["Cluster Labels"], axis=0, inplace=True)
Tor_Rest_merged['Cluster Labels'] = Tor_Rest_merged['Cluster Labels'].astype('int')
Tor_Rest_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Indian,Restaurant,Caribbean,Wings,German,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Greek,Entertainment,Grocery Store,Gym,Hakka,Halal,Hotel
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Restaurant,Breakfast,Diner,American,Asian,Spanish,Lounge,Greek,Gastropub,Food,Ethiopian,Sandwiches,African,Italian,Beer Bar,Thai,Vietnamese,Hunan,Argentinian,Indian
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3,Vietnamese,Chinese,Sushi,Wings,German,Ethiopian,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Grocery Store,Greek,English,Gym,Hakka,Halal
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,Chinese,Asian,Indian,Restaurant,American,Italian,Thai,Szechuan,New American,Caribbean,Modern European,Gastropub,Cocktail,Korean,Bistro,Breakfast,Dim Sum,Japanese,Theme Restaurant,Event Space
7,M3B,North York,Don Mills,43.745906,-79.352188,2,Italian,Vineyard,American,Indian,Diner,Restaurant,Gym,Gastropub,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Grocery Store,German,Greek,Entertainment,Hakka


### Next we use Folium to create a map. The map is saved to be accesed.

In [41]:
# create map

latitude = 43.6487
longitude = -79.38544

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Tor_Rest_merged['Latitude'], Tor_Rest_merged['Longitude'], Tor_Rest_merged['Neighborhood'], Tor_Rest_merged['Cluster Labels']):    
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters.save('F35.html')

### We now examine each cluster to determine the discriminating venue categories. That way we can try to understand our clusters using the categories therein

### CLUSTER - 1

In [49]:
#Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 0, Tor_Rest_merged.columns[[1] + list(range(5, Tor_Rest_merged.shape[1]))]]

Cluster1 = Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 0,:]

Cluster1

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
23,M4G,East York,Leaside,43.70906,-79.363452,0,Indian,Sushi,Wings,Greek,Ethiopian,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,English,Gym,Hakka,Halal,Hotel
66,M2P,North York,York Mills West,43.752758,-79.400049,0,Diner,Sushi,Wings,Greek,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,Entertainment,Gym,Hakka,Halal,Hotel,Hotpot
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,0,American,Sushi,Wings,Grocery Store,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Gym,Entertainment,Hakka,Halal,Hotel,Hotpot


### CLUSTER - 2

In [50]:
##Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 1, Tor_Rest_merged.columns[[1] + list(range(5, Tor_Rest_merged.shape[1]))]]

Cluster2 = Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 1,:]

Cluster2

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
38,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,1,Asian,Bar,Restaurant,Chinese,Wings,Greek,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Gym,Grocery Store,Ethiopian,Hakka,Halal,Hotel
61,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1,Bar,Wings,Grocery Store,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Gym,Entertainment,Hakka,Halal,Hotel,Hotpot,Hunan
87,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,1,Eastern European,Asian,Bar,Chinese,Wings,Greek,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Gym,Grocery Store,Ethiopian,Hakka,Halal,Hotel


### CLUSTER - 3

In [51]:
#Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 2, Tor_Rest_merged.columns[[1] + list(range(5, Tor_Rest_merged.shape[1]))]]

Cluster3 = Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 2,:]

Cluster3



Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Indian,Restaurant,Caribbean,Wings,German,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Greek,Entertainment,Grocery Store,Gym,Hakka,Halal,Hotel
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Restaurant,Breakfast,Diner,American,Asian,Spanish,Lounge,Greek,Gastropub,Food,Ethiopian,Sandwiches,African,Italian,Beer Bar,Thai,Vietnamese,Hunan,Argentinian,Indian
7,M3B,North York,Don Mills,43.745906,-79.352188,2,Italian,Vineyard,American,Indian,Diner,Restaurant,Gym,Gastropub,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Grocery Store,German,Greek,Entertainment,Hakka
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Restaurant,Chinese,Diner,American,New American,Indian,Asian,Caribbean,Thai,Italian,Modern European,Food,Dim Sum,Gastropub,Lounge,Mongolian,Sushi,Japanese,Vietnamese,Breakfast
13,M3C,North York,Don Mills,43.7259,-79.340923,2,Italian,Vineyard,American,Indian,Diner,Restaurant,Gym,Gastropub,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Grocery Store,German,Greek,Entertainment,Hakka
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Restaurant,Asian,Thai,American,Indian,Chinese,Japanese,Caribbean,New American,Mongolian,Diner,Lounge,Gastropub,Food,Pub,Ethiopian,African,Molecular Gastronomy,Theme Restaurant,Vietnamese
17,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,2,Restaurant,Scandinavian,Wings,English,Ethiopian,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Grocery Store,Gym,Hakka,Halal,Hotel
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Restaurant,Asian,Fast Food,Diner,New American,Indian,Japanese,Sandwiches,Wine Bar,Molecular Gastronomy,Food,Deli / Bodega,Lounge,Pub,Caribbean,Mongolian,Thai,American,Spanish,Sushi
21,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512,2,Restaurant,Latin American,Bar,Sandwiches,Spanish,Middle Eastern,Caribbean,American,Food,Filipino,French,Furniture / Home,Fast Food,Gastropub,Falafel,German,Wings,Grocery Store,Greek,Ethiopian
22,M1G,Scarborough,Woburn,43.770992,-79.216917,2,Restaurant,Chinese,Wings,Entertainment,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Grocery Store,Gym,Hakka,Halal,Hotel,Hotpot


### CLUSTER - 4

In [52]:
#Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 3, Tor_Rest_merged.columns[[1] + list(range(5, Tor_Rest_merged.shape[1]))]]

Cluster4 = Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 3,:]

Cluster4



Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3,Vietnamese,Chinese,Sushi,Wings,German,Ethiopian,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Grocery Store,Greek,English,Gym,Hakka,Halal
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,Chinese,Asian,Indian,Restaurant,American,Italian,Thai,Szechuan,New American,Caribbean,Modern European,Gastropub,Cocktail,Korean,Bistro,Breakfast,Dim Sum,Japanese,Theme Restaurant,Event Space
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,3,American,Indian,Burgers,Chinese,Wings,Greek,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Gym,Grocery Store,Ethiopian,Hakka,Halal,Hotel
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,3,Chinese,Fast Food,Wings,Greek,Event Space,Falafel,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,Entertainment,Gym,Hakka,Halal,Hotel,Hotpot,Hunan
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,3,Chinese,Restaurant,Dim Sum,Indian,Korean,American,New American,Asian,Italian,Thai,Szechuan,Caribbean,Breakfast,Cantonese,Modern European,Gastropub,Noodles,Japanese,Theme Restaurant,Vietnamese
27,M2H,North York,Hillcrest Village,43.803762,-79.363452,3,Japanese,Sandwiches,Chinese,Entertainment,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Grocery Store,Gym,Hakka,Halal,Hotel,Hotpot
29,M4H,East York,Thorncliffe Park,43.705369,-79.349372,3,Chinese,African,Indian,Turkish,Caribbean,Shop,Gastropub,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Greek,German,Entertainment,Grocery Store,Gym,Hakka
51,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476,3,Chinese,Wings,Greek,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,Entertainment,Gym,Hakka,Halal,Hotel,Hotpot,Hunan
56,M6M,York,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",43.691116,-79.476013,3,Chinese,American,Portuguese,Sushi,Wings,German,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,Grocery Store,Greek,Entertainment,Gym,Hakka,Halal
65,M1P,Scarborough,"Dorset Park, Wexford Heights, Scarborough Town...",43.75741,-79.273304,3,Chinese,American,Indian,Filipino,Middle Eastern,Wings,Greek,Falafel,Fast Food,Food,French,Furniture / Home,Gastropub,German,Gym,Grocery Store,Ethiopian,Hakka,Halal,Hotel


### CLUSTER - 5

In [55]:
#Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 4, Tor_Rest_merged.columns[[1] + list(range(5, Tor_Rest_merged.shape[1]))]]
Cluster5 = Tor_Rest_merged.loc[Tor_Rest_merged['Cluster Labels'] == 4,:]

Cluster5


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
10,M6B,North York,Glencairn,43.709577,-79.445073,4,Pizza,Asian,Japanese,Portuguese,Grocery Store,Argentinian,Indian,Ice Cream,Indian Chinese,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Entertainment
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,4,Breakfast,Wings,Grocery Store,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Gym,Entertainment,Hakka,Halal,Hotel,Hotpot,Hunan
14,M4C,East York,Woodbine Heights,43.695344,-79.318389,4,Greek,Ethiopian,Thai,Wings,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,English,Gym,Hakka,Halal,Hotel,Hotpot
16,M6C,York,Humewood-Cedarvale,43.693781,-79.428191,4,Caribbean,Japanese,Breakfast,Noodles,Chinese,Mexican,Mac & Cheese,Middle Eastern,Halal,Hotel,Indian Chinese,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Greek,Grocery Store
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Japanese,Chinese,Italian,Asian,Caribbean,Greek,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,Ethiopian,Gym,Hakka,Halal,Hotel
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564,4,Korean,Ethiopian,Restaurant,Middle Eastern,African,South American,Japanese,Event Space,Nightclub,Eastern European,Portuguese,Pub,Russian,Indian,Bar,Vegetarian / Vegan,Cajun / Creole,Greek,Filipino,Food
26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,4,Italian,German,Asian,Fast Food,Restaurant,Thai,Hakka,Ethiopian,Event Space,Falafel,Filipino,Food,French,Furniture / Home,Gastropub,Grocery Store,Greek,English,Gym,Halal
28,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,4,Middle Eastern,Wings,Bar,Sandwiches,Greek,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,Entertainment,Gym,Hakka,Halal,Hotel
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,4,Portuguese,Middle Eastern,Caribbean,Vietnamese,Diner,Fast Food,Sandwiches,Mediterranean,Event Space,Falafel,Filipino,Food,French,Furniture / Home,Gastropub,Wings,German,Entertainment,Greek,Grocery Store
32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,4,Ice Cream,Japanese,Caribbean,Indian,Restaurant,Breakfast,Event Space,Falafel,Fast Food,Filipino,Food,French,Furniture / Home,Gastropub,German,Grocery Store,Greek,Entertainment,Gym,Hakka
