# Applied Data Science Capstone: Find the best place to open up a high-end steakhouse in Toronto

### Load Libraries and import Toronto Postal Code Data

In [2]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

res = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))

data = pd.DataFrame(df[0])

data = data.rename(columns={0:'Postal Code', 1:'Borough', 2:'Neighbourhood'})

data = data.iloc[1:]

data = data[~data['Borough'].str.contains('Not assigned')]

df2=data.groupby(['Postal Code', 'Borough']).apply(lambda group: ', '.join(group['Neighbourhood']))

df2=df2.to_frame().reset_index()
df2 = df2.rename(columns={0:'Neighborhood'})

df2.loc[df2.Neighborhood == 'Not assigned', 'Neighborhood' ] = df2.Borough

df2.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Load  geospatial cooridinates for Toronto and merge with Toronto Postal Code Data

In [3]:
!wget -O to_geo_space.csv http://cocl.us/Geospatial_data

gs = pd.read_csv('to_geo_space.csv')

gs = gs.rename(columns={'Postal Code':'Postal Code'})

gs1 = pd.merge(df2, gs, on='Postal Code', how='inner')

gs1.head()

--2021-01-03 05:28:10--  http://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 169.63.96.194, 169.63.96.176
Connecting to cocl.us (cocl.us)|169.63.96.194|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cocl.us/Geospatial_data [following]
--2021-01-03 05:28:10--  https://cocl.us/Geospatial_data
Connecting to cocl.us (cocl.us)|169.63.96.194|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2021-01-03 05:28:11--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.29.197
Connecting to ibm.box.com (ibm.box.com)|107.152.29.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2021-01-03 05:28:11--  https://ibm.box.com/public/static/9afzr83p

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Toronto neighborhoods populations by their postal code

In [4]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

df_pop = pd.read_csv('https://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/File.cfm?T=1201&SR=1&RPP=9999&PR=0&CMA=0&CSD=0&S=22&O=A&Lang=Eng&OFT=CSV',encoding = 'unicode_escape')

df_pop = df_pop.rename(columns={'Geographic code':'Postal Code', 'Geographic name':'Postal Code2', 'Province or territory':'Province', 'Incompletely enumerated Indian reserves and Indian settlements, 2016':'Incomplete', 'Population, 2016':'Population_2016', 'Total private dwellings, 2016':'TotalPrivDwellings', 'Private dwellings occupied by usual residents, 2016':'PrivDwellingsOccupied'})
df_pop= df_pop.drop(columns=['Postal Code2', 'Province', 'Incomplete', 'TotalPrivDwellings', 'PrivDwellingsOccupied'])

df_pop = df_pop.iloc[1:]
df_pop.head()

Unnamed: 0,Postal Code,Population_2016
1,A0A,46587.0
2,A0B,19792.0
3,A0C,12587.0
4,A0E,22294.0
5,A0G,35266.0


### Merge Postal Codes with their corresponding populations

In [5]:
gs1
gs1 = pd.merge(df_pop, gs1, on='Postal Code', how='right')
gs1 = gs1.sort_values(by=['Population_2016'], ascending=False)

gs1.head()

Unnamed: 0,Postal Code,Population_2016,Borough,Neighborhood,Latitude,Longitude
22,M2N,75897.0,North York,"Willowdale, Willowdale East",43.77012,-79.408493
0,M1B,66108.0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
18,M2J,58293.0,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
100,M9V,55959.0,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
14,M1V,54680.0,Scarborough,"Milliken, Agincourt North, Steeles East, L'Amo...",43.815252,-79.284577


### Toronto Neighborhoods based on Average After Tax Income by Postal Codes

In [6]:
df_income = pd.read_csv(body)
df_income = pd.read_csv('IncomeToronto.csv',encoding = 'unicode_escape')
df_income = df_income.rename(columns={"Average After Tax Income":"AvgAfterTaxIncome"})
df_income.head()

Unnamed: 0,Postal Code,Average After Tax Income
0,M1B,30801.0
1,M1C,34837.0
2,M1E,43848.0
3,M1G,27341.0
4,M1H,


### Merge Postal Codes with average incomes

In [7]:
gs1 = pd.merge(df_income, gs1, on='Postal Code', how='right')

gs1 = gs1.replace('None', 0)

In [8]:
gs1['Average After Tax Income'] = gs1['Average After Tax Income'].astype('float64') 


In [9]:
gs1 = gs1.sort_values(by=['Average After Tax Income'], ascending=False)

gs1.to_csv('TO_Affluence.csv')

gs1.head(10)

Unnamed: 0,Postal Code,Average After Tax Income,Population_2016,Borough,Neighborhood,Latitude,Longitude
20,M2L,193454.0,11717.0,North York,"York Mills, Silver Hills",43.75749,-79.374714
48,M4T,134865.0,10463.0,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,115033.0,18241.0,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
89,M8X,97836.0,10787.0,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
44,M4N,95343.0,15330.0,Central Toronto,Lawrence Park,43.72802,-79.38879
62,M5M,85678.0,25975.0,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
38,M4G,85496.0,19076.0,East York,Leaside,43.70906,-79.363452
65,M5R,80138.0,26496.0,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
92,M9A,72156.0,35594.0,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
23,M2P,70885.0,7843.0,North York,York Mills West,43.752758,-79.400049


In [10]:
CLIENT_ID = 'Hidden' 


CLIENT_SECRET = 'Hidden' 


VERSION = '20180604'

In [11]:

import requests 
from pandas.io.json import json_normalize 

LIMIT = 200 

radius = 500 

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [12]:
Data1 = gs1
Data1.head()

Unnamed: 0,Postal Code,Average After Tax Income,Population_2016,Borough,Neighborhood,Latitude,Longitude
20,M2L,193454.0,11717.0,North York,"York Mills, Silver Hills",43.75749,-79.374714
48,M4T,134865.0,10463.0,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,115033.0,18241.0,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
89,M8X,97836.0,10787.0,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
44,M4N,95343.0,15330.0,Central Toronto,Lawrence Park,43.72802,-79.38879


In [14]:
Venues1 = getNearbyVenues(names=Data1['Neighborhood'],
                                   latitudes=Data1['Latitude'],
                                   longitudes=Data1['Longitude']
                                  )

York Mills, Silver Hills
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
The Kingsway, Montgomery Road, Old Mill North
Lawrence Park
Bedford Park, Lawrence Manor East
Leaside
The Annex, North Midtown, Yorkville
Islington Avenue, Humber Valley Village
York Mills West
Davisville North
Runnymede, Swansea
The Beaches
Brockton, Parkdale Village, Exhibition Place
Church and Wellesley
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Don Mills
Forest Hill North & West, Forest Hill Road Park
The Danforth West, Riverdale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
High Park, The Junction South
Guildwood, Morningside, West Hill
Central Bay Street
Humewood-Cedarvale
Berczy Park
Birch Cliff, Cliffside West
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West
Victoria Village
Bathurst Manor, Wilson Heights, Downsview North
India Bazaar, T

In [15]:
print('Unique Venue Categories:')
list(Venues1['Venue Category'].unique())

Unique Venue Categories:


['Trail',
 'Restaurant',
 'Tennis Court',
 'Playground',
 'Supermarket',
 'Liquor Store',
 'Sushi Restaurant',
 'American Restaurant',
 'Coffee Shop',
 'Pub',
 'Fried Chicken Joint',
 'Vietnamese Restaurant',
 'Pizza Place',
 'Bank',
 'Light Rail Station',
 'Sandwich Place',
 'Bagel Shop',
 'River',
 'Park',
 'Business Service',
 'Swim School',
 'Bus Line',
 'Café',
 'Indian Restaurant',
 'Italian Restaurant',
 'Thai Restaurant',
 'Juice Bar',
 'Comfort Food Restaurant',
 'Greek Restaurant',
 'Pharmacy',
 'Grocery Store',
 'Butcher',
 'Japanese Restaurant',
 'Spa',
 'Toy / Game Store',
 'Sports Bar',
 'Sporting Goods Shop',
 'Fish & Chips Shop',
 'Bike Shop',
 'Pet Store',
 'Burger Joint',
 'Smoothie Shop',
 'Shopping Mall',
 'Dessert Shop',
 'Brewery',
 'Department Store',
 'Beer Store',
 'Breakfast Spot',
 'Furniture / Home Store',
 'Mexican Restaurant',
 'BBQ Joint',
 'Donut Shop',
 'History Museum',
 'Middle Eastern Restaurant',
 'Convenience Store',
 'Food & Drink Shop',
 'Hotel',

In [61]:
restuarant_list = ['Steakhouse', 'Coffee Shop', 'Café', 'Ramen Restaurant', 'Indonesian Restaurant', 'Restaurant', 'Japanese Restaurant', 
             'Fast Food Restaurant', 'Sushi Restaurant', 'Vietnamese Restaurant', 'Pizza Place', 'Sandwich Place', 'Middle Eastern Restaurant', 
             'Burger Joint', 'American Restaurant', 'Food Court', 'Wings Joint', 'Burrito Place', 'Asian Restaurant', 'Deli / Bodega', 
             'Greek Restaurant', 'Fried Chicken Joint', 'Airport Food Court', 'Chinese Restaurant', 'Breakfast Spot', 'Mexican Restaurant',
             'Indian Restaurant', 'Latin American Restaurant', 'Bar', 'Pub', 'Italian Restaurant', 'French Restaurant', 'Ice Cream Shop', 
             'Caribbean Restaurant', 'Gastropub', 'Thai Restaurant', 'Cajun / Creole Restaurant', 'Diner', 'Dim Sum Restaurant', 'Seafood Restaurant', 
             'Food & Drink Shop', 'Noodle House', 'Food', 'Fish & Chips Shop', 'Falafel Restaurant', 'Gourmet Shop', 'Vegetarian / Vegan Restaurant', 
             'South American Restaurant', 'Korean Restaurant', 'Cuban Restaurant', 'New American Restaurant', 'Malay Restaurant', 'Mac & Cheese Joint',
             'Bistro', 'Southern / Soul Food Restaurant', 'Tapas Restaurant',  'Sports Bar', 'Polish Restaurant', 'Ethiopian Restaurant', 
             'Creperie', 'Sake Bar', 'Persian Restaurant', 'Afghan Restaurant','Mediterranean Restaurant', 'BBQ Joint', 'Jewish Restaurant', 
             'Comfort Food Restaurant',  'Hakka Restaurant', 'Food Truck', 'Taiwanese Restaurant',  'Snack Place', 'Eastern European Restaurant', 
             'Dumpling Restaurant', 'Belgian Restaurant', 'Arepa Restaurant', 'Taco Place', 'Doner Restaurant', 'Filipino Restaurant', 
             'Hotpot Restaurant', 'Poutine Place', 'Salad Place',  'Portuguese Restaurant', 'Modern European Restaurant', 'Empanada Restaurant', 
             'Irish Pub', 'Molecular Gastronomy Restaurant', 'German Restaurant', 'Brazilian Restaurant', 'Gluten-free Restaurant', 'Soup Place']

restuarant_pd = pd.DataFrame(restuarant_list)

restuarant_pd = restuarant_pd.rename(columns={0:'Venue Category'})

Newframe = pd.merge(Venues1, restuarant_pd, on='Venue Category', how='right')

Newframe.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,2,2,2,2,2,2
"Alderwood, Long Branch",5,5,5,5,5,5
"Bathurst Manor, Wilson Heights, Downsview North",12,12,12,12,12,12
Bayview Village,3,3,3,3,3,3
"Bedford Park, Lawrence Manor East",17,17,17,17,17,17
...,...,...,...,...,...,...
Westmount,5,5,5,5,5,5
"Wexford, Maryvale",3,3,3,3,3,3
"Willowdale, Willowdale East",20,20,20,20,20,20
"Willowdale, Willowdale West",2,2,2,2,2,2


In [58]:
Newframe = Newframe.dropna(axis=0, subset=['Venue'])

In [62]:
Onehot1 = pd.get_dummies(Newframe[['Venue Category']], prefix="", prefix_sep="")

Onehot1['Neighborhood'] = Newframe['Neighborhood'] 


# move neighborhood column to the first column
fixed_columns = [Onehot1.columns[-1]] + list(Onehot1.columns[:-1])
Onehot1 = Onehot1[fixed_columns]

Onehot1.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport Food Court,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bar,Belgian Restaurant,Bistro,...,Sports Bar,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,"Moore Park, Summerhill East",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Bedford Park, Lawrence Manor East",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Leaside,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Runnymede, Swansea",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [63]:
Group1 = Onehot1.groupby('Neighborhood').mean().reset_index()
Group1.shape


Group1.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport Food Court,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bar,Belgian Restaurant,Bistro,...,Sports Bar,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.058824,0.0,0.0,0.0,0.058824,0.0,0.0,0.0


### Utilize Sillhouette Score to Segment Data

In [87]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np


Groupedclustering = Group1.drop('Neighborhood', 1)

kclusters = np.arange(2,10)
results = {}
for size in kclusters:
    model = KMeans(n_clusters = size).fit(Groupedclustering)
    predictions = model.predict(Groupedclustering)
    results[size] = silhouette_score(Groupedclustering, predictions)

Sizefit = max(results, key=results.get)
Sizefit

8

In [88]:
from sklearn.cluster import KMeans

kclusters = Sizefit

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Groupedclustering)

kmeans.labels_[0:10]

array([0, 2, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int32)

In [89]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_sortedvenues = pd.DataFrame(columns=columns)
neighborhoods_sortedvenues['Neighborhood'] = Group1['Neighborhood']

for ind in np.arange(Group1.shape[0]):
    neighborhoods_sortedvenues.iloc[ind, 1:] = return_most_common_venues(Group1.iloc[ind, :], num_top_venues)

neighborhoods_sortedvenues.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Breakfast Spot,Latin American Restaurant,Food,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant
1,"Alderwood, Long Branch",Pizza Place,Pub,Coffee Shop,Sandwich Place,Falafel Restaurant,Dim Sum Restaurant,Diner,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Pizza Place,Sandwich Place,Diner,Middle Eastern Restaurant,Chinese Restaurant,Restaurant,Deli / Bodega,Fried Chicken Joint,Sushi Restaurant
3,Bayview Village,Chinese Restaurant,Japanese Restaurant,Café,Wings Joint,Fish & Chips Shop,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Sandwich Place,Indian Restaurant,Pizza Place,Pub,Restaurant,Café,Japanese Restaurant,Comfort Food Restaurant


In [90]:
Labeled = pd.merge(Data1,Group1, on='Neighborhood', how='right')

Labeled.shape


Labeled = Labeled.drop(columns=['Steakhouse', 'Coffee Shop', 'Café', 'Ramen Restaurant', 'Indonesian Restaurant', 'Restaurant', 'Japanese Restaurant', 
             'Fast Food Restaurant', 'Sushi Restaurant', 'Vietnamese Restaurant', 'Pizza Place', 'Sandwich Place', 'Middle Eastern Restaurant', 
             'Burger Joint', 'American Restaurant', 'Food Court', 'Wings Joint', 'Burrito Place', 'Asian Restaurant', 'Deli / Bodega', 
             'Greek Restaurant', 'Fried Chicken Joint', 'Airport Food Court', 'Chinese Restaurant', 'Breakfast Spot', 'Mexican Restaurant',
             'Indian Restaurant', 'Latin American Restaurant', 'Bar', 'Pub', 'Italian Restaurant', 'French Restaurant', 'Ice Cream Shop', 
             'Caribbean Restaurant', 'Gastropub', 'Thai Restaurant', 'Cajun / Creole Restaurant', 'Diner', 'Dim Sum Restaurant', 'Seafood Restaurant', 
             'Food & Drink Shop', 'Noodle House', 'Food', 'Fish & Chips Shop', 'Falafel Restaurant', 'Gourmet Shop', 'Vegetarian / Vegan Restaurant', 
             'South American Restaurant', 'Korean Restaurant', 'Cuban Restaurant', 'New American Restaurant', 'Malay Restaurant', 'Mac & Cheese Joint',
             'Bistro', 'Southern / Soul Food Restaurant', 'Tapas Restaurant',  'Sports Bar', 'Polish Restaurant', 'Ethiopian Restaurant', 
             'Creperie', 'Sake Bar', 'Persian Restaurant', 'Afghan Restaurant','Mediterranean Restaurant', 'BBQ Joint', 'Jewish Restaurant', 
             'Comfort Food Restaurant',  'Hakka Restaurant', 'Food Truck', 'Taiwanese Restaurant',  'Snack Place', 'Eastern European Restaurant', 
             'Dumpling Restaurant', 'Belgian Restaurant', 'Arepa Restaurant', 'Taco Place', 'Doner Restaurant', 'Filipino Restaurant', 
             'Hotpot Restaurant', 'Poutine Place', 'Salad Place',  'Portuguese Restaurant', 'Modern European Restaurant', 'Empanada Restaurant', 
             'Irish Pub', 'Molecular Gastronomy Restaurant', 'German Restaurant', 'Brazilian Restaurant', 'Gluten-free Restaurant', 'Soup Place'])
Labeled.head()

Unnamed: 0,Postal Code,Average After Tax Income,Population_2016,Borough,Neighborhood,Latitude,Longitude
0,M4T,134865.0,10463.0,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
1,M4V,115033.0,18241.0,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
2,M5M,85678.0,25975.0,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
3,M4G,85496.0,19076.0,East York,Leaside,43.70906,-79.363452
4,M5R,80138.0,26496.0,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678


In [97]:
Merged1 = Labeled

#Merged1['Cluster Labels'] = kmeans.labels_ 

Merged1 = Merged1.join(neighborhoods_sortedvenues.set_index('Neighborhood'), on='Neighborhood')
print(kmeans.labels_)

Merged1.head() 

[0 2 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 2 0 0 4 0 0 0 0 0 0 6 2
 0 0 0 2 0 0 0 6 0 7 0 0 1 0 0 2 3 0 0 0 1 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0
 2 0 0 2 0]


Unnamed: 0,Postal Code,Average After Tax Income,Population_2016,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4T,134865.0,10463.0,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,Restaurant,Wings Joint,Filipino Restaurant,Diner,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant
1,M4V,115033.0,18241.0,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Coffee Shop,Pizza Place,American Restaurant,Vietnamese Restaurant,Fried Chicken Joint,Sushi Restaurant,Pub,Restaurant,Sandwich Place,Falafel Restaurant
2,M5M,85678.0,25975.0,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975,Italian Restaurant,Coffee Shop,Sandwich Place,Indian Restaurant,Pizza Place,Pub,Restaurant,Café,Japanese Restaurant,Comfort Food Restaurant
3,M4G,85496.0,19076.0,East York,Leaside,43.70906,-79.363452,Coffee Shop,Burger Joint,Breakfast Spot,Mexican Restaurant,Sushi Restaurant,Sports Bar,Fish & Chips Shop,Restaurant,Sandwich Place,Fast Food Restaurant
4,M5R,80138.0,26496.0,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,Café,Sandwich Place,Coffee Shop,Pizza Place,Middle Eastern Restaurant,Indian Restaurant,BBQ Joint,Pub,Burger Joint,Wings Joint


In [116]:
Cluster_0_coorid = Merged1[['Latitude', 'Longitude']]
Cluster_0_coorid = list(Cluster_0_coorid.values) 
lat = []
long = []



for l in Cluster_0_coorid:
  lat.append(l[0])
  long.append(l[1])



Blatitude = sum(lat)/len(lat)
Blongitude = sum(long)/len(long)
print(Blatitude)
print(Blongitude)

43.701714456626505
-79.39324610240962


In [115]:
!pip install opencage
from opencage.geocoder import OpenCageGeocode
from pprint import pprint

pprint(results)

{2: 0.15585874455713347,
 3: 0.16969064732471367,
 4: 0.19188364869285318,
 5: 0.057621136614974665,
 6: 0.1997041382209193,
 7: 0.08293214201469745,
 8: 0.22066544188682236,
 9: 0.17386543570695392}


In [102]:
!pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 3.6 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.11.0


In [110]:
Merged1.head()

Unnamed: 0,Postal Code,Average After Tax Income,Population_2016,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4T,134865.0,10463.0,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,Restaurant,Wings Joint,Filipino Restaurant,Diner,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant
1,M4V,115033.0,18241.0,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Coffee Shop,Pizza Place,American Restaurant,Vietnamese Restaurant,Fried Chicken Joint,Sushi Restaurant,Pub,Restaurant,Sandwich Place,Falafel Restaurant
2,M5M,85678.0,25975.0,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975,Italian Restaurant,Coffee Shop,Sandwich Place,Indian Restaurant,Pizza Place,Pub,Restaurant,Café,Japanese Restaurant,Comfort Food Restaurant
3,M4G,85496.0,19076.0,East York,Leaside,43.70906,-79.363452,Coffee Shop,Burger Joint,Breakfast Spot,Mexican Restaurant,Sushi Restaurant,Sports Bar,Fish & Chips Shop,Restaurant,Sandwich Place,Fast Food Restaurant
4,M5R,80138.0,26496.0,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,Café,Sandwich Place,Coffee Shop,Pizza Place,Middle Eastern Restaurant,Indian Restaurant,BBQ Joint,Pub,Burger Joint,Wings Joint


In [126]:
# getfolium
import folium 
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[43.689574, -79.383160], zoom_start=11)    
for lat, lng, label in zip(Merged1['Latitude'], Merged1['Longitude'], Merged1['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [43.689574, -79.383160],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_clusters)  
    
map_clusters

### The best location for a steakhouse is at this location 