# Hypothesis implementation 

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis

import json # library to handle JSON files

import requests # library to handle requests

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors # Matplotlib and associated plotting modules

from sklearn.cluster import KMeans # import k-means from clustering stage

from bs4 import BeautifulSoup # website scraping libraries and packages in Python from BeautifulSoup 

!pip install geopy
from geopy.geocoders import Nominatim  # convert an address into latitude and longitude values

print("Libraries imported.")

Libraries imported.


In [2]:
# Scrapping the data using wikipedia
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
data = BeautifulSoup(url, 'html.parser')

In [3]:
# making dataframe to create the dataset
table_contents=[]
table=data.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)
df=pd.DataFrame(table_contents)

In [5]:
# Processed_dataset
df.head(5)

Unnamed: 0,Borough,Neighborhood,PostalCode
0,North York,Parkwoods,M3A
1,North York,Victoria Village,M4A
2,Downtown Toronto,"Regent Park, Harbourfront",M5A
3,North York,"Lawrence Manor, Lawrence Heights",M6A
4,Queen's Park,Ontario Provincial Government,M7A


In [6]:
# getting the co-ordinates by using CSV file provided in this course
coordinates = pd.read_csv('Geospatial_Coordinates.csv')
coordinates.rename(columns={"Postal Code": "PostalCode"}, inplace=True)
coordinates.head(3)

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711


In [7]:
# merging the co-ordinates to the original dataset
df = df.groupby(["PostalCode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
df = df.merge(coordinates, on="PostalCode", how="left")
df.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [9]:
# checking a specific coordinates
# Check coordinates for a couple of neighborhoods
df[(df['PostalCode']=='M2H') ]


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
17,M2H,North York,Hillcrest Village,43.803762,-79.363452


In [10]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(len(df['Borough'].unique()), df.shape[0]))

The dataframe has 15 boroughs and 103 neighborhoods.


In [11]:
# renaming the column
df.rename(columns={'Neighbourhood': 'Neighborhood'}, inplace=True)

In [12]:
# let's do the count of Neighborhood 
df.groupby('Borough').count()['Neighborhood']

Borough
Central Toronto                                                  9
Downtown Toronto                                                17
Downtown TorontoStn A PO Boxes25 The Esplanade                   1
East Toronto                                                     4
East TorontoBusiness reply mail Processing Centre969 Eastern     1
East York                                                        4
East YorkEast Toronto                                            1
Etobicoke                                                       11
EtobicokeNorthwest                                               1
MississaugaCanada Post Gateway Processing Centre                 1
North York                                                      24
Queen's Park                                                     1
Scarborough                                                     17
West Toronto                                                     6
York                                                  

In [15]:
# analysis of a specific area
df_temp = df[df['Borough'].str.contains('Toronto')]
df_temp.reset_index(inplace=True)
df_temp.drop('index', axis=1, inplace=True)
df_temp.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4J,East YorkEast Toronto,The Danforth East,43.685347,-79.338106
2,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
3,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
4,M4M,East Toronto,Studio District,43.659526,-79.340923


In [17]:
print(df_temp.groupby('Borough').count()['Neighborhood'])

Borough
Central Toronto                                                  9
Downtown Toronto                                                17
Downtown TorontoStn A PO Boxes25 The Esplanade                   1
East Toronto                                                     4
East TorontoBusiness reply mail Processing Centre969 Eastern     1
East YorkEast Toronto                                            1
West Toronto                                                     6
Name: Neighborhood, dtype: int64


In [18]:
# some pre-made calculations
boroughs = df_temp['Borough'].unique().tolist()

lat = df_temp['Latitude'].mean()
lon = df_temp['Longitude'].mean()
print('The geographical coordinates of Toronto are {}, {}'.format(lat, lon))

The geographical coordinates of Toronto are 43.667725897435886, -79.38855562564102


In [19]:
borough_color = {}
for borough in boroughs:
    borough_color[borough]= '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3))

In [21]:
# import folium
!pip install folium




In [22]:
# let's visualize
import folium
map_toronto = folium.Map(location=[lat, lon], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_temp['Latitude'], 
                                           df_temp['Longitude'],
                                           df_temp['Borough'], 
                                           df_temp['Neighborhood']):
    label_text = borough + ' - ' + neighborhood
    label = folium.Popup(label_text)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=borough_color[borough],
        fill_color=borough_color[borough],
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

In [24]:
# Now, we have to get the nearby venues by using foursquare APIs

CLIENT_ID = 'A4GIGAD2BH2HDBYRZI1BUACGQRHXGWYRNO1XJG4NMI5EB5M2' # your Foursquare ID
CLIENT_SECRET = 'KRUZDX0BURD5BFI4XVMEDRF03RJUCPP0MFSLYFMW02WIUMNU' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value
radius=500

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: A4GIGAD2BH2HDBYRZI1BUACGQRHXGWYRNO1XJG4NMI5EB5M2
CLIENT_SECRET:KRUZDX0BURD5BFI4XVMEDRF03RJUCPP0MFSLYFMW02WIUMNU


In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
   
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
toronto_venues = getNearbyVenues(names=df_temp['Neighborhood'],
                                latitudes=df_temp['Latitude'],
                                longitudes=df_temp['Longitude'])


The Beaches
The Danforth  East
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Enclave of M5E
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High Park, The Junction 

In [27]:
toronto_venues.shape

(1584, 7)

In [28]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,59,59,59,59,59,59
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15
Central Bay Street,65,65,65,65,65,65
Christie,16,16,16,16,16,16
Church and Wellesley,72,72,72,72,72,72
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,36,36,36,36,36,36
Davisville North,8,8,8,8,8,8
"Dufferin, Dovercourt Village",15,15,15,15,15,15


In [29]:
# observe the venue category
toronto_venues['Venue Category'].unique()[:100]

array(['Trail', 'Health Food Store', 'Pub', 'Neighborhood', 'Park',
       'Intersection', 'Convenience Store', 'Cosmetics Shop',
       'Greek Restaurant', 'Italian Restaurant', 'Ice Cream Shop',
       'Brewery', 'Yoga Studio', 'Fruit & Vegetable Store', 'Restaurant',
       'Pizza Place', 'Bookstore', 'Juice Bar', 'Furniture / Home Store',
       'Dessert Shop', 'Bubble Tea Shop', 'Spa', 'Grocery Store',
       'Coffee Shop', 'Tibetan Restaurant', 'Bakery', 'Indian Restaurant',
       'Caribbean Restaurant', 'Café', 'Lounge', 'Frozen Yogurt Shop',
       'American Restaurant', 'Gym', 'Fast Food Restaurant',
       'Fish & Chips Shop', 'Sushi Restaurant', 'Liquor Store',
       'Pet Store', 'Steakhouse', 'Movie Theater', 'Sandwich Place',
       'Board Shop', 'Food & Drink Shop', 'Fish Market',
       'Seafood Restaurant', 'Gay Bar', 'Cheese Shop', 'Stationery Store',
       'Middle Eastern Restaurant', 'Comfort Food Restaurant',
       'Thai Restaurant', 'Coworking Space', 'Latin Am

In [36]:
# list of asian restaurants
"Japanese Restaurant" in toronto_venues['Venue Category'].unique()
"Sri Lankan Restaurant" in toronto_venues['Venue Category'].unique()
"Taiwanese Restaurant" in toronto_venues['Venue Category'].unique()
"Thai Restaurant" in toronto_venues['Venue Category'].unique()

True

In [37]:
to_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
to_onehot['Neighborhoods'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

print(to_onehot.shape)
to_onehot.head()

(1584, 235)


Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Danforth East,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
to_grouped = to_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(to_grouped.shape)
to_grouped

(39, 235)


Unnamed: 0,Neighborhoods,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,...,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.066667,0.066667,0.133333,0.133333,0.133333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0,0.0,0.015385
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.027778
6,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
7,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Dufferin, Dovercourt Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
len(to_grouped[to_grouped["Indian Restaurant"] > 0])

9

In [40]:
len(to_grouped[to_grouped["Japanese Restaurant"] > 0])

15

In [41]:
len(to_grouped[to_grouped["Sri Lankan Restaurant"] > 0])

1

In [42]:
len(to_grouped[to_grouped["Taiwanese Restaurant"] > 0])

1

In [43]:
len(to_grouped[to_grouped["Thai Restaurant"] > 0])

13

# from the above search, it is seems that indian, japanese, and thai foods are more popular. I will take the lead with japanese restaurant

In [46]:
to_japan = to_grouped[["Neighborhoods","Japanese Restaurant"]]
to_japan

Unnamed: 0,Neighborhoods,Japanese Restaurant
0,Berczy Park,0.016949
1,"Brockton, Parkdale Village, Exhibition Place",0.0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0
3,Central Bay Street,0.015385
4,Christie,0.0
5,Church and Wellesley,0.069444
6,"Commerce Court, Victoria Hotel",0.03
7,Davisville,0.0
8,Davisville North,0.0
9,"Dufferin, Dovercourt Village",0.0


In [47]:
# let's do the clustering

In [48]:
from sklearn.cluster import KMeans
toclusters = 3

to_clustering = to_japan.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=toclusters, random_state=1)
kmeans.fit_transform(to_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([2, 0, 0, 2, 0, 1, 2, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0])

In [49]:
to_merged = to_japan.copy()

# add clustering labels
to_merged["Cluster Labels"] = kmeans.labels_

In [50]:
to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
to_merged.head(5)

Unnamed: 0,Neighborhood,Japanese Restaurant,Cluster Labels
0,Berczy Park,0.016949,2
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0
3,Central Bay Street,0.015385,2
4,Christie,0.0,0


In [51]:
to_merged = to_merged.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")

print(to_merged.shape)
to_merged.head()

(1584, 9)


Unnamed: 0,Neighborhood,Japanese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.016949,2,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.016949,2,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.016949,2,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.016949,2,43.644771,-79.373306,Goose Island Brewhouse,43.647329,-79.373541,Beer Bar
0,Berczy Park,0.016949,2,43.644771,-79.373306,Biff's Bistro,43.647085,-79.376342,French Restaurant


In [52]:
to_merged.sort_values(["Cluster Labels"], inplace=True)
to_merged.head()

Unnamed: 0,Neighborhood,Japanese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
15,"Harbourfront East, Union Station, Toronto Islands",0.01,0,43.640816,-79.381752,Rainbow Reef,43.64226,-79.385994,Aquarium
23,"Parkdale, Roncesvalles",0.0,0,43.64896,-79.456325,Likely General,43.650622,-79.450635,Gift Shop
23,"Parkdale, Roncesvalles",0.0,0,43.64896,-79.456325,Scout,43.65097,-79.450866,Gift Shop
23,"Parkdale, Roncesvalles",0.0,0,43.64896,-79.456325,Reunion Island Coffee Bar,43.650463,-79.45061,Coffee Shop
23,"Parkdale, Roncesvalles",0.0,0,43.64896,-79.456325,Cider House,43.650688,-79.450685,Restaurant


In [53]:
# visualize it

In [54]:
map_clusters = folium.Map(location=[lat, lon],zoom_start=14)

# set color scheme for the clusters


# add markers to the map
markers_colors={}
markers_colors[0] = 'red'
markers_colors[1] = 'blue'
markers_colors[2] = 'green'
markers_colors[3] = 'yellow'
markers_colors[4] = 'cyan'
markers_colors[5] = 'black'
for lat, lon, cluster in zip(to_merged['Neighborhood Latitude'], to_merged['Neighborhood Longitude'], to_merged['Cluster Labels']):
    
    
    folium.features.CircleMarker(
        [lat, lon],
        radius=5,
       
        color =markers_colors[cluster],
        fill_color=markers_colors[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [55]:
# Cluster 0
to_merged.loc[(to_merged['Cluster Labels'] ==0) & (to_merged['Venue Category'] == 'Japanese Restaurant') ]

Unnamed: 0,Neighborhood,Japanese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
25,"Richmond, Adelaide, King",0.01087,0,43.650571,-79.384568,Fune Japanese Restaurant,43.648514,-79.386457,Japanese Restaurant
15,"Harbourfront East, Union Station, Toronto Islands",0.01,0,43.640816,-79.381752,Miku,43.641374,-79.377531,Japanese Restaurant
14,"Garden District, Ryerson",0.01,0,43.657162,-79.378937,Kinka Izakaya Original,43.660596,-79.378891,Japanese Restaurant
29,St. James Town,0.012195,0,43.651494,-79.375418,Gyu-Kaku Japanese BBQ,43.651422,-79.375047,Japanese Restaurant


In [56]:
# Cluster 1
to_merged.loc[(to_merged['Cluster Labels'] ==1) & (to_merged['Venue Category'] == 'Japanese Restaurant') ]

Unnamed: 0,Neighborhood,Japanese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
38,"University of Toronto, Harbord",0.060606,1,43.662696,-79.400049,Gyubee,43.667088,-79.400571,Japanese Restaurant
38,"University of Toronto, Harbord",0.060606,1,43.662696,-79.400049,Yasu,43.662837,-79.403217,Japanese Restaurant
5,Church and Wellesley,0.069444,1,43.66586,-79.38316,Kawa Sushi,43.663894,-79.38021,Japanese Restaurant
5,Church and Wellesley,0.069444,1,43.66586,-79.38316,Tokyo Kitchen,43.668783,-79.385153,Japanese Restaurant
5,Church and Wellesley,0.069444,1,43.66586,-79.38316,Tokyo Grill,43.665085,-79.384707,Japanese Restaurant
5,Church and Wellesley,0.069444,1,43.66586,-79.38316,Onnki Donburi,43.669757,-79.384574,Japanese Restaurant
5,Church and Wellesley,0.069444,1,43.66586,-79.38316,Kokoni Izakaya,43.664181,-79.380258,Japanese Restaurant


In [57]:
# Cluster 2
to_merged.loc[(to_merged['Cluster Labels'] ==2) & (to_merged['Venue Category'] == 'Japanese Restaurant') ]

Unnamed: 0,Neighborhood,Japanese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
3,Central Bay Street,0.015385,2,43.657952,-79.387383,Omai,43.656006,-79.392494,Japanese Restaurant
6,"Commerce Court, Victoria Hotel",0.03,2,43.648198,-79.379817,Chotto Matte,43.646473,-79.378782,Japanese Restaurant
30,"St. James Town, Cabbagetown",0.021277,2,43.667967,-79.367675,Kingyo Toronto,43.665895,-79.368415,Japanese Restaurant
6,"Commerce Court, Victoria Hotel",0.03,2,43.648198,-79.379817,Ki Modern Japanese + Bar,43.647223,-79.379374,Japanese Restaurant
37,"Toronto Dominion Centre, Design Exchange",0.03,2,43.647177,-79.381576,Ninki Izakaya,43.650228,-79.384863,Japanese Restaurant
0,Berczy Park,0.016949,2,43.644771,-79.373306,Chotto Matte,43.646473,-79.378782,Japanese Restaurant
37,"Toronto Dominion Centre, Design Exchange",0.03,2,43.647177,-79.381576,Chotto Matte,43.646473,-79.378782,Japanese Restaurant
37,"Toronto Dominion Centre, Design Exchange",0.03,2,43.647177,-79.381576,Ki Modern Japanese + Bar,43.647223,-79.379374,Japanese Restaurant
20,"Little Portugal, Trinity",0.022727,2,43.647927,-79.41975,Bazara,43.648535,-79.420521,Japanese Restaurant
12,"First Canadian Place, Underground city",0.04,2,43.648429,-79.38228,Ki Modern Japanese + Bar,43.647223,-79.379374,Japanese Restaurant


# lets check with thai restaurant


In [58]:
to_thai = to_grouped[["Neighborhoods","Thai Restaurant"]]
to_thai

Unnamed: 0,Neighborhoods,Thai Restaurant
0,Berczy Park,0.016949
1,"Brockton, Parkdale Village, Exhibition Place",0.0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0
3,Central Bay Street,0.030769
4,Christie,0.0
5,Church and Wellesley,0.013889
6,"Commerce Court, Victoria Hotel",0.02
7,Davisville,0.055556
8,Davisville North,0.0
9,"Dufferin, Dovercourt Village",0.0


In [59]:
from sklearn.cluster import KMeans
toclusters = 3

to_clustering = to_thai.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=toclusters, random_state=1)
kmeans.fit_transform(to_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([1, 0, 0, 1, 0, 1, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0])

In [60]:
to_merged = to_thai.copy()

# add clustering labels
to_merged["Cluster Labels"] = kmeans.labels_

In [61]:
to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
to_merged.head(5)

Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels
0,Berczy Park,0.016949,1
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0
3,Central Bay Street,0.030769,1
4,Christie,0.0,0


In [62]:
to_merged = to_merged.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")

print(to_merged.shape)
to_merged.head()

(1584, 9)


Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.016949,1,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.016949,1,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.016949,1,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.016949,1,43.644771,-79.373306,Goose Island Brewhouse,43.647329,-79.373541,Beer Bar
0,Berczy Park,0.016949,1,43.644771,-79.373306,Biff's Bistro,43.647085,-79.376342,French Restaurant


In [63]:
map_clusters = folium.Map(location=[lat, lon],zoom_start=14)

# set color scheme for the clusters


# add markers to the map
markers_colors={}
markers_colors[0] = 'red'
markers_colors[1] = 'blue'
markers_colors[2] = 'green'
markers_colors[3] = 'yellow'
markers_colors[4] = 'cyan'
markers_colors[5] = 'black'
for lat, lon, cluster in zip(to_merged['Neighborhood Latitude'], to_merged['Neighborhood Longitude'], to_merged['Cluster Labels']):
    
    
    folium.features.CircleMarker(
        [lat, lon],
        radius=5,
       
        color =markers_colors[cluster],
        fill_color=markers_colors[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [64]:
# Cluster 0
to_merged.loc[(to_merged['Cluster Labels'] ==0) & (to_merged['Venue Category'] == 'Thai Restaurant') ]

Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
11,Enclave of M5E,0.010101,0,43.646435,-79.374846,Sukhothai,43.648487,-79.374547,Thai Restaurant
14,"Garden District, Ryerson",0.01,0,43.657162,-79.378937,Salad King,43.657601,-79.38162,Thai Restaurant


In [65]:
# Cluster 1
to_merged.loc[(to_merged['Cluster Labels'] ==1) & (to_merged['Venue Category'] == 'Thai Restaurant') ]

Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.016949,1,43.644771,-79.373306,Sukhothai,43.648487,-79.374547,Thai Restaurant
3,Central Bay Street,0.030769,1,43.657952,-79.387383,Salad King,43.657601,-79.38162,Thai Restaurant
3,Central Bay Street,0.030769,1,43.657952,-79.387383,Thai Express,43.66163,-79.38734,Thai Restaurant
5,Church and Wellesley,0.013889,1,43.66586,-79.38316,Si Lom,43.66501,-79.380683,Thai Restaurant
6,"Commerce Court, Victoria Hotel",0.02,1,43.648198,-79.379817,Sukhothai,43.648487,-79.374547,Thai Restaurant
6,"Commerce Court, Victoria Hotel",0.02,1,43.648198,-79.379817,Ruby Thai (First Canadian Place),43.649091,-79.3816,Thai Restaurant
12,"First Canadian Place, Underground city",0.02,1,43.648429,-79.38228,Thai Island,43.649146,-79.383798,Thai Restaurant
12,"First Canadian Place, Underground city",0.02,1,43.648429,-79.38228,Ruby Thai (First Canadian Place),43.649091,-79.3816,Thai Restaurant
25,"Richmond, Adelaide, King",0.032609,1,43.650571,-79.384568,Pai,43.647923,-79.388579,Thai Restaurant
25,"Richmond, Adelaide, King",0.032609,1,43.650571,-79.384568,Thai Island,43.649146,-79.383798,Thai Restaurant


In [66]:
# Cluster 2
to_merged.loc[(to_merged['Cluster Labels'] ==2) & (to_merged['Venue Category'] == 'Thai Restaurant') ]

Unnamed: 0,Neighborhood,Thai Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
7,Davisville,0.055556,2,43.704324,-79.38879,Thai Spicy House,43.701962,-79.387513,Thai Restaurant
7,Davisville,0.055556,2,43.704324,-79.38879,Bolan Thai Cuisine,43.706833,-79.3892,Thai Restaurant
16,"High Park, The Junction South",0.08,2,43.661608,-79.464763,Isaan Der,43.665311,-79.468078,Thai Restaurant
16,"High Park, The Junction South",0.08,2,43.661608,-79.464763,Silk,43.665291,-79.466238,Thai Restaurant


# Conclusion

We can observe that most of the japanese restaurant are in cluster 2. Since I took the lead with japanese and thai restaurant, I would suggest the entreprenuers either choose japanaese food or thai food as a lead to open the buisness. If japanese then suitable place would be cluster 2 which is near Richmond, Adelaide, King or Central Bay Street	area. If thai then choose the cluster 1 which is near Richmond, Adelaide, King	and central Bay. In overall, the entreprenuers could take lead with both japanese and thai food in the Richmond, Adelaide, King	and central Bay area because the cluster result suggest that these two place are prime places for these kinds of restaurant. 