In [20]:
import pandas as pd #library for data analysis
import numpy as np # library to handle data in a vectorized manner
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


### Lets import the data

In [21]:
df = pd.read_csv('neighbourhoodswithcoordinates.csv') 
df

Unnamed: 0,Borough,Postal Code,Neighbourhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.728020,-79.388790
1,Central Toronto,M4P,Davisville North\r\r\n,43.712751,-79.390197
2,Central Toronto,M4R,North Toronto West\r\r\n,43.715383,-79.405678
3,Central Toronto,M4S,Davisville\r\r\n,43.704324,-79.388790
4,Central Toronto,M4T,"Moore Park, Summerhill East\r\r\n",43.689574,-79.383160
5,Central Toronto,M4V,"Deer Park, Forest Hill SE\r\r\n, Rathnelly, So...",43.686412,-79.400049
6,Central Toronto,M5N,Roselawn\r\r\n,43.711695,-79.416936
7,Central Toronto,M5P,"Forest Hill North, Forest Hill West\r\r\n",43.696948,-79.411307
8,Central Toronto,M5R,"The Annex, North Midtown\r\r\n, Yorkville",43.672710,-79.405678
9,Downtown Toronto,M4W,Rosedale,43.679563,-79.377529


### Now lets look into the boroughs which contain the word "Toronto"

In [22]:
toronto = df[df['Borough'].str.contains('Toronto', regex=False, case=False, na=False)]
toronto.head(10)

Unnamed: 0,Borough,Postal Code,Neighbourhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879
1,Central Toronto,M4P,Davisville North\r\r\n,43.712751,-79.390197
2,Central Toronto,M4R,North Toronto West\r\r\n,43.715383,-79.405678
3,Central Toronto,M4S,Davisville\r\r\n,43.704324,-79.38879
4,Central Toronto,M4T,"Moore Park, Summerhill East\r\r\n",43.689574,-79.38316
5,Central Toronto,M4V,"Deer Park, Forest Hill SE\r\r\n, Rathnelly, So...",43.686412,-79.400049
6,Central Toronto,M5N,Roselawn\r\r\n,43.711695,-79.416936
7,Central Toronto,M5P,"Forest Hill North, Forest Hill West\r\r\n",43.696948,-79.411307
8,Central Toronto,M5R,"The Annex, North Midtown\r\r\n, Yorkville",43.67271,-79.405678
9,Downtown Toronto,M4W,Rosedale,43.679563,-79.377529


### Nows lets put in the foursquare credentials

In [23]:
CLIENT_ID = 'CRVLOBOKYSFY3MOEWLO2LJWUGEHHMRNITJY21VWYRTYPKFOR' # your Foursquare ID
CLIENT_SECRET = 'SVSKWCVMEEK0WERNY4MXSERLLVVDUXPZFOHN2IU35AQRQ0R5' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: CRVLOBOKYSFY3MOEWLO2LJWUGEHHMRNITJY21VWYRTYPKFOR
CLIENT_SECRET:SVSKWCVMEEK0WERNY4MXSERLLVVDUXPZFOHN2IU35AQRQ0R5


### Function to get nearby veneues in a neighbourhood

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### As our neighbourhoods contains multiple values in some cells, lets convert them to single values

In [25]:
def splitDataFrameList(df,target_column,separator):
    ''' df = dataframe to split,
    target_column = the column containing the values to split
    separator = the symbol used to perform the split
    returns: a dataframe with each entry for the target column separated, with each element moved into a new row. 
    The values in the other columns are duplicated across the newly divided rows.
    '''
    row_accumulator = []

    def splitListToRows(row, separator):
        split_row = row[target_column].split(separator)
        for s in split_row:
            new_row = row.to_dict()
            new_row[target_column] = s
            row_accumulator.append(new_row)

    df.apply(splitListToRows, axis=1, args = (separator, ))
    new_df = pd.DataFrame(row_accumulator)
    return new_df

In [26]:
tdata = splitDataFrameList(toronto, 'Neighbourhood\r\r\n' , ',')
tdata

Unnamed: 0,Borough,Latitude,Longitude,Neighbourhood,Postal Code
0,Central Toronto,43.728020,-79.388790,Lawrence Park,M4N
1,Central Toronto,43.712751,-79.390197,Davisville North\r\r\n,M4P
2,Central Toronto,43.715383,-79.405678,North Toronto West\r\r\n,M4R
3,Central Toronto,43.704324,-79.388790,Davisville\r\r\n,M4S
4,Central Toronto,43.689574,-79.383160,Moore Park,M4T
5,Central Toronto,43.689574,-79.383160,Summerhill East\r\r\n,M4T
6,Central Toronto,43.686412,-79.400049,Deer Park,M4V
7,Central Toronto,43.686412,-79.400049,Forest Hill SE\r\r\n,M4V
8,Central Toronto,43.686412,-79.400049,Rathnelly,M4V
9,Central Toronto,43.686412,-79.400049,South Hill,M4V


### The venues can be extracted by the function and the venues are printed below

In [27]:
LIMIT=100
toronto_venues = getNearbyVenues(names=tdata['Neighbourhood\r\r\n'],latitudes=tdata['Latitude'],longitudes=tdata['Longitude'])


Lawrence Park
Davisville North

North Toronto West

Davisville

Moore Park
 Summerhill East

Deer Park
 Forest Hill SE

 Rathnelly
 South Hill
 Summerhill West

Roselawn

Forest Hill North
 Forest Hill West

The Annex
 North Midtown

 Yorkville
Rosedale
Cabbagetown
 St. James Town
Church and Wellesley
Harbourfront
 Regent Park
Ryerson
 Garden District

St. James Town
Berczy Park
Central Bay Street

Adelaide
 King
 Richmond

Harbourfront East

 Toronto Islands
 Union Station
Design Exchange
 Toronto Dominion Centre
Commerce Court
 Victoria Hotel

Harbord

 University of Toronto
Chinatown
 Grange Park
 Kensington Market
CN Tower
 Bathurst Quay

 Island airport

 Harbourfront West

 King and Spadina
 Railway Lands
 South Niagara
Stn A PO Boxes 25 The Esplanade

First Canadian Place
 Underground city
Christie

The Beaches
The Danforth West

 Riverdale
The Beaches West

 India Bazaar
Studio District

Business Reply Mail Processing Centre 969 Eastern

Dovercourt Village
 Dufferin

Little Por

### Nows lets group venues in each neighbourhood

In [28]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bathurst Quay\r\r\n,14,14,14,14,14,14
Dufferin\r\r\n,21,21,21,21,21,21
Exhibition Place,23,23,23,23,23,23
Forest Hill SE\r\r\n,15,15,15,15,15,15
Forest Hill West\r\r\n,4,4,4,4,4,4
Garden District\r\r\n,100,100,100,100,100,100
Grange Park,97,97,97,97,97,97
Harbourfront West\r\r\n,14,14,14,14,14,14
India Bazaar,19,19,19,19,19,19
Island airport\r\r\n,14,14,14,14,14,14


### use one hot encoding to further refine the data

In [29]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Bathurst Quay\r\r\n,0.000000,0.00,0.000000,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
1,Dufferin\r\r\n,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.047619,0.000000,0.00
2,Exhibition Place,0.043478,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
3,Forest Hill SE\r\r\n,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.00,0.000000,0.000000,0.066667,0.000000,0.000000,0.000000,0.00
4,Forest Hill West\r\r\n,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.250000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
5,Garden District\r\r\n,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.010000,0.000000,0.00,0.010000,0.010000,0.010000,0.010000,0.000000,0.000000,0.00
6,Grange Park,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.010309,0.000000,0.00,0.051546,0.000000,0.051546,0.010309,0.000000,0.000000,0.00
7,Harbourfront West\r\r\n,0.000000,0.00,0.000000,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
8,India Bazaar,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
9,Island airport\r\r\n,0.000000,0.00,0.000000,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00


### lets print each neighbourhood along with top 5 most common venues

In [31]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Bathurst Quay
----
              venue  freq
0    Airport Lounge  0.14
1   Airport Service  0.14
2  Airport Terminal  0.14
3          Boutique  0.07
4             Plane  0.07


---- Dufferin
----
            venue  freq
0  Discount Store  0.10
1        Pharmacy  0.10
2          Bakery  0.10
3     Supermarket  0.10
4         Brewery  0.05


---- Exhibition Place----
            venue  freq
0  Breakfast Spot  0.09
1            Café  0.09
2     Coffee Shop  0.09
3     Yoga Studio  0.04
4       Pet Store  0.04


---- Forest Hill SE
----
              venue  freq
0               Pub  0.13
1       Coffee Shop  0.13
2        Sports Bar  0.07
3  Sushi Restaurant  0.07
4       Supermarket  0.07


---- Forest Hill West
----
              venue  freq
0     Jewelry Store  0.25
1             Trail  0.25
2              Park  0.25
3  Sushi Restaurant  0.25
4            Museum  0.00


---- Garden District
----
                       venue  freq
0             Clothing Store  0.07
1                

                 venue  freq
0  Japanese Restaurant  0.07
1          Coffee Shop  0.07
2         Burger Joint  0.04
3           Restaurant  0.04
4              Gay Bar  0.04


----Commerce Court----
                 venue  freq
0          Coffee Shop  0.13
1                Hotel  0.06
2                 Café  0.06
3           Restaurant  0.04
4  American Restaurant  0.04


----Davisville
----
                venue  freq
0         Pizza Place  0.08
1        Dessert Shop  0.08
2      Sandwich Place  0.08
3  Italian Restaurant  0.06
4         Coffee Shop  0.06


----Davisville North
----
               venue  freq
0        Pizza Place  0.11
1              Hotel  0.11
2     Breakfast Spot  0.11
3  Food & Drink Shop  0.11
4     Sandwich Place  0.11


----Deer Park----
              venue  freq
0               Pub  0.13
1       Coffee Shop  0.13
2        Sports Bar  0.07
3  Sushi Restaurant  0.07
4       Supermarket  0.07


----Design Exchange----
                 venue  freq
0          Coffe

### lets put that into a dataframe

In [32]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [33]:
num_top_venues = 10 ## lets make the venues 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bathurst Quay\r\r\n,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
1,Dufferin\r\r\n,Pharmacy,Discount Store,Bakery,Supermarket,Athletics & Sports,Fast Food Restaurant,Liquor Store,Middle Eastern Restaurant,Music Venue,Park
2,Exhibition Place,Breakfast Spot,Café,Coffee Shop,Yoga Studio,Gym / Fitness Center,Burrito Place,Restaurant,Caribbean Restaurant,Climbing Gym,Pet Store
3,Forest Hill SE\r\r\n,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
4,Forest Hill West\r\r\n,Park,Trail,Sushi Restaurant,Jewelry Store,Women's Store,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


## Now its time for k-Means to cluster these neighborhoods

In [34]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 1, 1, 1, 0, 1, 1, 3, 1, 3])

#### Lets add clustering labels to data

In [35]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)


toronto_merged = tdata

 #merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood\r\r\n')

toronto_merged # check the last columns!

Unnamed: 0,Borough,Latitude,Longitude,Neighbourhood,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,43.728020,-79.388790,Lawrence Park,M4N,1,Dim Sum Restaurant,Park,Swim School,Bus Line,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,Central Toronto,43.712751,-79.390197,Davisville North\r\r\n,M4P,1,Sandwich Place,Burger Joint,Gym,Pizza Place,Park,Breakfast Spot,Hotel,Clothing Store,Food & Drink Shop,Discount Store
2,Central Toronto,43.715383,-79.405678,North Toronto West\r\r\n,M4R,1,Coffee Shop,Sporting Goods Shop,Clothing Store,Furniture / Home Store,Fast Food Restaurant,Mexican Restaurant,Diner,Miscellaneous Shop,Dessert Shop,Park
3,Central Toronto,43.704324,-79.388790,Davisville\r\r\n,M4S,1,Pizza Place,Sandwich Place,Dessert Shop,Café,Italian Restaurant,Sushi Restaurant,Coffee Shop,Pharmacy,Deli / Bodega,Greek Restaurant
4,Central Toronto,43.689574,-79.383160,Moore Park,M4T,2,Playground,Park,Summer Camp,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
5,Central Toronto,43.689574,-79.383160,Summerhill East\r\r\n,M4T,2,Playground,Park,Summer Camp,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
6,Central Toronto,43.686412,-79.400049,Deer Park,M4V,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
7,Central Toronto,43.686412,-79.400049,Forest Hill SE\r\r\n,M4V,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
8,Central Toronto,43.686412,-79.400049,Rathnelly,M4V,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
9,Central Toronto,43.686412,-79.400049,South Hill,M4V,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant


#### lets visualize these clusters using folium map

In [36]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood\r\r\n'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Now lets examine the neigborhoods with lesser number of cafes and restaurants

### cluster 1

In [53]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[3] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Forest Hill North,0,Park,Trail,Sushi Restaurant,Jewelry Store,Women's Store,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
13,Forest Hill West\r\r\n,0,Park,Trail,Sushi Restaurant,Jewelry Store,Women's Store,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


### cluster 2

In [54]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[3] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Lawrence Park,1,Dim Sum Restaurant,Park,Swim School,Bus Line,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,Davisville North\r\r\n,1,Sandwich Place,Burger Joint,Gym,Pizza Place,Park,Breakfast Spot,Hotel,Clothing Store,Food & Drink Shop,Discount Store
2,North Toronto West\r\r\n,1,Coffee Shop,Sporting Goods Shop,Clothing Store,Furniture / Home Store,Fast Food Restaurant,Mexican Restaurant,Diner,Miscellaneous Shop,Dessert Shop,Park
3,Davisville\r\r\n,1,Pizza Place,Sandwich Place,Dessert Shop,Café,Italian Restaurant,Sushi Restaurant,Coffee Shop,Pharmacy,Deli / Bodega,Greek Restaurant
6,Deer Park,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
7,Forest Hill SE\r\r\n,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
8,Rathnelly,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
9,South Hill,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
10,Summerhill West\r\r\n,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Convenience Store,Medical Center,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant
14,The Annex,1,Sandwich Place,Café,Coffee Shop,Pizza Place,Pharmacy,Indian Restaurant,Liquor Store,Burger Joint,Jewish Restaurant,Cheese Shop


### cluster 3

In [55]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[3] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Moore Park,2,Playground,Park,Summer Camp,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
5,Summerhill East\r\r\n,2,Playground,Park,Summer Camp,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
17,Rosedale,2,Park,Playground,Trail,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


### cluster 4

In [56]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[3] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
43,CN Tower,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
44,Bathurst Quay\r\r\n,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
45,Island airport\r\r\n,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
46,Harbourfront West\r\r\n,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
47,King and Spadina,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
48,Railway Lands,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
49,South Niagara,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport


### cluster 5

In [75]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[3] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Roselawn\r\r\n,4,Garden,Women's Store,Diner,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


### from the clusters we can clearly see that Neighborhoods with label 1 can not be optimum for opening up a restaurant as it is filled with alot of eating places

In [None]:
#lets get all the optimal neighborhoods in one place
optimal = pd.DataFrame()

In [85]:
optimal = toronto_merged[toronto_merged['Cluster Labels'] != 1]

# OPTIMAL NEIGHBOURHOODS AND BOROUGHS FOR RESTAURANT OPENING

In [88]:
optimal

Unnamed: 0,Borough,Latitude,Longitude,Neighbourhood,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,43.689574,-79.38316,Moore Park,M4T,2,Playground,Park,Summer Camp,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
5,Central Toronto,43.689574,-79.38316,Summerhill East\r\r\n,M4T,2,Playground,Park,Summer Camp,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
11,Central Toronto,43.711695,-79.416936,Roselawn\r\r\n,M5N,4,Garden,Women's Store,Diner,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
12,Central Toronto,43.696948,-79.411307,Forest Hill North,M5P,0,Park,Trail,Sushi Restaurant,Jewelry Store,Women's Store,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
13,Central Toronto,43.696948,-79.411307,Forest Hill West\r\r\n,M5P,0,Park,Trail,Sushi Restaurant,Jewelry Store,Women's Store,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
17,Downtown Toronto,43.679563,-79.377529,Rosedale,M4W,2,Park,Playground,Trail,Women's Store,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
43,Downtown Toronto,43.628947,-79.39442,CN Tower,M5V,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
44,Downtown Toronto,43.628947,-79.39442,Bathurst Quay\r\r\n,M5V,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
45,Downtown Toronto,43.628947,-79.39442,Island airport\r\r\n,M5V,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
46,Downtown Toronto,43.628947,-79.39442,Harbourfront West\r\r\n,M5V,3,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Harbor / Marina,Airport Gate,Airport
