![Header Image](https://cdn.iconscout.com/icon/free/png-256/python-12-555278.png "header")

# Capstone Project – The Battle of Neighborhoods | Finding a Familiar Place in Toronto/New York

## Part 0: Setting up libraries

In [6]:
# Installing libraries
# !conda install -c conda-forge geopy --yes
# !conda install -c conda-forge folium --yes
!python3 -m pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 7.6MB/s ta 0:00:011
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [7]:
import requests # library to handle requests
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
import pandas as pd
import geopy
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# import k-means from clustering stage
from sklearn.cluster import KMeans

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML

# Matplotlib and associated plotting modules
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
    
# tranforming json file into a pandas dataframe library
# from pandas import json_normalize
import json
from pandas.io.json import json_normalize

import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


---
## Part 1: Creating the initial dataframes

### Toronto: importing data

In [8]:
# Set dataframe display size
desired_width = 320

pd.set_option('display.width', desired_width)
pd.set_option('display.max_columns', 10)

# Read table directly from Wikipedia using pandas inbuilt function
df_torontoll = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]

In [9]:
# Keep only Postal Codes with assigned Boroughs
df_torontoll = df_torontoll[df_torontoll.Borough != 'Not assigned']

In [10]:
# Reset the index of the data frame
df_torontoll.index = pd.RangeIndex(len(df_torontoll.index))

# Check for duplicates
number_duplicates = df_torontoll.duplicated(subset='Postal Code', keep='first').sum()
# Check for Borough with Neighbourhood Not assigned
number_not_assigned_neighbourhood = df_torontoll[df_torontoll.Neighbourhood == 'Not assigned'].shape[0]
print("There are {} duplicate Postal Codes in the dataframe and {} rows with no assigned Neighbourhood.".format(number_duplicates, number_not_assigned_neighbourhood))

There are 0 duplicate Postal Codes in the dataframe and 0 rows with no assigned Neighbourhood.


In [11]:
# Add geographic coordinates to dataframe
# with csv file

df_latlong = pd.read_csv("http://cocl.us/Geospatial_data")

for index, postal_code in zip(df_torontoll.index, df_torontoll['Postal Code']):

    lat_lng_coords = df_latlong.loc[df_latlong['Postal Code'] == postal_code, ['Latitude', 'Longitude']]
    df_torontoll.loc[index, 'Latitude'] = lat_lng_coords.iloc[0, 0]
    df_torontoll.loc[index, 'Longitude'] = lat_lng_coords.iloc[0, 1]


df_torontoll.drop('Postal Code', axis=1, inplace=True)

df_torontoll.head(10)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,North York,Don Mills,43.745906,-79.352188
8,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [12]:
# Check shape
df_torontoll.shape

(103, 4)

### New York: importing data

In [13]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
    
print('Data downloaded!')

with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)


Data downloaded!


In [16]:
neighbourhoods_data = newyork_data['features']

In [17]:
# define the dataframe columns
column_names = ['Borough', 'Neighbourhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
df_newyorkll = pd.DataFrame(columns=column_names)

# populate dataframe
for data in neighbourhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighbourhood_name = data['properties']['name']
        
    neighbourhood_latlon = data['geometry']['coordinates']
    neighbourhood_lat = neighbourhood_latlon[1]
    neighbourhood_lon = neighbourhood_latlon[0]
    
    df_newyorkll = df_newyorkll.append({'Borough': borough,
                                          'Neighbourhood': neighbourhood_name,
                                          'Latitude': neighbourhood_lat,
                                          'Longitude': neighbourhood_lon}, ignore_index=True)

In [18]:
df_newyorkll.head(10)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [19]:
print('The dataframe has {} boroughs and {} neighbourhoods.'.format(
        len(df_newyorkll['Borough'].unique()),
        df_newyorkll.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighbourhoods.


---
## Part 2: Combining the data from Toronto and New York

In [20]:
# Rename Boroughs to better differentiate between Toronto, TO, and New York, NY
df_torontoll['Borough'] = [Borough + " - TO" for Borough in df_torontoll['Borough']]
df_newyorkll['Borough'] = [Borough + " - NY" for Borough in df_newyorkll['Borough']]

In [21]:
# Append data
df_TONY = df_torontoll.append(df_newyorkll, ignore_index=True)
df_TONY.shape

(409, 4)

In [24]:
# Check new dataframe

def ends(df, x=5):
    return df.head(x).append(df.tail(x))

ends(df_TONY, 3)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,North York - TO,Parkwoods,43.753259,-79.329656
1,North York - TO,Victoria Village,43.725882,-79.315572
2,Downtown Toronto - TO,"Regent Park, Harbourfront",43.65426,-79.360636
406,Queens - NY,Bayswater,40.611322,-73.765968
407,Queens - NY,Queensbridge,40.756091,-73.945631
408,Staten Island - NY,Fox Hills,40.617311,-74.08174


---
## Part 3: Exploring and clustering the neighbourhoods in Toronto and New York

In [25]:
# The code was removed by Watson Studio for sharing.

Your credentails:
CLIENT_ID: ____________
CLIENT_SECRET:__________


In [26]:
# The code was removed by Watson Studio for sharing.

In [28]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude_TO = location.latitude
longitude_TO = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude_TO, longitude_TO))

The geograpical coordinate of Toronto City are 43.6534817, -79.3839347.


In [29]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude_NY = location.latitude
longitude_NY = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude_NY, longitude_NY))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [30]:
# Midpoint between cities:

latitude = (latitude_TO + latitude_NY)/2
longitude = (longitude_TO + longitude_NY)/2

In [31]:
#create map of Toronto and New York
map_TONY = folium.Map(location=[latitude, longitude], zoom_start=7.5)

# add markers to map
for lat, lng, borough, Neighbourhood in zip(df_TONY['Latitude'], df_TONY['Longitude'], df_TONY['Borough'], df_TONY['Neighbourhood']):
    label = '{}, {}'.format(Neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_TONY)
    
map_TONY

In [32]:
# explore all neighbourhoods in boroughs

def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [33]:
borough_venues = getNearbyVenues(names=df_TONY['Neighbourhood'],
                                   latitudes=df_TONY['Latitude'],
                                   longitudes=df_TONY['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [34]:
print(borough_venues.shape) #the size of the resulting dataframe
borough_venues.head()

(12252, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [35]:
# check how many venues were returned for each neighbourhood
borough_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",6,6,6,6,6,6
Allerton,30,30,30,30,30,30
Annadale,13,13,13,13,13,13
Arden Heights,4,4,4,4,4,4
Arlington,4,4,4,4,4,4
Arrochar,20,20,20,20,20,20
Arverne,20,20,20,20,20,20
Astoria,98,98,98,98,98,98
Astoria Heights,15,15,15,15,15,15


In [36]:
# find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(borough_venues['Venue Category'].unique())))

There are 461 uniques categories.


In [37]:
# analyse each neighbourhood in boroughs

# one hot encoding
borough_onehot = pd.get_dummies(borough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighbourhood column back to dataframe
borough_onehot['Neighbourhood'] = borough_venues['Neighbourhood'] 

# move neighbourhood column to the first column
fixed_columns = [borough_onehot.columns[-1]] + list(borough_onehot.columns[:-1])
borough_onehot = borough_onehot[fixed_columns]

borough_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,...,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,...,0,0,0,0,0
1,Parkwoods,0,0,0,0,...,0,0,0,0,0
2,Victoria Village,0,0,0,0,...,0,0,0,0,0
3,Victoria Village,0,0,0,0,...,0,0,0,0,0
4,Victoria Village,0,0,0,0,...,0,0,0,0,0


In [38]:
borough_onehot.shape

(12252, 462)

In [39]:
# let's group rows by neighbourhood and by taking the mean of the frequency of occurrence of each category

borough_grouped = borough_onehot.groupby('Neighbourhood').mean().reset_index()
borough_grouped.head(10)

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,...,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
2,Allerton,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
3,Annadale,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
4,Arden Heights,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
5,Arlington,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
6,Arrochar,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
7,Arverne,0.0,0.0,0.0,0.0,...,0.0,0.05,0.0,0.0,0.0
8,Astoria,0.0,0.0,0.0,0.0,...,0.0,0.010204,0.0,0.0,0.0
9,Astoria Heights,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0


In [40]:
borough_grouped.shape

(396, 462)

In [41]:
# print each neighbourhood along with the top 5 most common venues
num_top_venues = 5

for hood in borough_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = borough_grouped[borough_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0             Breakfast Spot  0.25
1               Skating Rink  0.25
2                     Lounge  0.25
3  Latin American Restaurant  0.25
4      Outdoors & Recreation  0.00


----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.33
1             Pub  0.17
2             Gym  0.17
3     Coffee Shop  0.17
4  Sandwich Place  0.17


----Allerton----
                venue  freq
0         Pizza Place  0.17
1                 Spa  0.07
2         Supermarket  0.07
3  Chinese Restaurant  0.07
4       Deli / Bodega  0.07


----Annadale----
         venue  freq
0  Pizza Place  0.23
1     Pharmacy  0.08
2         Food  0.08
3       Bakery  0.08
4         Park  0.08


----Arden Heights----
               venue  freq
0           Pharmacy  0.25
1        Coffee Shop  0.25
2           Bus Stop  0.25
3        Pizza Place  0.25
4  Accessories Store  0.00


----Arlington----
                 venue  freq
0  American Restaurant  0.25

In [42]:
# turn into a pandas dataframe

# a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [43]:
# create the new dataframe and display the top 10 venues for each neighbourhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = borough_grouped['Neighbourhood']

for ind in np.arange(borough_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(borough_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Breakfast Spot,Lounge,Skating Rink,Latin American Restaurant,...,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
1,"Alderwood, Long Branch",Pizza Place,Gym,Pub,Coffee Shop,...,Yoga Studio,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
2,Allerton,Pizza Place,Chinese Restaurant,Spa,Deli / Bodega,...,Cosmetics Shop,Bus Station,Martial Arts Dojo,Grocery Store,Gas Station
3,Annadale,Pizza Place,Bakery,Liquor Store,Bar,...,Train Station,Pharmacy,Restaurant,Dance Studio,Park
4,Arden Heights,Pizza Place,Pharmacy,Coffee Shop,Bus Stop,...,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant


In [44]:
# Cluster Neighbourhoods
# Run k-means to cluster the neighbourhood into 5 clusters
# set number of clusters
kclusters = 5

borough_grouped_clustering = borough_grouped.drop('Neighbourhood', axis=1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(borough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 4, 4, 4, 4, 2, 2, 0, 0, 4, 0, 4, 4, 0, 0, 0, 4, 0, 2, 4, 4, 0,
       0, 4, 0, 0, 2, 4, 4, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 4, 0, 4, 0,
       2, 4, 4, 0, 0, 0, 0, 1, 4, 0, 2, 0, 0, 4, 4, 4, 0, 0, 0, 0, 0, 0,
       0, 0, 4, 0, 4, 4, 1, 0, 4, 0, 0, 4, 0, 4, 0, 0, 4, 4, 4, 4, 2, 4,
       4, 0, 0, 0, 4, 0, 4, 0, 4, 4, 0, 4, 0, 4, 0, 4, 0, 2, 1, 4, 0, 0,
       4, 4, 4, 0, 0, 2, 0, 4, 0, 4, 4, 0, 4, 4, 0, 0, 4, 0, 4, 4, 0, 4,
       0, 4, 4, 0, 4, 2, 4, 0, 0, 4, 0, 4, 0, 2, 0, 0, 0, 4, 4, 4, 4, 4,
       0, 4, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 4, 0, 0, 4, 4, 4, 0, 4, 4, 3,
       2, 0, 4, 0, 0, 0, 4, 4, 4, 0, 4, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
       0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 2, 0, 4, 0, 4, 0, 0, 4, 0, 4, 2, 4,
       4, 0, 0, 0, 0, 4, 4, 0, 1, 4, 2, 0, 4, 4, 4, 4, 4, 4, 0, 0, 2, 0,
       2, 4, 4, 0, 0, 2, 0, 4, 0, 0, 0, 0, 4, 4, 0, 2, 4, 3, 0, 4, 4, 2,
       0, 0, 4, 0, 4, 1, 4, 4, 4, 4, 0, 0, 4, 4, 4, 0, 4, 4, 0, 4, 4, 0,
       2, 0, 0, 0, 4, 4, 4, 0, 4, 0, 4, 4, 4, 0, 4,

In [45]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighbourhood

# add clustering labels

neighbourhoods_venues_sorted.insert(0,"Cluster Labels",kmeans.labels_)

In [46]:
neighbourhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Agincourt,Breakfast Spot,Lounge,Skating Rink,...,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
1,4,"Alderwood, Long Branch",Pizza Place,Gym,Pub,...,Yoga Studio,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
2,4,Allerton,Pizza Place,Chinese Restaurant,Spa,...,Cosmetics Shop,Bus Station,Martial Arts Dojo,Grocery Store,Gas Station
3,4,Annadale,Pizza Place,Bakery,Liquor Store,...,Train Station,Pharmacy,Restaurant,Dance Studio,Park
4,4,Arden Heights,Pizza Place,Pharmacy,Coffee Shop,...,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant


In [62]:
borough_merged = df_TONY.copy()

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighbourhood
borough_merged = borough_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

# go horse
borough_merged = borough_merged.dropna() 
borough_merged[['Cluster Labels']] = borough_merged[['Cluster Labels']].astype('int32')

print(borough_merged.shape)
borough_merged.head(10)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York - TO,Parkwoods,43.753259,-79.329656,1,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
1,North York - TO,Victoria Village,43.725882,-79.315572,0,...,Hockey Arena,Empanada Restaurant,English Restaurant,Entertainment Service,Factory
2,Downtown Toronto - TO,"Regent Park, Harbourfront",43.65426,-79.360636,0,...,Breakfast Spot,Theater,Yoga Studio,Chocolate Shop,Performing Arts Venue
3,North York - TO,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,...,Vietnamese Restaurant,Event Space,Miscellaneous Shop,Boutique,Women's Store
4,Downtown Toronto - TO,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,...,Sandwich Place,Bank,Gym,Theater,Park
6,Scarborough - TO,"Malvern, Rouge",43.806686,-79.194353,4,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
7,North York - TO,Don Mills,43.745906,-79.352188,0,...,Clothing Store,Art Gallery,Café,Supermarket,Bike Shop
8,East York - TO,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,4,...,Pharmacy,Athletics & Sports,Bank,Café,Intersection
9,Downtown Toronto - TO,"Garden District, Ryerson",43.657162,-79.378937,0,...,Cosmetics Shop,Bubble Tea Shop,Pizza Place,Bookstore,Diner
10,North York - TO,Glencairn,43.709577,-79.445073,0,...,Falafel Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service


In [84]:
# visualize the resulting clusters

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=7.5)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

for cluster in range(0,kclusters): 
    group = folium.FeatureGroup(name='<span style=\\"color: {0};\\">{1}</span>'.format(rainbow[cluster-1],cluster))
    for lat, lon, poi, label in zip(borough_merged['Latitude'], borough_merged['Longitude'], borough_merged['Neighbourhood'], borough_merged['Cluster Labels']):
        if int(label) == cluster: 
            label = folium.Popup('ORIG. '+ str(poi) + 'Cluster ' + str(cluster), parse_html=True)
            folium.CircleMarker(
                (lat, lon),
                radius=5,
                popup=label,
                color=rainbow[cluster-1],
                fill=True,
                fill_color=rainbow[cluster-1],
                fill_opacity=0.7).add_to(group)
    group.add_to(map_clusters)

folium.map.LayerControl('topright', collapsed=False).add_to(map_clusters)
    
map_clusters

In [85]:
# visualize the resulting clusters

# create map
map_clusters = folium.Map(location=[latitude_TO, longitude_TO], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

for cluster in range(0,kclusters): 
    group = folium.FeatureGroup(name='<span style=\\"color: {0};\\">{1}</span>'.format(rainbow[cluster-1],cluster))
    for lat, lon, poi, label in zip(borough_merged['Latitude'], borough_merged['Longitude'], borough_merged['Neighbourhood'], borough_merged['Cluster Labels']):
        if int(label) == cluster: 
            label = folium.Popup('ORIG. '+ str(poi) + 'Cluster ' + str(cluster), parse_html=True)
            folium.CircleMarker(
                (lat, lon),
                radius=5,
                popup=label,
                color=rainbow[cluster-1],
                fill=True,
                fill_color=rainbow[cluster-1],
                fill_opacity=0.7).add_to(group)
    group.add_to(map_clusters)

folium.map.LayerControl('topright', collapsed=False).add_to(map_clusters)
    
map_clusters

In [86]:
# visualize the resulting clusters

# create map
map_clusters = folium.Map(location=[latitude_NY, longitude_NY], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

for cluster in range(0,kclusters): 
    group = folium.FeatureGroup(name='<span style=\\"color: {0};\\">{1}</span>'.format(rainbow[cluster-1],cluster))
    for lat, lon, poi, label in zip(borough_merged['Latitude'], borough_merged['Longitude'], borough_merged['Neighbourhood'], borough_merged['Cluster Labels']):
        if int(label) == cluster: 
            label = folium.Popup('ORIG. '+ str(poi) + 'Cluster ' + str(cluster), parse_html=True)
            folium.CircleMarker(
                (lat, lon),
                radius=5,
                popup=label,
                color=rainbow[cluster-1],
                fill=True,
                fill_color=rainbow[cluster-1],
                fill_opacity=0.7).add_to(group)
    group.add_to(map_clusters)

folium.map.LayerControl('topright', collapsed=False).add_to(map_clusters)
    
map_clusters

#### Examine each cluster

In [64]:
#CLUSTER 1

borough_merged.loc[borough_merged['Cluster Labels'] == 0, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,Intersection,French Restaurant,Pizza Place,Coffee Shop,...,Hockey Arena,Empanada Restaurant,English Restaurant,Entertainment Service,Factory
2,"Regent Park, Harbourfront",Coffee Shop,Bakery,Pub,Park,...,Breakfast Spot,Theater,Yoga Studio,Chocolate Shop,Performing Arts Venue
3,"Lawrence Manor, Lawrence Heights",Furniture / Home Store,Clothing Store,Accessories Store,Athletics & Sports,...,Vietnamese Restaurant,Event Space,Miscellaneous Shop,Boutique,Women's Store
4,"Queen's Park, Ontario Provincial Government",Coffee Shop,Diner,Yoga Studio,Arts & Crafts Store,...,Sandwich Place,Bank,Gym,Theater,Park
7,Don Mills,Gym,Japanese Restaurant,Beer Store,Coffee Shop,...,Clothing Store,Art Gallery,Café,Supermarket,Bike Shop
9,"Garden District, Ryerson",Clothing Store,Coffee Shop,Café,Japanese Restaurant,...,Cosmetics Shop,Bubble Tea Shop,Pizza Place,Bookstore,Diner
10,Glencairn,Pub,Japanese Restaurant,Metro Station,Asian Restaurant,...,Falafel Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
11,"West Deane Park, Princess Gardens, Martin Grov...",Jewelry Store,Yoga Studio,Farmers Market,Electronics Store,...,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space
12,"Rouge Hill, Port Union, Highland Creek",Bar,Yoga Studio,Farm,Electronics Store,...,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space
13,Don Mills,Gym,Japanese Restaurant,Beer Store,Coffee Shop,...,Clothing Store,Art Gallery,Café,Supermarket,Bike Shop


In [65]:
#CLUSTER 2

borough_merged.loc[borough_merged['Cluster Labels'] == 1, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,Food & Drink Shop,Park,Yoga Studio,Falafel Restaurant,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
21,Caledonia-Fairbanks,Park,Women's Store,Pool,Yoga Studio,...,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
35,"East Toronto, Broadview North (Old East York)",Convenience Store,Park,Coffee Shop,Yoga Studio,...,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant
52,"Willowdale, Newtonbrook",Park,Yoga Studio,Eastern European Restaurant,Electronics Store,...,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space
64,Weston,Convenience Store,Park,Yoga Studio,Farm,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
66,York Mills West,Convenience Store,Park,Yoga Studio,Farm,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
85,"Milliken, Agincourt North, Steeles East, L'Amo...",Playground,Park,Falafel Restaurant,Egyptian Restaurant,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
98,"The Kingsway, Montgomery Road, Old Mill North",Park,River,Pool,Yoga Studio,...,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service
130,Clason Point,Park,Pool,Grocery Store,Bus Stop,...,South American Restaurant,Convenience Store,English Restaurant,Entertainment Service,Falafel Restaurant
295,Somerville,Park,Yoga Studio,Eastern European Restaurant,Electronics Store,...,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space


In [66]:
#CLUSTER 3

borough_merged.loc[borough_merged['Cluster Labels'] == 2, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Humewood-Cedarvale,Hockey Arena,Playground,Trail,Field,...,Eye Doctor,Exhibit,Event Space,Event Service,Yoga Studio
32,Scarborough Village,Playground,Pizza Place,Falafel Restaurant,Egyptian Restaurant,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
83,"Moore Park, Summerhill East",Trail,Tennis Court,Lawyer,Playground,...,Factory,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
110,Woodlawn,Pizza Place,Deli / Bodega,Pub,Playground,...,Train Station,Rental Car Location,Pharmacy,Donut Shop,Park
131,Throgs Neck,Deli / Bodega,Pizza Place,Coffee Shop,Asian Restaurant,...,American Restaurant,Chinese Restaurant,Italian Restaurant,Sports Bar,Factory
132,Country Club,Sandwich Place,Athletics & Sports,Spa,Playground,...,Yoga Studio,Eye Doctor,Exhibit,Event Space,Falafel Restaurant
135,Van Nest,Pizza Place,Deli / Bodega,Donut Shop,Coffee Shop,...,Bus Station,Spa,Middle Eastern Restaurant,Supermarket,BBQ Joint
175,East New York,Deli / Bodega,Plaza,Gym,Pizza Place,...,Home Service,Caribbean Restaurant,Event Service,Business Service,Metro Station
177,Canarsie,Gym,Grocery Store,Caribbean Restaurant,Deli / Bodega,...,Yoga Studio,Factory,Empanada Restaurant,English Restaurant,Entertainment Service
192,Ocean Hill,Deli / Bodega,Supermarket,Bus Stop,Southern / Soul Food Restaurant,...,Playground,Convenience Store,Pharmacy,Mexican Restaurant,Chinese Restaurant


In [67]:
#CLUSTER 4

borough_merged.loc[borough_merged['Cluster Labels'] == 3, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,"Humberlea, Emery",Baseball Field,Yoga Studio,Farm,Electronics Store,...,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space
101,"Old Mill South, King's Mill Park, Sunnylea, Hu...",Baseball Field,Yoga Studio,Farm,Electronics Store,...,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space


In [68]:
#CLUSTER 5

borough_merged.loc[borough_merged['Cluster Labels'] == 4, borough_merged.columns[[1] + list(range(5, borough_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Malvern, Rouge",Fast Food Restaurant,Yoga Studio,Falafel Restaurant,Egyptian Restaurant,...,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
8,"Parkview Hill, Woodbine Gardens",Pizza Place,Breakfast Spot,Gastropub,Gym / Fitness Center,...,Pharmacy,Athletics & Sports,Bank,Café,Intersection
17,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Pizza Place,Pet Store,Café,Coffee Shop,...,Pharmacy,Convenience Store,Beer Store,Entertainment Service,English Restaurant
26,Cedarbrae,Fried Chicken Joint,Gas Station,Hakka Restaurant,Thai Restaurant,...,Caribbean Restaurant,Bakery,Bank,Fish & Chips Shop,Event Space
28,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Supermarket,Shopping Mall,...,Grocery Store,Gas Station,Middle Eastern Restaurant,Sandwich Place,Mobile Phone Shop
29,Thorncliffe Park,Indian Restaurant,Sandwich Place,Yoga Studio,Pizza Place,...,Bank,Gym,Liquor Store,Middle Eastern Restaurant,Coffee Shop
31,"Dufferin, Dovercourt Village",Bakery,Pharmacy,Pizza Place,Brewery,...,Café,Middle Eastern Restaurant,Grocery Store,Bar,Bank
39,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,...,Food,Electronics Store,Food Court,Empanada Restaurant,English Restaurant
40,Downsview,Grocery Store,Park,Liquor Store,Baseball Field,...,Korean Restaurant,Shopping Mall,Business Service,Athletics & Sports,Gym / Fitness Center
46,Downsview,Grocery Store,Park,Liquor Store,Baseball Field,...,Korean Restaurant,Shopping Mall,Business Service,Athletics & Sports,Gym / Fitness Center


## The END

### Thank you