# Toronto Neighborhoods

## Part 1 - Creating the pandas dataframe from the Wikipedia web page

### Importing libraries and scraping url

In [1]:
# Unmark module install if required
#! pip install lxml html5lib beautifulsoup4
import pandas as pd

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
dfs = pd.read_html(url)
print(dfs[0])

    Postal Code           Borough  \
0           M1A      Not assigned   
1           M2A      Not assigned   
2           M3A        North York   
3           M4A        North York   
4           M5A  Downtown Toronto   
..          ...               ...   
175         M5Z      Not assigned   
176         M6Z      Not assigned   
177         M7Z      Not assigned   
178         M8Z         Etobicoke   
179         M9Z      Not assigned   

                                         Neighbourhood  
0                                         Not assigned  
1                                         Not assigned  
2                                            Parkwoods  
3                                     Victoria Village  
4                            Regent Park, Harbourfront  
..                                                 ...  
175                                       Not assigned  
176                                       Not assigned  
177                                       

### Creating the dataframe

In [3]:
df = pd.DataFrame()
postal = dfs[0]['Postal Code']
borough = dfs[0]['Borough']
neighbor = dfs[0]['Neighbourhood']
df['PostalCode'] = postal
df['Borough'] = borough
df['Neighborhood'] = neighbor

In [4]:
df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


### The postal codes with multiple neighborhoods are already in the right format.

### Droping the "Not assigned" Boroughs

In [5]:
df1 = df[~df.Borough.str.contains("Not assigned")]
df1.reset_index(inplace = True, drop = True) 
df1.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### There are no "Not assigned" neighborhoods in the dataframe, so no cleaning to do for this.

In [6]:
df1.shape

(103, 3)

## Part 2 - Adding latitude and longitude to the dataframe

(geocoder did not work!)

### Retrieving csv and creating dataframe

In [7]:
url = "http://cocl.us/Geospatial_data"
df2 = pd.read_csv(url)
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Creating new columns

In [8]:
df1['Latitude'] = 0.0
df1['Longitude'] = 0.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


In [9]:
df1.tail()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.0
99,M4Y,Downtown Toronto,Church and Wellesley,0.0,0.0
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",0.0,0.0
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.0
102,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",0.0,0.0


In [10]:
print(len(df1.index))

103


### Iterating through postal codes in df2 to assign lat/long to df1

In [11]:
for i in range(0,len(df2.index)):
    for j in range(0,len(df1.index)):
        if df2.loc[i,'Postal Code'] == df1.loc[j,'PostalCode']:
            df1.loc[j,'Latitude'] = df2.loc[i,'Latitude']
            df1.loc[j,'Longitude'] = df2.loc[i,'Longitude']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [12]:
df1.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Part 3 - Explore and cluster the neighborhoods in Toronto

In [13]:
from geopy.geocoders import Nominatim # convert an address into latitude and lon gitude values
! pip install folium
import folium # map rendering library



In [14]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [15]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df1['Latitude'], df1['Longitude'], df1['Borough'], df1['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [16]:
# The code was removed by Watson Studio for sharing.

### Grouping boroughs together and averaging lat/long

In [17]:
df4 = pd.DataFrame()
df4['Borough'] = ""
df4['Latitude'] = 0.0
df4["Longitude"] = 0.0

latavg = 0.0
latcount = 0
longavg = 0.0
longcount = 0

for j in range(0,len(df1.index)):   
    latavg = 0.0
    latcount = 0
    longavg = 0.0
    longcount = 0
    
    for i in range(0,len(df1.index)):
        
        if df1.loc[j,'Borough'] == df1.loc[i,'Borough']:
            latavg = latavg + df1.loc[i,'Latitude']
            latcount = latcount + 1
            longavg = longavg + df1.loc[i,'Longitude']
            longcount = longcount + 1
            df4.loc[j,'Borough'] = df1.loc[j,'Borough']
            df4.loc[j,'Latitude'] = latavg/latcount
            df4.loc[j,'Longitude'] = longavg/longcount
 
df4.drop_duplicates(subset=['Borough'],inplace = True)
df4.reset_index(inplace = True, drop = True) 
df4



Unnamed: 0,Borough,Latitude,Longitude
0,North York,43.750727,-79.429338
1,Downtown Toronto,43.654597,-79.383972
2,Etobicoke,43.660043,-79.542074
3,Scarborough,43.766229,-79.249085
4,East York,43.700303,-79.335851
5,York,43.690797,-79.472633
6,East Toronto,43.669436,-79.324654
7,West Toronto,43.652653,-79.44929
8,Central Toronto,43.70198,-79.398954
9,Mississauga,43.636966,-79.615819


In [18]:
# create map of Toronto Boroughs using latitude and longitude values
map_toronto2 = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough in zip(df4['Latitude'], df4['Longitude'], df4['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto2)  
    
map_toronto2

### Looks good, I'll use all the boroughs for analysis. For here to the end, I replicate the same analysis we did to the New York City data.

In [19]:
import json
import requests

borough_latitude = df4.loc[0,'Latitude'] # neighborhood latitude value
borough_longitude = df4.loc[0,'Longitude'] # neighborhood longitude value
borough_name = df4.loc[0,'Borough'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(borough_name, 
                                                               borough_latitude, 
                                                               borough_longitude))

VERSION = '20180605' # Foursquare API version

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 3000 # define radius of 3000m the boroughs are approximately 6 km appart

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    borough_latitude, 
    borough_longitude,
    radius, 
    LIMIT)

print(url) # display URL    
 

results = requests.get(url).json()
#results

Latitude and longitude values of North York are 43.750727425, -79.42933832499999.
https://api.foursquare.com/v2/venues/explore?&client_id=HGTQKEYL2BGEFGU2JS43LI4AOWASODLQC33WIYUKCBAMAPVK&client_secret=K2CFUHUWDCGEWHF144ORU0QSB32HRYSLVF4FFNYX2DJY3UMJ&v=20180605&ll=43.750727425,-79.42933832499999&radius=3000&limit=100


In [20]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [21]:
from pandas.io.json import json_normalize # tranform JSON file into a pandas dat aframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()



Unnamed: 0,name,categories,lat,lng
0,Earl Bales Park,Park,43.753043,-79.436228
1,Earl Bales Ski and Snowboard Centre,Ski Chalet,43.752631,-79.431865
2,Best for Bride,Bridal Shop,43.755789,-79.437834
3,Pancer's Original Deli,Deli / Bodega,43.740804,-79.43519
4,Auberge du Pommier,French Restaurant,43.746962,-79.407879


In [22]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=3000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
toronto_venues = getNearbyVenues(names=df4['Borough'],
                                   latitudes=df4['Latitude'],
                                   longitudes=df4['Longitude']
                                  )

North York
Downtown Toronto
Etobicoke
Scarborough
East York
York
East Toronto
West Toronto
Central Toronto
Mississauga


In [25]:
print(toronto_venues.shape)
toronto_venues.head()

(980, 7)


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,North York,43.750727,-79.429338,Earl Bales Park,43.753043,-79.436228,Park
1,North York,43.750727,-79.429338,Earl Bales Ski and Snowboard Centre,43.752631,-79.431865,Ski Chalet
2,North York,43.750727,-79.429338,Best for Bride,43.755789,-79.437834,Bridal Shop
3,North York,43.750727,-79.429338,Pancer's Original Deli,43.740804,-79.43519,Deli / Bodega
4,North York,43.750727,-79.429338,Auberge du Pommier,43.746962,-79.407879,French Restaurant


In [26]:
toronto_venues.groupby('Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,100,100,100,100,100,100
Downtown Toronto,100,100,100,100,100,100
East Toronto,100,100,100,100,100,100
East York,100,100,100,100,100,100
Etobicoke,100,100,100,100,100,100
Mississauga,80,80,80,80,80,80
North York,100,100,100,100,100,100
Scarborough,100,100,100,100,100,100
West Toronto,100,100,100,100,100,100
York,100,100,100,100,100,100


In [27]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 196 uniques categories.


In [28]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add borough column back to dataframe
toronto_onehot['Borough'] = toronto_venues['Borough'] 

# move borough column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Borough,Afghan Restaurant,American Restaurant,Amphitheater,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio
0,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
toronto_onehot.shape

(980, 197)

In [30]:
toronto_grouped = toronto_onehot.groupby('Borough').mean().reset_index()
toronto_grouped

Unnamed: 0,Borough,Afghan Restaurant,American Restaurant,Amphitheater,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01
1,Downtown Toronto,0.0,0.02,0.0,0.0,0.01,0.01,0.01,0.01,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,East Toronto,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,East York,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,...,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0
5,Mississauga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,...,0.0,0.0,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North York,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
7,Scarborough,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,...,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0
8,West Toronto,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.03,0.01,...,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0
9,York,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01


In [31]:
toronto_grouped.shape

(10, 197)

In [32]:
num_top_venues = 5

for hood in toronto_grouped['Borough']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Borough'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
                 venue  freq
0                 Park  0.09
1          Coffee Shop  0.09
2   Italian Restaurant  0.07
3                 Café  0.06
4  Sporting Goods Shop  0.03


----Downtown Toronto----
                 venue  freq
0          Coffee Shop  0.08
1           Restaurant  0.05
2                Diner  0.03
3                 Park  0.03
4  Japanese Restaurant  0.03


----East Toronto----
         venue  freq
0         Café  0.07
1  Coffee Shop  0.06
2         Park  0.06
3      Brewery  0.05
4  Pizza Place  0.05


----East York----
         venue  freq
0         Park  0.06
1    Gastropub  0.05
2  Coffee Shop  0.05
3      Brewery  0.04
4         Café  0.04


----Etobicoke----
            venue  freq
0     Coffee Shop  0.08
1        Pharmacy  0.06
2            Bank  0.06
3  Sandwich Place  0.05
4   Grocery Store  0.05


----Mississauga----
                 venue  freq
0                Hotel  0.10
1        Grocery Store  0.05
2  Japanese Restaurant  0.05
3   

In [33]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [34]:
import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
borough_venues_sorted = pd.DataFrame(columns=columns)
borough_venues_sorted['Borough'] = toronto_grouped['Borough']

for ind in np.arange(toronto_grouped.shape[0]):
    borough_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

borough_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Park,Italian Restaurant,Café,Bagel Shop,Bakery,Sporting Goods Shop,Spa,Middle Eastern Restaurant,Grocery Store
1,Downtown Toronto,Coffee Shop,Restaurant,Pizza Place,Hotel,Café,Plaza,Park,Japanese Restaurant,Diner,Sandwich Place
2,East Toronto,Café,Park,Coffee Shop,Brewery,Pizza Place,Indian Restaurant,Middle Eastern Restaurant,Bakery,French Restaurant,Vietnamese Restaurant
3,East York,Park,Gastropub,Coffee Shop,Greek Restaurant,Bakery,Café,Brewery,Ice Cream Shop,Pizza Place,Italian Restaurant
4,Etobicoke,Coffee Shop,Bank,Pharmacy,Grocery Store,Sandwich Place,Pizza Place,Italian Restaurant,Restaurant,Golf Course,Burger Joint


In [35]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 3, 1, 1, 0, 2, 0, 0, 1, 4], dtype=int32)

In [36]:
# add clustering labels
borough_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df4

# merge toronto_grouped with df4 to add latitude/longitude for each borough
toronto_merged = toronto_merged.join(borough_venues_sorted.set_index('Borough'), on='Borough')

toronto_merged.head()

Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,43.750727,-79.429338,0,Coffee Shop,Sushi Restaurant,Grocery Store,Japanese Restaurant,Bakery,Café,Burger Joint,Pub,Restaurant,Fast Food Restaurant
1,Downtown Toronto,43.654597,-79.383972,3,Coffee Shop,Restaurant,Pizza Place,Hotel,Café,Plaza,Park,Japanese Restaurant,Diner,Sandwich Place
2,Etobicoke,43.660043,-79.542074,0,Coffee Shop,Bank,Pharmacy,Grocery Store,Sandwich Place,Pizza Place,Italian Restaurant,Restaurant,Golf Course,Burger Joint
3,Scarborough,43.766229,-79.249085,0,Coffee Shop,Indian Restaurant,Chinese Restaurant,Bank,Bookstore,Sandwich Place,Pharmacy,Bakery,Supermarket,Gym
4,East York,43.700303,-79.335851,1,Park,Gastropub,Coffee Shop,Greek Restaurant,Bakery,Café,Brewery,Ice Cream Shop,Pizza Place,Italian Restaurant


In [37]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[0]+[4]+list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Coffee Shop,Sushi Restaurant,Grocery Store,Japanese Restaurant,Bakery,Café,Burger Joint,Pub,Restaurant,Fast Food Restaurant
2,Etobicoke,Coffee Shop,Bank,Pharmacy,Grocery Store,Sandwich Place,Pizza Place,Italian Restaurant,Restaurant,Golf Course,Burger Joint
3,Scarborough,Coffee Shop,Indian Restaurant,Chinese Restaurant,Bank,Bookstore,Sandwich Place,Pharmacy,Bakery,Supermarket,Gym


In [39]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[0]+[4]+list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East York,Park,Gastropub,Coffee Shop,Greek Restaurant,Bakery,Café,Brewery,Ice Cream Shop,Pizza Place,Italian Restaurant
6,East Toronto,Café,Park,Coffee Shop,Brewery,Pizza Place,Indian Restaurant,Middle Eastern Restaurant,Bakery,French Restaurant,Vietnamese Restaurant
7,West Toronto,Café,Coffee Shop,Bar,Bakery,Park,Italian Restaurant,Brewery,Restaurant,Asian Restaurant,Beer Bar
8,Central Toronto,Coffee Shop,Park,Italian Restaurant,Café,Bagel Shop,Bakery,Sporting Goods Shop,Spa,Middle Eastern Restaurant,Grocery Store


In [40]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[0]+[4]+list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Mississauga,Hotel,Japanese Restaurant,Grocery Store,Middle Eastern Restaurant,Breakfast Spot,Sandwich Place,Café,Indian Restaurant,Caribbean Restaurant,Burrito Place


In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[0]+[4]+list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,Coffee Shop,Restaurant,Pizza Place,Hotel,Café,Plaza,Park,Japanese Restaurant,Diner,Sandwich Place


In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[0]+[4]+list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,York,Coffee Shop,Italian Restaurant,Furniture / Home Store,Bakery,Burger Joint,Grocery Store,Brewery,Ice Cream Shop,Café,Caribbean Restaurant
