# Peer-graded assignment : Segmentation and Clustering Neighborhoods of Toronto

In [75]:
import numpy as np
import pandas as pd

## Step 1 :
Getting data from wikipedia : https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M,

In [76]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df_raw = pd.read_html(url)

print('There are {} table(s) on this pages'.format(len(df_raw)))

There are 3 table(s) on this pages


## Step 2:
We only need the first table. 

In [77]:
df_toronto = df_raw[0]
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Step 3 : Clean Data

### Remove Borough equals 'Not Assigned' rows
### Replace Nan Neighborhoods by Borough.

In [78]:
#### Function to replace Nan or 'Not Assigned Neighborhoods with the Borough value'

def normalize_Neighborhood(row):
    if (row['Neighborhood'] == np.nan or row['Neighborhood'] == 'Not Assigned'):
        print('Replacing {} by {}'.format(row['Neighborhood'], row['Borough']))
        return row['Borough']
    else:
        return row['Neighborhood']


Removing Not assigned rows en reset index

In [79]:
print('Number of rows before Not Assigned cleaning {}', len(df_toronto))
df_toronto = df_toronto[df_toronto['Borough'] != 'Not assigned'].reset_index(drop=True)
print('Number of rows after N ot Assigned cleaning {}', len(df_toronto))

Number of rows before Not Assigned cleaning {} 180
Number of rows after N ot Assigned cleaning {} 103


replace 'Not assigned neighborhoods by Borough values.

In [80]:
df_toronto['Neighborhood'] = df_toronto.apply(normalize_Neighborhood, axis=1)
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [81]:
print('thez dimensions of the dataframe is {}.'.format(df_toronto.shape))

thez dimensions of the dataframe is (103, 3).


## Step 4 : Get coordinates from  http://cocl.us/Geospatial_data (csv file) and merge with df_toronto dataframe

Save data local

In [82]:
#!wget -q -O 'Geospatial_data.csv' http://cocl.us/Geospatial_data
#print('Data downloaded!')

Load data from local file to dataframe. If the file doesn't exists fetch it from http://cocl.us/Geospatial_data.

In [83]:
try:
    df_geo = pd.read_csv("Geospatial_data.csv")
except:
    !wget -q -O 'Geospatial_data.csv' http://cocl.us/Geospatial_data
    print('Data downloaded!')
    df_geo = pd.read_csv("Geospatial_data.csv")

df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merge df_toronto with df_geo

In [84]:
df_toronto = pd.merge(df_toronto, df_geo, on="Postal Code")
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [85]:
print('The Totronto dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_toronto['Borough'].unique()),
        df_toronto.shape[0]
    )
)
df_toronto['Borough'].unique()

The Totronto dataframe has 10 boroughs and 103 neighborhoods.


array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

## Get more information of the Neighborhoods of Toronto from Foursquare

In [86]:
# The code was removed by Watson Studio for sharing.

Your credentails:
CLIENT_ID: E5A3GWKSLRSOJZIPZZD5V3FOXFWMI1LDXZYHGCC3WFW52BZY
CLIENT_SECRET:Q5TZKSCDK41J1F0DEVKBMYCE0KE2L5RB2AJ4RGEQZUWYQDYX


Borrowed from the lab Clustering Neighborhoods. Get NearbyVenues

In [87]:
import requests #

def getNearbyVenues(boroughs, names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for borough,name, lat, lng in zip(boroughs, names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            borough,
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough',
                  'Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [88]:
#!rm df_toronto_venues.csv 
# First try to see if data is already uploaded from Foursquare (save credits). If you want to start always from fresh data uncomment line abouve to remove local file.
file_toronto_venues='df_toronto_venues.csv'
try:
    df_toronto_venues = pd.read_csv(file_toronto_venues)
    print('Reading data from local file {}'.format(file_toronto_venues))
except:
    radius = 500
    LIMIT = 100
    df_toronto_venues = getNearbyVenues(boroughs=df_toronto['Borough'], names=df_toronto['Neighborhood'], latitudes=df_toronto['Latitude'], longitudes=df_toronto['Longitude'], radius=radius, LIMIT=LIMIT)
    df_toronto_venues.to_csv(file_toronto_venues)

Reading data from local file df_toronto_venues.csv


In [89]:
df_toronto_number_neighborhoods_borough = df_toronto_venues[['Borough','Neighborhood','Venue']].groupby(['Borough','Neighborhood'], as_index=False).count()

df_toronto_venus_analysis = df_toronto_number_neighborhoods_borough.groupby('Borough').agg(['count','sum','max', 'min', 'mean', 'std']).rename( columns={"count":"#Boroughs","sum" : "#Venues"})
df_toronto_venus_analysis

Unnamed: 0_level_0,Venue,Venue,Venue,Venue,Venue,Venue
Unnamed: 0_level_1,#Boroughs,#Venues,max,min,mean,std
Borough,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Central Toronto,9,114,35,2,12.666667,11.726039
Downtown Toronto,19,1210,100,4,63.684211,32.405508
East Toronto,5,124,42,4,24.8,16.037456
East York,5,74,34,3,14.8,11.987493
Etobicoke,11,75,13,1,6.818182,4.729021
Mississauga,1,12,12,12,12.0,
North York,18,239,64,1,13.277778,16.516678
Scarborough,16,94,14,1,5.875,4.129165
West Toronto,6,156,42,13,26.0,11.207141
York,5,17,4,2,3.4,0.894427


### Analysis of the Boroughs and Venues in Toronto
There is a hugh difference in the number of venues per Borough. ie. York has just 17 venues in 5 neighborhoods where as Downtown Toronto has 1210 in 19 neighborhoods. 

# Now the data is ready lets play.

## Visualize Boroughs, Neighborhoods en number of venues on map
Mark Neigborhoods on map with circle with:
<ul>
    <li> 
        the bordor-color to distiguist between Boroughs
    </li>
    <li>
        fill-color indicating the number of venues in that neighborhood
    </li>
    <li>
        popup when clicking on the circle displaying Neigborhood, Borough and number of Venues in the Neighborhood.
    </li>
        
</ul>
    

### Import libraries

In [90]:
try:
    from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
except errorValue:
    print('geopy not installed. Installing now. May take a while')
    !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
    from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

try:
    import folium # map rendering library
except:
    print('Folium not installed. Installing now. This can take a while')
    !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
    import folium # map rendering library

print('Libraries imported.')

Libraries imported.


Funcion to define and calculate the color.

In [91]:
import matplotlib.cm as cm
import matplotlib.colors as colors

def make_rainbow(plist):
    colors_array = cm.rainbow(np.linspace(0, 1, len(plist)))
    return [colors.rgb2hex(i) for i in colors_array]

def get_color(elem, plist, rainbow):
    index = np.where(plist == elem)[0]
    return rainbow[index[0]]

def get_color_cat(total):
    if total < 5:
        return 'green'
    elif total < 10:
        return 'blue'
    elif total < 15:
        return 'yellow'
    elif total < 20:
        return 'orange'
    elif total < 30:
        return 'red'
    else:
        return 'purple'

In [92]:

def create_map(address, df_map):
    

    geolocator = Nominatim(user_agent="ny_explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))
    print('Boroughs of Toronto {}'.format(df_toronto['Borough'].unique()))
    
    map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)


    #df_map = df_toronto_venues #[(df_toronto['Borough'] == 'Downtown Toronto') | (df_toronto['Borough'] == 'North York')]
    unique_list = df_map['Borough'].unique()
    rainbow = make_rainbow(unique_list)
    print('Availabe colors {}'.format(rainbow))
    # add markers to map
    for lat, lng, borough, neighborhood, venues in zip(df_map['Latitude'], df_map['Longitude'], df_map['Borough'], df_map['Neighborhood'], df_map['Venues']):
        label = '{}, {} has {} venues'.format(neighborhood, borough, venues)
        label = folium.Popup(label, parse_html=True)

        folium.CircleMarker(
            [lat, lng],
            radius=10,
            popup=label,
            color=get_color(elem=borough,plist=unique_list, rainbow=rainbow),
            fill=True,
            fill_color=get_color_cat(venues),
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)
    
        
    return map_toronto

## Map of Toronto with the neighborhoods marked with a circle. Neighborhoods of the same Borough have the same border-color. The fill-color represent a qualification of the number of venues per neighborhood. green < 5, blue < 10, yellow < 15, orange < 20, red < 30, purple >= 30 

In [93]:
address = 'Toronto, Canada'
df = df_toronto_venues.groupby(['Borough', 'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude'], as_index=False).count()
df = df[['Borough', 'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue']]
df.columns = ['Borough', 'Neighborhood', 'Latitude', 'Longitude', 'Venues']
map_toronto = create_map(address=address, df_map=df)
map_toronto

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.
Boroughs of Toronto ['North York' 'Downtown Toronto' 'Etobicoke' 'Scarborough' 'East York'
 'York' 'East Toronto' 'West Toronto' 'Central Toronto' 'Mississauga']
Availabe colors ['#8000ff', '#4856fb', '#10a2f0', '#2adddd', '#62fbc4', '#9cfba4', '#d4dd80', '#ffa256', '#ff562c', '#ff0000']


## Trying to get cluster information of Borough Downtown Toronto

In [94]:
df_downtown_toronto = df_toronto_venues[df_toronto_venues['Borough'] == 'Downtown Toronto']
df_downtown_toronto.head()

Unnamed: 0.1,Unnamed: 0,Borough,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
8,8,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
9,9,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
10,10,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
11,11,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
12,12,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [95]:
df_downtown_toronto_one_hot = pd.get_dummies(df_downtown_toronto[['Venue Category']], prefix="", prefix_sep="")


df_downtown_toronto_one_hot['Neighborhoods'] = df_downtown_toronto['Neighborhood']
df_downtown_toronto_one_hot['Neighborhood Latitude'] = df_downtown_toronto['Neighborhood Latitude']
df_downtown_toronto_one_hot['Neighborhood Longitude'] = df_downtown_toronto['Neighborhood Longitude']

# move postal, borough and neighborhood column to the first column
fixed_columns = list(df_downtown_toronto_one_hot.columns[-3:]) + list(df_downtown_toronto_one_hot.columns[:-3])
df_downtown_toronto_one_hot = df_downtown_toronto_one_hot[fixed_columns].rename(columns={"Neighborhood Longitude" : "Longitude", "Neighborhood Latitude" : "Latitude"})
print(df_downtown_toronto_one_hot.shape)
df_downtown_toronto_one_hot.head()

(1210, 210)


Unnamed: 0,Neighborhoods,Latitude,Longitude,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
8,"Regent Park, Harbourfront",43.65426,-79.360636,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Regent Park, Harbourfront",43.65426,-79.360636,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10,"Regent Park, Harbourfront",43.65426,-79.360636,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11,"Regent Park, Harbourfront",43.65426,-79.360636,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12,"Regent Park, Harbourfront",43.65426,-79.360636,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [96]:
toronto_grouped = df_downtown_toronto_one_hot.groupby(["Neighborhoods", "Latitude", 'Longitude']).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped


(19, 210)


Unnamed: 0,Neighborhoods,Latitude,Longitude,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,43.644771,-79.373306,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.058824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,43.657952,-79.387383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.016393
3,Christie,43.669542,-79.422564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,43.66586,-79.38316,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,...,0.013699,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397
5,"Commerce Court, Victoria Hotel",43.648198,-79.379817,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
6,"First Canadian Place, Underground city",43.648429,-79.38228,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0
7,"Garden District, Ryerson",43.657162,-79.378937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0
8,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0
9,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.036364,0.0,0.054545,0.018182,0.0,0.0


In [97]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['Neighborhoods', 'Latitude', 'Longitude']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']
neighborhoods_venues_sorted['Latitude'] = toronto_grouped['Latitude']
neighborhoods_venues_sorted['Longitude'] = toronto_grouped['Longitude']

for ind in np.arange(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(19, 13)


Unnamed: 0,Neighborhoods,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,43.644771,-79.373306,Coffee Shop,Cocktail Bar,Restaurant,Bakery,Seafood Restaurant,Beer Bar,Café,Cheese Shop,Portuguese Restaurant,Japanese Restaurant
1,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,Airport Service,Airport Lounge,Harbor / Marina,Coffee Shop,Sculpture Garden,Boat or Ferry,Rental Car Location,Bar,Plane,Airport Terminal
2,Central Bay Street,43.657952,-79.387383,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Department Store,Thai Restaurant,Bubble Tea Shop,Bar,Burger Joint,Salad Place
3,Christie,43.669542,-79.422564,Grocery Store,Café,Park,Diner,Italian Restaurant,Athletics & Sports,Candy Store,Restaurant,Baby Store,Nightclub
4,Church and Wellesley,43.66586,-79.38316,Japanese Restaurant,Sushi Restaurant,Coffee Shop,Restaurant,Yoga Studio,Pub,Gay Bar,Hotel,Gastropub,Mediterranean Restaurant
5,"Commerce Court, Victoria Hotel",43.648198,-79.379817,Coffee Shop,Restaurant,Café,Hotel,Gym,American Restaurant,Italian Restaurant,Seafood Restaurant,Japanese Restaurant,Deli / Bodega
6,"First Canadian Place, Underground city",43.648429,-79.38228,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Gym,American Restaurant,Deli / Bodega,Salad Place,Asian Restaurant
7,"Garden District, Ryerson",43.657162,-79.378937,Clothing Store,Coffee Shop,Café,Italian Restaurant,Cosmetics Shop,Bubble Tea Shop,Middle Eastern Restaurant,Restaurant,Japanese Restaurant,Lingerie Store
8,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,Coffee Shop,Aquarium,Hotel,Café,Scenic Lookout,Brewery,Sporting Goods Shop,Restaurant,Italian Restaurant,Fried Chicken Joint
9,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049,Café,Coffee Shop,Bakery,Vietnamese Restaurant,Mexican Restaurant,Vegetarian / Vegan Restaurant,Dessert Shop,Gaming Cafe,Bar,Japanese Restaurant


## cluster on common categories

In [98]:

# set number of clusters
kclusters = 10

toronto_grouped_clustering = toronto_grouped.drop(["Neighborhoods", "Latitude", "Longitude"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 4, 7, 3, 2, 8, 8, 2, 2, 9], dtype=int32)

In [99]:
toronto_grouped["Cluster Labels"] = kmeans.labels_
toronto_grouped

Unnamed: 0,Neighborhoods,Latitude,Longitude,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio,Cluster Labels
0,Berczy Park,43.644771,-79.373306,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,2
1,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.058824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
2,Central Bay Street,43.657952,-79.387383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.016393,7
3,Christie,43.669542,-79.422564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
4,Church and Wellesley,43.66586,-79.38316,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,...,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,2
5,"Commerce Court, Victoria Hotel",43.648198,-79.379817,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,8
6,"First Canadian Place, Underground city",43.648429,-79.38228,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,8
7,"Garden District, Ryerson",43.657162,-79.378937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,2
8,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,2
9,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.036364,0.0,0.054545,0.018182,0.0,0.0,9


## Visulize Clusters

In [100]:
# create map


geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode('Downtown Toronto, Canada')
latitude = location.latitude
longitude = location.longitude
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_grouped['Latitude'], toronto_grouped['Longitude'], toronto_grouped['Neighborhoods'], toronto_grouped['Cluster Labels']):
    label = folium.Popup('{} - Cluster {}'.format(poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters