## Segmenting and Clustering Neighborhoods in Toronto -- Part 3

In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
# Read the combined postal-code / borough / coordinates table from CSV
# (presumably the file written by part 2 of this project -- verify the path).
df_combined = pd.read_csv(filepath_or_buffer='capstone_part2.csv')
# Preview the first five rows.
df_combined.head(n=5)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [3]:
# Keep only the rows whose "Borough" text contains the word Toronto,
# then renumber the rows from 0 so the index is contiguous again.
df_combined_2 = df_combined.loc[df_combined['Borough'].str.contains('Toronto')]
df_combined_3 = df_combined_2.reset_index(drop=True)
df_combined_3.head(n=5)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [4]:
# Check the shape, i.e. (number of rows, number of columns), of the
# Toronto-only frame.
df_combined_3.shape

(39, 5)

In [5]:
# Count how many rows carry each distinct "Borough" value.
# NOTE(review): the recorded output shows a few values with extra text fused
# onto the borough name (e.g. 'East YorkEast Toronto'); these look like
# scraping artefacts and need handling before boroughs are used as labels.
df_combined_3['Borough'].value_counts()

Downtown Toronto                                                17
Central Toronto                                                  9
West Toronto                                                     6
East Toronto                                                     4
Downtown TorontoStn A PO Boxes25 The Esplanade                   1
East TorontoBusiness reply mail Processing Centre969 Eastern     1
East YorkEast Toronto                                            1
Name: Borough, dtype: int64

In [6]:
# Create a new integer column 'Label' derived from the data in 'Borough'.
#
# A few Borough values have extra text fused onto the borough name
# (e.g. 'Downtown TorontoStn A PO Boxes25 The Esplanade'). An exact-match
# replace leaves those rows as strings, which yields a mixed-type column and
# inflates the number of distinct labels. Instead, pull the known borough
# name out of each value and map it to its integer code.
borough_codes = {
    'Downtown Toronto': 1,
    'Central Toronto': 2,
    'West Toronto': 3,
    'East Toronto': 4,
}
# The names contain no regex metacharacters, so they can be joined directly.
borough_pattern = '(' + '|'.join(borough_codes) + ')'
df_combined_3['Label'] = (
    df_combined_3['Borough']
    .str.extract(borough_pattern, expand=False)  # first known name found, else NaN
    .map(borough_codes)
    .fillna(0)       # 0 = borough not recognised
    .astype(int)
)
# NOTE(review): 'East YorkEast Toronto' is labelled as East Toronto (4) by this
# rule -- confirm that is the intended borough for that row.
df_combined_3.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Label
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,4
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1


In [7]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message here instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError(f'Could not geocode address {address!r}')
latitude = location.latitude
longitude = location.longitude
print(f'The geograpical coordinate of Toronto are {latitude}, {longitude}.')

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Preparing to create the clustering map of Toronto

In [8]:
# One cluster (and therefore one colour) per distinct Label value,
# in order of first appearance.
cluster_labels = list(df_combined_3['Label'].unique())
kclusters = len(cluster_labels)

# create map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: evenly spaced rainbow colours as hex
# strings, looked up by label value so labels need not be 0..k-1 integers
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]
color_by_label = dict(zip(cluster_labels, rainbow))

# add markers to the map
markers_colors = []  # colour used for each marker, in row order
for lat, lon, borough, cluster in zip(
    df_combined_3['Latitude'],
    df_combined_3['Longitude'],
    df_combined_3['Borough'],
    df_combined_3['Label'],
):
    marker_color = color_by_label[cluster]
    markers_colors.append(marker_color)
    # Popup text uses this row's borough, not the repr of the whole column.
    label = folium.Popup(str(borough) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(map_toronto)

In [9]:
# Check the map: leaving the folium map object as the cell's last expression
# makes the notebook display it.
map_toronto