<a href="https://colab.research.google.com/github/ApoorvAkash/Coursera_Capstone/blob/main/Neighbors_Toronto_KMeans_Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Segmenting and Clustering Neighbourhoods in Toronto (KMeans Clustering)

### Importing Libraries

In [1]:
import pandas as pd
import numpy as np

import folium

from sklearn.cluster import KMeans

### Reading Dataset

In [2]:
# Location of the neighbourhood CSV that the rest of the notebook works from.
url = 'https://raw.githubusercontent.com/ApoorvAkash/Coursera_Capstone/main/neighborhoods_toronto_data.csv'
# index_col=0: use the first CSV column as the row index rather than as data.
df = pd.read_csv(url, index_col=0)
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M3A,North York,Parkwoods,43.653482,-79.383935
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Queen's Park,Ontario Provincial Government,43.653482,-79.383935


### Visualizing all Neighborhoods using folium 

In [3]:
# Start from a map at a fixed centre point and zoom level, then add one
# circle marker per row of df.
map_all_data = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

marker_fields = df[['latitude', 'longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighbourhood in marker_fields.itertuples(index=False, name=None):
    # Popup text is "<neighbourhood>, <borough>"; parse_html=True is set on
    # the Popup so the text is handled by folium's HTML parsing path.
    popup = folium.Popup(f'{neighbourhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=popup,
        color='green',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.7,
        # NOTE(review): parse_html is a Popup argument; passing it to
        # CircleMarker looks unintended -- kept as-is, confirm and remove.
        parse_html=False,
    )
    marker.add_to(map_all_data)

# Last expression of the cell: render the map.
map_all_data

### Visualizing all neighborhoods where Borough contains Toronto

In [4]:
# Keep only the rows whose Borough name contains the literal text 'Toronto'.
# na=False treats a missing Borough as "no match"; without it a NaN in the
# column produces a non-boolean mask and the row selection raises.
is_toronto_borough = df['Borough'].str.contains('Toronto', regex=False, na=False)

# Chain the re-indexing instead of mutating the filtered frame in place, so
# the result is a fresh frame numbered 0..n-1 and the cell can be re-run
# safely from df.
df_with_toronto_borough = df[is_toronto_borough].reset_index(drop=True)

df_with_toronto_borough.shape

(39, 5)

In [5]:
# Preview the first rows of the Toronto-only frame.
df_with_toronto_borough.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783
2,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
3,M4E,East Toronto,The Beaches,43.6784,-79.2941
4,M5E,Downtown Toronto,Berczy Park,43.642106,-79.377445


In [6]:
# Same marker map as before, restricted to the boroughs whose name contains
# 'Toronto' (the rows held in df_with_toronto_borough).
map_toronto = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

toronto_fields = df_with_toronto_borough[['latitude', 'longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighbourhood in toronto_fields.itertuples(index=False, name=None):
    # Popup text is "<neighbourhood>, <borough>".
    popup = folium.Popup(f'{neighbourhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=popup,
        color='green',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.7,
        # NOTE(review): parse_html is a Popup argument; passing it to
        # CircleMarker looks unintended -- kept as-is, confirm and remove.
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: render the map.
map_toronto

### Performing KMeans Clustering on `df_with_toronto_borough` data

In [7]:
# Number of clusters to fit.
k = 5

# Cluster on the coordinates only. The columns are selected by name instead of
# dropping the others with a positional axis argument: pandas 2.0 no longer
# accepts `drop(labels, 1)`, and an explicit selection also keeps a previously
# inserted 'Cluster Labels' column out of the features when the cell is re-run.
df_toronto_kmeans = df_with_toronto_borough[['latitude', 'longitude']]

# n_init is pinned because scikit-learn changed its default from 10 to 'auto';
# random_state fixes the initialisation so the labels are reproducible.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(df_toronto_kmeans)

# Make the cell idempotent: discard labels left over from an earlier run
# (insert() raises if the column already exists), then put the fresh labels
# in as the first column.
df_with_toronto_borough = df_with_toronto_borough.drop(columns='Cluster Labels', errors='ignore')
df_with_toronto_borough.insert(0, 'Cluster Labels', kmeans.labels_)

In [8]:
# Preview the frame again; 'Cluster Labels' should now be the first column.
df_with_toronto_borough.head()

Unnamed: 0,Cluster Labels,PostalCode,Borough,Neighborhood,latitude,longitude
0,0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
1,0,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783
2,0,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
3,4,M4E,East Toronto,The Beaches,43.6784,-79.2941
4,0,M5E,Downtown Toronto,Berczy Park,43.642106,-79.377445


### Visualizing Clusters

In [9]:
# Map of the Toronto-borough neighbourhoods, coloured by KMeans cluster label.
cluster_map = folium.Map(location=[43.651070,-79.347015],zoom_start=10)

# One colour per cluster label. Label i maps to colors_array[i]; the modulo
# keeps the lookup valid even if there are more clusters than colours
# (colours then repeat instead of raising IndexError).
colors_array = ['red', 'blue', 'green', 'orange', 'yellow']

# add markers to the map
for lat, lon, neighbourhood, cluster in zip(df_with_toronto_borough['latitude'], df_with_toronto_borough['longitude'], df_with_toronto_borough['Neighborhood'], df_with_toronto_borough['Cluster Labels']):
    # Popup reads "<neighbourhood> Cluster <n>" so a marker can be identified
    # on the map, not just its cluster number.
    label = folium.Popup(str(neighbourhood) + ' Cluster ' + str(cluster), parse_html=True)
    # Index by the label itself rather than `cluster-1`, which only worked for
    # label 0 through negative-index wraparound.
    cluster_color = colors_array[cluster % len(colors_array)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(cluster_map)

# Last expression of the cell: render the map.
cluster_map