## Clustering

In [1]:
import pandas as pd
import folium
from geopy.geocoders import Nominatim
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

df = pd.read_csv("toronto_full.csv")
df['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East York            5
East Toronto         5
York                 5
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

In [2]:
toronto = df[df['Borough'].str.contains("Toronto")]
toronto['Cluster']=toronto['Borough'].replace(to_replace=['Downtown Toronto','Central Toronto','West Toronto','East Toronto'],value=[1,2,3,4],inplace=False)
toronto.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  toronto['Cluster']=toronto['Borough'].replace(to_replace=['Downtown Toronto','Central Toronto','West Toronto','East Toronto'],value=[1,2,3,4],inplace=False)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,4
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,4
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,4
43,M4M,East Toronto,Studio District,43.659526,-79.340923,4
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2


In [3]:
address = 'Toronto, ON'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [4]:
toronto_cluster = len(toronto.Cluster.unique())
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(toronto_cluster)
ys = [i + x + (i*x)**2 for i in range(toronto_cluster)]
colors_array = cm.brg(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto['Latitude'], toronto['Longitude'], toronto['Neighbourhood'], toronto['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters