<H1> INSTALLATION AND IMPORT OF LIBS </H1>

In [103]:
# !pip install geocoder
# !pip install folium
from io import StringIO

import folium
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
# Limit rendered DataFrames to 10 rows so displayed tables stay short.
pd.set_option('display.max_rows', 10)

<H1> PARSE OF HTML DATA FROM WIKI </H1>

In [5]:
from bs4 import BeautifulSoup

# Wikipedia page with the list of Canadian postal codes beginning with "M".
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() fails loudly instead of letting an HTTP error page be
# parsed as if it were the article.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')

In [19]:
# Collect every <table> element of the parsed page and let pandas turn the
# markup into DataFrames; the first parsed table is kept as the raw frame.
table = soup.find_all('table')
# Passing literal HTML as a plain string to read_html is deprecated in
# pandas >= 2.1, so the markup is wrapped in a file-like StringIO object.
df = pd.read_html(StringIO(str(table)))[0]

<H1> FILTER DATA TO REMOVE ROWS WITH A NOT ASSIGNED BOROUGH </H1>

In [23]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
has_borough = df['Borough'].ne('Not assigned')
df_wna = df.loc[has_borough]

In [33]:
def _join_unique(values):
    """Join the distinct non-null entries of a column with ', '.

    dict.fromkeys de-duplicates while keeping first-seen order. A set would
    also de-duplicate, but its iteration order for strings can change between
    interpreter runs, which makes the joined text non-reproducible.
    """
    return ', '.join(dict.fromkeys(values.dropna()))


# Collapse the table to one row per postal code; every other column is reduced
# to a comma-separated list of its distinct values.
df_grouped = df_wna.groupby(by='Postal Code', as_index=False).agg(_join_unique)

<H3> <i>TABLE AFTER JOINING NEIGHBOURHOODS WITH COMMA</i> </H3>

In [65]:
# Display the grouped table (one row per postal code).
df_grouped

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


<H1> <i> SHAPE OF TABLE  </i> </H1>

In [42]:
# (rows, columns) of the grouped table.
df_grouped.shape

(103, 3)

<H1> <i> IMPORTING LAT LONGS AND ADDING TO TABLE  </i> </H1>

In [55]:
# Download the coordinates CSV into a DataFrame.
# NOTE(review): presumably one row per postal code with 'Postal Code',
# 'Latitude' and 'Longitude' columns — confirm against the file.
geocodes=pd.read_csv('https://cocl.us/Geospatial_data')

In [58]:
# Attach the coordinate columns to the grouped table by matching postal codes;
# only postal codes present in both frames are kept (inner join).
table_wcoords = pd.merge(df_grouped, geocodes, how='inner', on='Postal Code')

<H1> <i> FINAL TABLE WITH COORDINATES  </i> </H1>

In [68]:
# Display the grouped table together with its Latitude/Longitude columns.
table_wcoords

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


<H1> <i> GET COORDINATES OF TORONTO </i> </H1>

In [73]:
address = 'Toronto, Ontario'

# Look the address up through the Nominatim geocoding service.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; stop with a clear
# message instead of an AttributeError on the attribute reads below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


<H1> <i> MAP OF THE LOCATION OF POINTS  </i> </H1>

In [76]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One blue circle marker per table row; clicking it shows the row's
# neighbourhood text in a popup.
for lat, lng, neighbourhood in zip(table_wcoords['Latitude'],
                                   table_wcoords['Longitude'],
                                   table_wcoords['Neighbourhood']):
    # parse_html is an argument of folium.Popup only; it is not a documented
    # CircleMarker option, so it is not passed to the marker.
    popup = folium.Popup(neighbourhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto

<H1> <i> PREPARATION FOR DATA ANALYSIS </i> </H1>

In [128]:
# One indicator column per borough; the empty prefix/separator keep the bare
# borough names as column labels.
borough_dummies = pd.get_dummies(table_wcoords[['Borough']], prefix="", prefix_sep="")
# Target column order: the neighbourhood label first, then the borough indicators.
fixed_columns = ['Neighbourhood'] + list(borough_dummies.columns)
table_wcoords_onehot = borough_dummies.assign(
    Neighbourhood=table_wcoords['Neighbourhood']
)[fixed_columns]

<H1> <i> CLUSTERING OF DATA INTO 4 CLUSTERS </i> </H1>

In [120]:
kclusters = 4

# Cluster on the borough indicator columns only, so the text label is dropped.
# The keyword form is required: the positional axis argument of
# DataFrame.drop was removed in pandas 2.0.
clustering_table = table_wcoords_onehot.drop(columns='Neighbourhood')

# n_init is pinned to the historical default of 10 so the result does not
# depend on the installed scikit-learn version (newer releases default to 'auto').
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(clustering_table)
kmeans_result = pd.DataFrame(kmeans.labels_)
# Assign the raw label array so labels are matched to rows by position; a
# DataFrame would be aligned on index labels and could introduce NaN.
table_wcoords['cluster_results'] = kmeans.labels_

<H1> <i> MAP OF THE CLUSTERS </i> </H1>

In [121]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One evenly spaced colour of the rainbow colormap per cluster, as hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Add one marker per table row, coloured by the cluster it was assigned to.
for lat, lon, poi, cluster in zip(table_wcoords['Latitude'],
                                  table_wcoords['Longitude'],
                                  table_wcoords['Neighbourhood'],
                                  table_wcoords['cluster_results']):
    # Cluster labels start at 0, so they index the palette directly; indexing
    # with cluster - 1 would send cluster 0 to the last colour via wraparound.
    cluster_color = rainbow[cluster]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<H1> <i>  BOROUGHS IN EACH CLUSTER </i> </H1>

In [122]:
# Distinct boroughs among the rows assigned to cluster 0.
table_wcoords.loc[table_wcoords['cluster_results'] == 0, 'Borough'].unique()

array(['East York', 'East Toronto', 'Central Toronto', 'York',
       'West Toronto', 'Toronto/York', 'Mississauga', 'Etobicoke'],
      dtype=object)

In [123]:
# Distinct boroughs among the rows assigned to cluster 1.
table_wcoords.loc[table_wcoords['cluster_results'] == 1, 'Borough'].unique()

array(['Downtown Toronto'], dtype=object)

In [124]:
# Distinct boroughs among the rows assigned to cluster 2.
table_wcoords.loc[table_wcoords['cluster_results'] == 2, 'Borough'].unique()

array(['Scarborough'], dtype=object)

In [125]:
# Distinct boroughs among the rows assigned to cluster 3.
table_wcoords.loc[table_wcoords['cluster_results'] == 3, 'Borough'].unique()

array(['North York'], dtype=object)