# Neighborhoods in Toronto
#### This notebook is mainly used to gather information about the neighborhoods of Toronto

##### first we import the modules we'll need in this notebook

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim 
# from sklearn.datasets.samples_generator import make_blobs

##### download the database from the wikipedia page and make a pandas dataframe out of it

In [2]:
# Scrape the Toronto postal-code table from Wikipedia into a DataFrame.
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Relative path so the notebook runs on any machine, not only the author's.
OUTPUT_CSV = 'TorentoData.csv'

# A timeout keeps "Run All" from hanging; raise_for_status surfaces HTTP errors
# here instead of as a confusing parsing failure further down.
response = requests.get(URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

wikitable = soup.find('table', {'class': 'wikitable sortable'})
if wikitable is None or wikitable.tbody is None:
    raise ValueError("Could not find the 'wikitable sortable' table at " + URL)
table = wikitable.tbody

rows = table.find_all('tr')
# The first row holds the column headers.
columns = [v.text.replace('\n', '') for v in rows[0].find_all('th')]

# Collect every data row first and build the frame once: DataFrame.append was
# removed in pandas 2.0, and appending row by row is quadratic anyway.
records = []
for row in rows[1:]:
    values = [td.text.replace('\n', '') for td in row.find_all('td')]
    # Skip rows whose cell count does not match the header (e.g. header-only rows).
    if len(values) == len(columns):
        records.append(values)

df = pd.DataFrame(records, columns=columns)

# Drop postal codes that have no borough assigned, once, after all rows are loaded.
df = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)

# Write the cleaned table a single time instead of once per scraped row.
df.to_csv(OUTPUT_CSV, index=False)

df.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


##### download the longitude and latitude database, make a dataframe out of it and merge it with the wikipedia database we got before

In [3]:
# Load the per-postal-code coordinates and attach them to the scraped table.
llURL = 'https://cocl.us/Geospatial_data'
lcsv = pd.read_csv(llURL)
ldf = pd.DataFrame(lcsv)

# Default (inner) join on the shared 'Postal Code' column.
mdf = df.merge(ldf, on='Postal Code')
mdf

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


##### Get the longitude and the latitude of Toronto (the city we have data on) with geopy

In [4]:
# Look up the coordinates of the city and create the base map centred on it.
address = 'Toronto'

geolocator = Nominatim(user_agent="torento_city_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing is found; fail with a clear message
# rather than an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
# The message now names the address that was actually geocoded.
print('The geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))

map_torento = folium.Map(location=[latitude, longitude], zoom_start=10)

The geograpical coordinate of New York City are 43.6534817, -79.3839347.


##### Show the blank map

In [5]:
# Bare expression as the cell's last line so the notebook displays the map
# created in the previous cell.
map_torento

##### Define the unique borough values in the dataframe

In [6]:
# Distinct borough names from the merged frame, in order of first appearance.
mdfu = mdf['Borough'].unique()
mdfu

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

##### Define colors to use to mark each cluster

In [7]:
# One marker colour per borough; entries are looked up by position (colors[0] .. colors[9]).
colors = [
    'blue',
    'red',
    'yellow',
    'cyan',
    'green',
    'brown',
    'pink',
    'black',
    'orange',
    'purple',
]

##### Split the data by borough, mark each borough with its own color and add the markers to the map

In [8]:
def add_borough_markers(target_map, data, borough, color):
    """Add one circle marker per neighbourhood of ``borough`` to ``target_map``.

    Parameters
    ----------
    target_map : folium.Map
        Map the markers are added to (modified in place).
    data : pandas.DataFrame
        Frame with 'Borough', 'Latitude', 'Longitude' and 'Neighbourhood' columns.
    borough : str
        Exact value of the 'Borough' column to select.
    color : str
        Colour used for both the marker outline and its fill.

    Returns
    -------
    pandas.DataFrame
        The rows of ``data`` whose 'Borough' equals ``borough``, re-indexed from 0.
    """
    borough_data = data[data['Borough'] == borough].reset_index(drop=True)

    if borough_data.empty:
        # A misspelt borough name silently draws nothing; make that visible.
        import warnings
        warnings.warn('No rows found for borough {!r}; no markers added.'.format(borough))

    for lat, lng, name in zip(borough_data['Latitude'],
                              borough_data['Longitude'],
                              borough_data['Neighbourhood']):
        folium.CircleMarker(
            [lat, lng],
            radius=3,
            popup=folium.Popup(name, parse_html=True),
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            # NOTE(review): kept from the original call; confirm CircleMarker needs it.
            parse_html=False).add_to(target_map)

    return borough_data


# One call per borough. The per-borough frames keep their original names so any
# later cell that refers to them still works.
# NOTE: re-running this cell adds the markers to the existing map a second time.
north_york_data = add_borough_markers(map_torento, mdf, 'North York', colors[0])
# Fixed: the filter previously used 'Downtown Torento', which matched no rows,
# so Downtown Toronto was never drawn on the map.
downtown_torento_data = add_borough_markers(map_torento, mdf, 'Downtown Toronto', colors[1])
etobicoke_data = add_borough_markers(map_torento, mdf, 'Etobicoke', colors[2])
scarborough_data = add_borough_markers(map_torento, mdf, 'Scarborough', colors[3])
east_york_data = add_borough_markers(map_torento, mdf, 'East York', colors[4])
york_data = add_borough_markers(map_torento, mdf, 'York', colors[5])
east_toronto_data = add_borough_markers(map_torento, mdf, 'East Toronto', colors[6])
west_toronto_data = add_borough_markers(map_torento, mdf, 'West Toronto', colors[7])
central_toronto_data = add_borough_markers(map_torento, mdf, 'Central Toronto', colors[8])
mississauga_data = add_borough_markers(map_torento, mdf, 'Mississauga', colors[9])

##### Show the map of the marked clusters

In [9]:
# Bare expression as the cell's last line so the notebook displays the map,
# now carrying the borough markers added in the previous cell.
map_torento