# Postal Codes of Toronto, scraped from Wikipedia

In [106]:
#Starting by importing the necessary libraries. I will mainly be using pandas.

import pandas as pd  #In order to get panda dataframes.
from geopy.geocoders import Nominatim #Converts an address into latitude and longitude.
import geocoder #Importing geocoder.
import folium #Importing folium for rendering maps.

print('Libraries imported.')

Libraries imported.


In [57]:
# Read the first table on the Wikipedia page into a dataframe and rename
# the 'Postcode' column to 'PostalCode'.

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
tables = pd.read_html(url, header=0)  # one dataframe per HTML table found on the page
df = tables[0].rename(columns={'Postcode': 'PostalCode'})

In [58]:
# Keep only the rows whose Borough is something other than 'Not assigned'.

has_borough = df['Borough'].ne('Not assigned')
toronto_df = df.loc[has_borough]

##  Adding neighborhoods with same PostalCode to one row.

In [59]:
# Collapse rows that share a PostalCode/Borough pair into one row, joining the
# remaining string column(s) with commas. sort=False keeps the original row order.

toronto_df = (
    toronto_df
    .groupby(['PostalCode', 'Borough'], sort=False)
    .agg(','.join)
    .reset_index()
)
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned


##  Checking which neighborhoods are set to 'Not assigned', then replacing them.

In [60]:
# List the rows whose Neighbourhood is still "Not assigned".

is_unassigned = toronto_df['Neighbourhood'].eq('Not assigned')
no_neigh = toronto_df.loc[is_unassigned]
no_neigh.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
4,M7A,Queen's Park,Not assigned


In [61]:
# Set the Neighbourhood to the same value as the Borough on the same row
# wherever the Neighbourhood is "Not assigned".
#
# The fallback is taken from each row's own Borough rather than a hard-coded
# name, and only the Neighbourhood column is touched, so the step stays correct
# if the source table gains other unassigned neighbourhoods.

unassigned = toronto_df['Neighbourhood'] == 'Not assigned'
toronto_df = toronto_df.assign(
    Neighbourhood=toronto_df['Neighbourhood'].mask(unassigned, toronto_df['Borough'])
)

In [65]:
# Preview the first rows to confirm no Neighbourhood is left as "Not assigned".
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [64]:
# (rows, columns) of the cleaned dataframe.
toronto_df.shape

(103, 3)

## Adding coordinates for the neighborhoods.

In [84]:
# Load the postal-code coordinates into a dataframe.
geodata_url = 'https://cocl.us/Geospatial_data'
df_geocode = pd.read_csv(geodata_url)

# Rename the columns so the key column matches toronto_df's 'PostalCode'.
df_geocode.columns = ['PostalCode', 'Latitude', 'Longitude']

# Show the first 5 rows.
df_geocode.head()


Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [85]:
# Join the two dataframes on PostalCode; the inner join keeps only postal codes
# that appear in both.

df_full = toronto_df.merge(df_geocode, how='inner', on='PostalCode')

# Below is the final dataframe.

In [88]:
# Preview the merged dataframe (postal code, borough, neighbourhoods, coordinates).
df_full.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [97]:
# Number of distinct Borough values (NaN, if any, counted as one value) and
# number of rows in the merged dataframe.
borough_count = df_full['Borough'].nunique(dropna=False)
row_count = len(df_full)

print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

The dataframe has 11 boroughs and 103 neighborhoods.


# Clustering the different Boroughs.

## Getting the coordinates for Toronto.

In [104]:
address = 'Toronto, Canada'

# Nominatim must be given an explicit user_agent identifying this application;
# relying on geopy's default agent is deprecated and rejected by newer geopy releases.
geolocator = Nominatim(user_agent='toronto_postal_codes_notebook')
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of the City of Toronto are 43.653963, -79.387207.


## Now that we have the coordinates, let's put it all on a map for visualization.

In [121]:
# Create a map of Toronto centred on the geocoded latitude/longitude.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per dataframe row, with a "<neighbourhoods>, <borough>" popup.
for lat, lng, borough, neighborhood in zip(df_full['Latitude'], df_full['Longitude'], df_full['Borough'], df_full['Neighbourhood']):
    popup_text = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes folium escape the text, so names containing
    # apostrophes (e.g. "Queen's Park") render safely in the popup.
    popup = folium.Popup(popup_text, parse_html=True)
    # parse_html is an option of Popup, not of CircleMarker, so it is only
    # passed to the Popup above.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto

# Let's create a new dataframe, only for downtown Toronto.

In [117]:
# Keep only the Downtown Toronto rows and renumber the index from 0.
is_downtown = df_full['Borough'].eq('Downtown Toronto')
downtown_df = df_full.loc[is_downtown].reset_index(drop=True)
downtown_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
1,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


## Let's get the coordinates for downtown Toronto.

In [118]:
address = 'Downtown Toronto, Toronto'

# Nominatim must be given an explicit user_agent identifying this application;
# relying on geopy's default agent is deprecated and rejected by newer geopy releases.
geolocator = Nominatim(user_agent='toronto_postal_codes_notebook')
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

# NOTE: this overwrites the latitude/longitude set by the earlier city-wide lookup.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of downtown torono are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of downtown torono are 43.6541737, -79.3808116451341.


## Let's create a map to visualize downtown Toronto and its neighborhoods.

In [120]:
# Create a map of downtown Toronto centred on the geocoded latitude/longitude.
# NOTE: this rebinds map_toronto, replacing the city-wide map object created earlier.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per downtown row, using the neighbourhood names as popup text.
for lat, lng, neighbourhood in zip(downtown_df['Latitude'], downtown_df['Longitude'], downtown_df['Neighbourhood']):
    # parse_html=True makes folium escape the text before embedding it in the popup.
    popup = folium.Popup(neighbourhood, parse_html=True)
    # parse_html is an option of Popup, not of CircleMarker, so it is only
    # passed to the Popup above.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto

In [None]:
#By Petter 16/7/2019