In [104]:
import pandas as pd
import numpy as np

First, we read the data of Toronto neighborhoods for Canada from a Wikipedia article. Following that, we clean up any incomplete data.

In [107]:
Canada = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M','r')

df = pd.DataFrame(columns = ['Postcode','Borough','Neighborhood'])
df = df.append(Canada)

df['Borough'].replace('Not assigned', np.NaN, inplace = True)
df.dropna(axis = 0, inplace = True)
df.reset_index(inplace = True)
df.drop(['index'], axis = 1)

df.replace('Not assigned','Queen\'s Park', inplace = True)
df.drop(['index'], axis = 1, inplace = True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


We now adjust our data set so that multiple postcodes are concatenated to one row of data. The neighborhoods for each postcode are then combined into a string, seperated by commas. We also adjust the index to be the postcode for later use.

In [108]:
ndf = pd.DataFrame(columns = ['Postcode','Borough','Neighborhoods'])
postcode = 'M3A'
for i in list(range(len(df[['Postcode']])-7)):
    if postcode not in ndf['Postcode']:
        postcode = df['Postcode'].iloc[i]
        nlist = [postcode,df.iloc[i,2]]
        town = []
        for j in range(df['Postcode'].value_counts().loc[postcode]):
            if df['Postcode'].iloc[i+j] == postcode:
                town.append(df['Neighborhood'].iloc[i+j])
        nlist.append(', '.join(town))
        ndata = pd.DataFrame([[nlist[0],nlist[1],nlist[2]]],columns = ['Postcode','Borough','Neighborhoods'])
        ndf = ndf.append(ndata)
        i = i + j

ndf.reset_index(inplace = True)
droplist = []
for i in range((len(ndf[['Postcode']])-7)):
    if i > 0 and ndf['Postcode'].iloc[i] == ndf['Postcode'].iloc[i-1]:
        droplist.append(i)
ndf.drop(droplist, axis = 0, inplace = True)
ndf.set_index('Postcode', inplace = True)

In [109]:
ndf.shape
ndf.drop(['index'], axis = 1, inplace = True)
ndf.head()

Unnamed: 0_level_0,Borough,Neighborhoods
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,Parkwoods,Parkwoods
M4A,Victoria Village,Victoria Village
M5A,Harbourfront,Harbourfront
M6A,Lawrence Heights,"Lawrence Heights, Lawrence Manor"
M7A,Queen's Park,Queen's Park


We now import a csv containing the location, latitude, and longitude for each postal code. Following the cleaning a seperating of data, we once again set the index to be the Postcode. Doing so allows us to join the two dataframes.

In [110]:
ll = pd.read_csv('https://cocl.us/Geospatial_data','r')

In [111]:
coor = []
for i in range(ll.size):
    coor.append(ll.iloc[i,0].split(','))
coordinates = pd.DataFrame(coor,columns = ['Postcode','Latitude','Longitude'])
coordinates.set_index('Postcode', inplace = True)
coordinates.head()

Unnamed: 0_level_0,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.8066863,-79.1943534
M1C,43.7845351,-79.1604971
M1E,43.7635726,-79.1887115
M1G,43.7709921,-79.2169174
M1H,43.773136,-79.2394761


In [117]:
Toronto = ndf.join(coordinates)

In [118]:
Toronto.head()

Unnamed: 0_level_0,Borough,Neighborhoods,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Rouge,"Rouge, Malvern",43.8066863,-79.1943534
M1C,Highland Creek,"Highland Creek, Rouge Hill, Port Union",43.7845351,-79.1604971
M1E,Guildwood,"Guildwood, Morningside, West Hill",43.7635726,-79.1887115
M1G,Woburn,Woburn,43.7709921,-79.2169174
M1H,Cedarbrae,Cedarbrae,43.773136,-79.2394761


Now use a folium to show a map of all the neighborhoods.

In [136]:
Toronto[['Latitude','Longitude']] = Toronto[['Latitude','Longitude']].apply(pd.to_numeric, errors = 'coerce')

In [123]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium


In [141]:

map_toronto = folium.Map(location=[43.70011, -79.4163], zoom_start=10)

for lat, lng, borough, neighborhood in zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Borough'], Toronto['Neighborhoods']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto