In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [3]:
soup = BeautifulSoup(website_url, 'lxml')

In [4]:
A = []
B = []
C = []

In [5]:
table = soup.find('table', class_="wikitable sortable") 

In [6]:
for row in table.findAll('tr'):
    cells = row.findAll('td')
    if len(cells) > 0: #this excludes the column titles which have no td values
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

In [7]:
df = pd.DataFrame(A , columns = ['PostalCode'])
df['Borough'] = B
df['Neighborhood'] = C

In [8]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


This removes any Boroughs without an assignment

In [9]:
df2 = df[df['Borough'] != 'Not assigned']
df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


This step groups by post code and borough and joins multiple meighborhoods with a ','

In [10]:
df3 = df2.groupby(['PostalCode', 'Borough']).agg(lambda col: ','.join(col))
df3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood
PostalCode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood\n,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


Getting rid of the extra lines

In [11]:
df3["Neighborhood"]= df3["Neighborhood"].replace('\n', '', regex=True)

The neighborhood which isnt assigned is found and assigned the borough name in the step below

In [12]:
for index, row in df3.iterrows():
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = index[1]

The index is reset from the postalcode/borough to the normal column of integers

In [13]:
df4 = df3.reset_index()

In [14]:
df4

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [15]:
df4.shape

(103, 3)

In [16]:
url="http://cocl.us/Geospatial_data"
geo_data=pd.read_csv(url)
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
lat = []
long = []

In [18]:
for index, row in df4.iterrows():
    value = str(row['PostalCode'])
    geo_data1 = geo_data[geo_data['Postal Code'] == value]
    Latitude = float(geo_data1['Latitude'])
    Longitude = float(geo_data1['Longitude'])
    
    lat.append(Latitude)
    long.append(Longitude)
    
#print(lat)
#print(long)

In [19]:
df4['Latitude'] = lat
df4['Longitude'] = long
df4

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [20]:
df4.shape

(103, 5)

In [21]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

In [22]:
map_Toronto = folium.Map(location=[43.6532,-79.3832], zoom_start=10)
for lat, lng, borough, neighborhood in zip(df4['Latitude'], df4['Longitude'], df4['Borough'], df4['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_Toronto)  
map_Toronto

In [23]:
df5 = df4['Borough'].unique()
df5
df5_list = df5.tolist()
#print(len(df5_list))

The step below creates a list of integers which correspond to the borough names.

In [24]:
Borough_Code = []
for borough in df4['Borough']:
    Borough_Code.append(df5_list.index(borough))    

In [25]:
df4['Borough Code'] = Borough_Code
df4.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Borough Code
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,0
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0


In [26]:
colors_array = cm.rainbow(np.linspace(0, len(df5) - 1))
rainbow = [colors.rgb2hex(i) for i in colors_array]

This clusters the boroughs with their own colours.

In [27]:
map_Toronto2 = folium.Map(location=[43.6532,-79.3832], zoom_start=10)
for lat, lng, borough, neighborhood, borough_code in zip(df4['Latitude'], df4['Longitude'], df4['Borough'], df4['Neighborhood'], df4['Borough Code']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[borough_code-1],
        fill=True,
        fill_color=rainbow[borough_code-1],
        fill_opacity=0.7).add_to(map_Toronto2)  
map_Toronto2