# Import libraries

In [22]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import io
import folium

# Build the table with postal codes and coordinates (old assignments)

In [23]:
# Download the Wikipedia page that lists the Toronto ("M") postal codes.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops the cell on an HTTP error instead of parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
get_data = response.text

# The page is HTML, so it is parsed with an HTML parser
# (the 'xml' parser is intended for XML documents).
soup = BeautifulSoup(get_data, 'html.parser')

# Take the first table on the page and collect all of its data cells
postal_codes_table = soup.table
if postal_codes_table is None:
    raise ValueError('No table found on the postal codes page')
elements = postal_codes_table('td')

# Strip the html tags and surrounding whitespace from every cell
cell_texts = [cell.text.strip() for cell in elements]

# The cells are read in groups of three: postcode, borough, neighbourhood
Postcode = cell_texts[0::3]
Borough = cell_texts[1::3]
Neighbourhood = cell_texts[2::3]

# Build the dataframe one row per group of three cells.
# zip() stops at the shortest list, so an incomplete trailing group is
# dropped instead of raising an IndexError.
initial_dataframe = pd.DataFrame(
    list(zip(Postcode, Borough, Neighbourhood)),
    columns=['Postcode', 'Borough', 'Neighbourhood'])

# Remove the rows whose borough is 'Not assigned'
dataframe = initial_dataframe[initial_dataframe['Borough'] != 'Not assigned']

# Combine the neighbourhoods that share a postal code and borough
# into a single comma-separated row
dataframe = dataframe.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()

# A 'Not assigned' neighbourhood takes the name of its borough.
# The assignment goes through .loc on the frame itself: writing to the row
# yielded by iterrows() is not guaranteed to change the dataframe, because
# the yielded row can be a copy.
unassigned = dataframe['Neighbourhood'] == 'Not assigned'
dataframe.loc[unassigned, 'Neighbourhood'] = dataframe.loc[unassigned, 'Borough']

# Download the CSV with the coordinates.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() prevents an HTTP error page from being parsed as CSV.
coord_response = requests.get("http://cocl.us/Geospatial_data", timeout=30)
coord_response.raise_for_status()
coord = coord_response.content.decode('utf-8')
dataframe_coord = pd.read_csv(io.StringIO(coord))

# Rename the key column so that it has the same name in both dataframes.
# rename() returns a new frame, so re-running the cell has no hidden in-place effect.
dataframe = dataframe.rename(columns={'Postcode': 'Postal Code'})

# Merge the two dataframes on the shared 'Postal Code' column
df_coord = pd.merge(dataframe, dataframe_coord, on='Postal Code')

# New assignment

# Statistics of Toronto boroughs and neighbourhoods

In [24]:
# Summary of the merged table: the number of distinct boroughs and the
# number of rows in df_coord (reported by the message as neighborhoods).
borough_total = len(df_coord['Borough'].unique())
row_total = df_coord.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_total, row_total))

The dataframe has 11 boroughs and 103 neighborhoods.


In [25]:
# Count the Neighbourhood entries within each borough
df_coord.groupby('Borough')['Neighbourhood'].count()

Borough
Central Toronto      9
Downtown Toronto    18
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Queen's Park         1
Scarborough         17
West Toronto         6
York                 5
Name: Neighbourhood, dtype: int64

# Concentrate on boroughs that contain the word 'Toronto'

In [26]:
# Keep only the rows whose borough name contains 'Toronto'
is_toronto_borough = df_coord['Borough'].str.contains('Toronto')
df_coord_toronto = df_coord.loc[is_toronto_borough]

# Preview the first rows of the selection
df_coord_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


# Statistics on selection (boroughs that contain the word 'Toronto')

In [27]:
# Summary of the Toronto selection: the number of distinct boroughs and the
# number of rows in df_coord_toronto (reported by the message as neighborhoods).
toronto_borough_total = len(df_coord_toronto['Borough'].unique())
toronto_row_total = df_coord_toronto.shape[0]
print('The new dataframe has {} boroughs and {} neighborhoods.'.format(toronto_borough_total, toronto_row_total))

The new dataframe has 4 boroughs and 38 neighborhoods.


In [28]:
# Count the Neighbourhood entries within each borough of the selection
df_coord_toronto.groupby('Borough')['Neighbourhood'].count()

Borough
Central Toronto      9
Downtown Toronto    18
East Toronto         5
West Toronto         6
Name: Neighbourhood, dtype: int64

# Define different colours for boroughs

In [29]:
# Unique boroughs of the Toronto selection
boroughs_list = df_coord_toronto['Borough'].unique().tolist()

# Marker colours, handed out to the boroughs in list order
borough_colours = ["red", "blue", "green", "yellow"]

# Map every borough to a colour.
# The modulo wraps around the palette, so the mapping works for any number
# of boroughs: fewer than four no longer raises an IndexError, and more than
# four reuse colours instead of leaving a borough without one.
boroughs_dict = {
    borough: borough_colours[position % len(borough_colours)]
    for position, borough in enumerate(boroughs_list)
}

# Extract the Toronto coordinates

In [30]:
# Use the mean latitude and longitude of the selected rows as the
# coordinates of Toronto.
latitude_toronto, longitude_toronto = (
    df_coord_toronto[column].mean() for column in ('Latitude', 'Longitude')
)

# NOTE: '{:2f}' sets a minimum field width of 2, not two decimal places,
# so the values are printed with the default six decimals.
message = 'The coordinates of Toronto are {:2f} (lat.) and {:2f} (long.)'
print(message.format(latitude_toronto, longitude_toronto))

The coordinates of Toronto are 43.667262 (lat.) and -79.389883 (long.)


# Build map of Toronto

In [31]:
# Base map centred on the Toronto coordinates computed earlier
map_toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# Walk the four columns in parallel: one circle marker per row
marker_rows = zip(
    df_coord_toronto['Latitude'],
    df_coord_toronto['Longitude'],
    df_coord_toronto['Borough'],
    df_coord_toronto['Neighbourhood'],
)
for latitude, longitude, borough, neighbourhood in marker_rows:
    # The outline and the fill use the colour assigned to the borough
    borough_colour = boroughs_dict[borough]
    # The popup text is "<neighbourhood>, <borough>"
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=popup,
        color=borough_colour,
        fill=True,
        fill_color=borough_colour,
        fill_opacity=0.6,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: display the map
map_toronto