## Coursera Project

In [1]:
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
main_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Bound the wait so a stalled connection cannot hang the kernel, and fail
# loudly on an HTTP error instead of parsing an error page as if it were data.
req = requests.get(main_url, timeout=30)
req.raise_for_status()
# Parse the response body with Python's built-in HTML parser.
soup = BeautifulSoup(req.text, "html.parser")

In [3]:
# Take the first <tbody> element of the parsed page, then collect every <tr>
# row nested inside it.
table = soup.find(name='tbody')
table_rows = table.find_all(name='tr')

In [4]:
# The text of each <tr> is newline-delimited; after splitting, the first and
# last pieces are empty strings, so slice them off to keep only the cell values.
row_cells = [tr.text.split('\n')[1:-1] for tr in table_rows]
# The first parsed row supplies the column names; the remaining rows are data.
raw_df = pd.DataFrame(data=row_cells[1:], columns=row_cells[0])
raw_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [5]:
# Keep only the rows whose borough is assigned, then renumber the index from 0.
has_borough = raw_df['Borough'] != 'Not assigned'
raw_df = raw_df.loc[has_borough].reset_index(drop=True)
raw_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [6]:
# Collapse the table to one row per postcode.
grouped = raw_df.groupby(['Postcode'])
# Join all neighbourhoods that share a postcode into one comma-separated string.
neighborhood_grouped = grouped['Neighbourhood'].apply(lambda names: ', '.join(names))
# Take the first borough listed for each postcode. Unlike popping from a set,
# the result is deterministic even if a postcode appears under several boroughs.
borough_grouped = grouped['Borough'].first()
borough = borough_grouped.to_frame()
neighborhood = neighborhood_grouped.to_frame()
# Both frames come from the same groupby and are indexed by Postcode, so the
# merge pairs each borough with its neighbourhood string.
grouped_final = borough.merge(neighborhood, on="Postcode")
grouped_final.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [7]:
# Sanity check: (rows, columns) of the grouped table, which has one row per postcode.
grouped_final.shape

(103, 2)

## Geo Data

In [8]:
from geopy.geocoders import Nominatim 
from IPython.display import Image 
from IPython.core.display import HTML 
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import json
from pandas.io.json import json_normalize

In [9]:
# Location of the CSV with one latitude/longitude pair per postal code.
# Set the GEOSPATIAL_CSV environment variable to run the notebook on another
# machine; the fallback is the path the notebook was originally run with.
GEOSPATIAL_CSV = os.environ.get(
    'GEOSPATIAL_CSV', 'C:/Users/Florian/Desktop/Geospatial_Coordinates.csv'
)
geospatial_data = pd.read_csv(GEOSPATIAL_CSV)
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Rename the first column (whatever its current label) to "Postcode" so it
# matches the key column name used by the scraped table.
first_column = geospatial_data.columns[0]
geospatial_data = geospatial_data.rename(columns={first_column: "Postcode"})

In [11]:
# Attach latitude/longitude to each postcode. This is an inner join, so a
# postcode missing from either table does not appear in the result.
full_table = pd.merge(grouped_final, geospatial_data, how='inner', on='Postcode')
full_table.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
from geopy.geocoders import Nominatim 
from IPython.display import Image 
from IPython.core.display import HTML 
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import json

#### Clustering Data

In [13]:
# Look up the coordinates of Toronto through the Nominatim geocoding service.
address = 'Toronto, Ontario, Canada'
geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; stop here with a
# clear message rather than failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('Toronto is {}, {}.'.format(latitude, longitude))

Toronto is 43.653963, -79.387207.


We will focus on the city of Toronto and only consider the neighbourhoods in Toronto.

In [15]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of full_table, labelled "<neighbourhoods>, <borough>".
for lat, lng, bor, neigh in zip(full_table['Latitude'], full_table['Longitude'],
                                full_table['Borough'], full_table['Neighbourhood']):
    # parse_html is a Popup argument, so it is passed only here and not to
    # CircleMarker, whose extra keyword arguments are Leaflet path options.
    popup = folium.Popup('{}, {}'.format(neigh, bor), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='red',
        fill=True,
        fill_color='white',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto