### Importing libraries

In [24]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import folium

### Fetching the Wikipedia page that contains the postal-code table and saving its parsed contents to the variable `CanadaWikiPage`

In [9]:
CanadaURL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Fail fast instead of parsing an error page: the timeout keeps the cell from
# hanging indefinitely, and raise_for_status() turns a 4xx/5xx reply into an
# exception rather than handing error HTML to the parser.
response = requests.get(CanadaURL, timeout=30)
response.raise_for_status()
source = response.text

# 'lxml' names the parser backend BeautifulSoup should use. It is a separate
# third-party package, which is why it must be installed for this line to work.
CanadaWikiPage = BeautifulSoup(source, 'lxml')

In [10]:
# Get the postal-code table from the parsed page.
table = CanadaWikiPage.find('table', class_='wikitable sortable')

# find() returns None when nothing matches; stop here with a clear message
# instead of an AttributeError on the next line.
if table is None:
    raise ValueError("No table with class 'wikitable sortable' found on the page")

# Every <tr> of that table, header row included.
tableRows = table.find_all('tr')

In [11]:
# Cell order inside each table row:
#   td0 -> Postcode, td1 -> Borough, td2 -> Neighborhood
columns = ['Postalcode', 'Borough', 'Neighborhood']

# Collect all rows first and build the DataFrame once at the end.
# Appending with CanadaDF.loc[len(CanadaDF)] re-allocates the frame on every
# row, which is needlessly slow.
records = []
for row in tableRows:
    row_data = [cell.text.strip() for cell in row.find_all('td')]
    # Keep only rows that yield exactly one value per column; the table has a
    # row with no usable <td> cells, and this skips it.
    if len(row_data) == len(columns):
        records.append(row_data)

CanadaDF = pd.DataFrame(records, columns=columns)

### Clean the dataframe

In [12]:
# Remove rows where Borough is 'Not assigned'.
# .copy() makes the filtered result an independent frame, so the .loc
# assignment below does not trigger chained-assignment warnings.
CanadaDF = CanadaDF[CanadaDF['Borough'] != 'Not assigned'].copy()

# If a row has a borough but a 'Not assigned' neighborhood, the neighborhood
# takes the borough's name. The target column must be 'Neighborhood'; writing
# to 'Borough' would overwrite a valid borough with 'Not assigned'.
missing_neighborhood = CanadaDF['Neighborhood'] == 'Not assigned'
CanadaDF.loc[missing_neighborhood, 'Neighborhood'] = CanadaDF.loc[missing_neighborhood, 'Borough']

### Final result for the first criterion

In [13]:
# Sanity check: (number of rows, number of columns) of CanadaDF at this point
CanadaDF.shape

(103, 3)

### Adding the coordinates to the dataframe

In [19]:
# Load the coordinates table and rename its key column to match CanadaDF,
# so both frames share the same join column.
geoData = pd.read_csv('http://cocl.us/Geospatial_data').rename(
    columns={'Postal Code': 'Postalcode'})

# Join on the postal code (pandas' default inner join).
CanadaCoordinates = CanadaDF.merge(geoData, on='Postalcode')

CanadaCoordinates.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Filtering the Toronto boroughs to add to the Folium map

In [20]:
# Keep only the rows whose borough name contains "Toronto", renumbering from 0.
is_toronto = CanadaCoordinates['Borough'].str.contains("Toronto")
TorontoDF = CanadaCoordinates[is_toronto].reset_index(drop=True)

TorontoDF.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [34]:
# Start from an empty map centred on Toronto (coordinates found on Google).
TorontoMap = folium.Map(location=[43.65, -79.38], zoom_start=12)

# One circle marker per row of TorontoDF, with a "<neighborhood>, <borough>" popup.
marker_fields = TorontoDF[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#ADD8E6',
        fill_opacity=0.7,
    )
    marker.add_to(TorontoMap)

# Last expression of the cell: renders the map inline.
TorontoMap