# Importing libraries

In [57]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests

from folium.plugins import MarkerCluster
import folium

# Palette of colour names; the map cell picks one entry per borough, in list order.
# NOTE(review): these look like folium's Icon palette names. A few of them
# (e.g. 'lightred', 'darkpurple') are not standard CSS colour names, so confirm
# they render as intended when used as a marker outline colour.
colors = [
    'red', 'blue', 'gray',
    'darkred', 'lightred', 'orange',
    'beige', 'green', 'darkgreen',
    'lightgreen', 'darkblue', 'lightblue',
    'purple', 'darkpurple', 'pink',
    'cadetblue', 'lightgray', 'black',
]


# Getting Wikipage Table Data (Web Scraping part)

In [64]:
# Download the Wikipedia article that lists the "M" postal codes of Canada.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were data.
wikipage = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
wikipage.raise_for_status()

soup = BeautifulSoup(wikipage.text, "html.parser")

# Every <table> element in the page, in document order.
tables = soup.find_all("table")
if not tables:
    # Fail with a descriptive message instead of a bare IndexError below.
    raise ValueError("No <table> element found in the downloaded page")

# Only the first table is needed.
final_table = tables[0]

# One list of cell texts per <tr>. Calling a tag is BeautifulSoup shorthand for
# find_all(), so rows without any <td> (e.g. a header-only row) yield an empty list.
table_data = [
    [cell.text for cell in row("td")]
    for row in final_table("tr")
]

# Column names are the texts of the <th> cells, kept verbatim
# (including any trailing newline present in the markup).
table_columns = [header.text for header in final_table("th")]

#  Cleaning and formatting the cluster DataFrame

In [65]:
# Build the cluster DataFrame from the scraped rows. The first entry of
# table_data is skipped (presumably the header row, which has no <td> cells).
cluster_df = pd.DataFrame(table_data[1:], columns=table_columns)

# Scraped header and cell text can carry newlines or padding. Normalise every
# column instead of special-casing the literal "Neighbourhood\n", so that the
# column lookups and the "Not assigned" comparisons below keep working when
# other headers or cells also end with a newline.
cluster_df.columns = cluster_df.columns.str.strip()
cluster_df = cluster_df.apply(
    lambda column: column.str.replace('\n', '', regex=False).str.strip()
)

# Cleaning data: drop the postcodes that have no borough.
cluster_assigned_df = cluster_df.loc[cluster_df["Borough"] != "Not assigned", :].reset_index(drop=True)

# Formatting neighbourhoods: one row per (Postcode, Borough), with all of its
# neighbourhood names joined into a single comma-separated string.
cluster_assigned_df = (
    cluster_assigned_df
    .groupby(["Postcode", "Borough"], as_index=False)["Neighbourhood"]
    .agg(",".join)
)

# If the neighbourhood is "Not assigned", use the borough name instead.
# Series.where keeps values where the condition holds and takes the fallback
# elsewhere, which avoids transposing the whole frame for a row-wise apply.
cluster_assigned_df["Neighbourhood"] = cluster_assigned_df["Neighbourhood"].where(
    cluster_assigned_df["Neighbourhood"] != "Not assigned",
    cluster_assigned_df["Borough"],
)

#  Total Rows and Columns 

In [4]:
# Report the size of the cleaned frame; shape is (rows, columns) and is
# unpacked straight into the two placeholders.
print("Rows : {}  Columns: {}".format(*cluster_assigned_df.shape))

Rows : 103  Columns: 3


# Getting Latitude and Longitude and merging with cluster dataframe 

In [66]:
# The suggested geocoding method did not return latitudes/longitudes, so the
# coordinates are read from this local CSV file instead.
location_df = pd.read_csv("Geospatial_Coordinates.csv")

# Attach coordinates to each postcode (default inner join), then drop the
# duplicate key column that comes from the coordinates file.
clusters_df = (
    cluster_assigned_df
    .merge(location_df, left_on="Postcode", right_on="Postal Code")
    .drop(columns="Postal Code")
)

In [67]:
# Preview the first five rows of the merged frame.
clusters_df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Visualizing clusters

In [63]:


# Base map centred on the fixed coordinates (43.65, -79.38) at zoom level 9.
some_map = folium.Map(location=(43.65, -79.38), zoom_start=9)

# Draw every postcode as a small circle, using one colour per borough.
# The colour index wraps around with modulo, so the loop no longer raises
# IndexError when there are more boroughs than entries in `colors`.
for borough_idx, city in enumerate(clusters_df.Borough.unique()):
    clr = colors[borough_idx % len(colors)]
    # Only the coordinates are needed to place the markers.
    city_coords = clusters_df.loc[clusters_df.Borough == city, ["Latitude", "Longitude"]]
    for lat, lng in city_coords.values:
        folium.CircleMarker(
            [lat, lng],
            radius=3,
            color=clr,
            fill=True,
            fill_opacity=0.8,
        ).add_to(some_map)

# Last expression of the cell: renders the map inline.
some_map