# Part 1

In [51]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import folium
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

## get each record 

In [5]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)

data = response.text
soup = BeautifulSoup(data,'html.parser')
table=soup.find('table')

records = [i.findAll('p')[0] for i in table.findAll('td')]

## extract postcodes, boroughs, neighborhoods 

In [6]:
postcodes = []
borough = []
neighborhood = []
combined = []

for record in records:
    combined.append(record.find('span').text)
    postcodes.append(re.findall('<p>(.*?)<br\/>', str(record))[0])
    
    split = combined[-1].split('(')
    
    borough.append(split[0])
    if (len(split) > 1):
        neighborhood.append(split[1].replace(')', ''))
    else:
        neighborhood.append(split[0])
    

In [13]:
data = pd.DataFrame()

data['postcodes'] = postcodes
data['borough'] = borough
data['neighborhood'] = neighborhood

In [14]:
data = data[~(data.borough == 'Not assigned')]
data.neighborhood = data.neighborhood.str.replace(' /',',')

In [15]:
data

Unnamed: 0,postcodes,borough,neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [18]:
data.shape

(103, 4)

# Part 2

In [16]:
import geocoder

data['location'] = data.postcodes.apply(lambda x: geocoder.api.arcgis(f'{x}, Canada').latlng)

In [26]:
data['latitude'] = data.location.str[0]
data['longitude'] = data.location.str[1]
data.drop('location', axis=1, inplace=True)

data.head()

Unnamed: 0,postcodes,borough,neighborhood,latitude,longitude
2,M3A,North York,Parkwoods,43.75245,-79.32991
3,M4A,North York,Victoria Village,43.73057,-79.31306
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
6,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188


# Part 3

In [30]:
data = data[data.borough.str.contains('Toronto')]

In [33]:
data.borough.value_counts()

Downtown Toronto                                                17
Central Toronto                                                  9
West Toronto                                                     6
East Toronto                                                     4
East TorontoBusiness reply mail Processing Centre969 Eastern     1
East YorkEast Toronto                                            1
Downtown TorontoStn A PO Boxes25 The Esplanade                   1
Name: borough, dtype: int64

In [40]:
data.borough = data.borough.astype('category').cat.codes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [43]:
latitude, longitude = geocoder.api.arcgis('Toronto, Canada').latlng

In [56]:
kclusters=len(data.borough.unique())

# create map
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, cluster in zip(data['latitude'], data['longitude'], data['borough']):
    label = folium.Popup(str(data['borough']) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_toronto)
    
map_toronto