#                               Segmenting and Clustering Neighborhoods in Toronto

### Step 1: Scraping the Postal Data

Import the proper libraries

In [30]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

Use Beautiful Soup to parse the data from the website and show the table's first 5 rows

In [35]:
# Download the Wikipedia list of "M" postal codes and turn its table cells into
# a DataFrame with one row per assigned postal code.
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from hanging forever, and raise_for_status() stops us
# from silently parsing an HTTP error page as if it were the article.
response = requests.get(wikipedia_link, timeout=30)
response.raise_for_status()
raw_wikipedia_page = response.text

# The page is HTML, so use an HTML parser; the 'xml' parser is meant for
# well-formed XML documents and also needs lxml to be installed.
soup = BeautifulSoup(raw_wikipedia_page, 'html.parser')
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found at {}'.format(wikipedia_link))

table_contents=[]
for row in table.find_all('td'):
    span_text = row.span.text
    # Cells marked 'Not assigned' carry no borough/neighborhood data.
    if span_text == 'Not assigned':
        continue

    cell = {}
    # The first three characters of the cell's paragraph are the postal code.
    cell['PostalCode'] = row.p.text[:3]
    # The span text looks like 'Borough(Neighborhood / Neighborhood)'.
    cell['Borough'] = span_text.split('(')[0]
    neighborhood = span_text.split('(')[1]
    neighborhood = neighborhood.strip(')')
    neighborhood = neighborhood.replace(' /', ',')
    neighborhood = neighborhood.replace(')', ' ')
    cell['Neighborhood'] = neighborhood.strip(' ')
    table_contents.append(cell)

df=pd.DataFrame(table_contents)

# Some borough cells have several text fragments run together; map those
# concatenated strings to readable borough names.
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


Show the size of the parsed table

In [34]:
# Dimensions of the scraped table as a (rows, columns) tuple.
df.shape

(103, 3)

## End of Step 1

### Step 2: Gathering the latitude and longitude coordinates

Import the CSV file and check the data frame

In [37]:
# Load the postal-code -> latitude/longitude lookup table and preview it.
geospatial_csv_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv'
latlong = pd.read_csv(geospatial_csv_url)
latlong.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


Merge the data frames, remove the Postal Code column, and check the data frame

In [40]:
# Attach coordinates to every scraped postal code (left join keeps all rows of
# df), then discard the join key from the right-hand table since it duplicates
# 'PostalCode'.
df_toronto = (
    df
    .merge(latlong, how='left', left_on='PostalCode', right_on='Postal Code')
    .drop(columns='Postal Code')
)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


## End of Step 2

### Step 3: Cluster the Toronto neighborhoods

Find the coordinates of Toronto

In [41]:
# Look up the latitude/longitude of Toronto; the map cells below use these
# values as the map centre.
address = "Toronto, ON"

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto city are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto city are 43.6534817, -79.3839347.


Install the Folium map package, then import it to verify the install

In [43]:
# Shell escape: ask conda to install folium 0.5.0 from the conda-forge channel;
# --yes answers the confirmation prompt automatically.
!conda install -c conda-forge folium=0.5.0 --yes
# Importing right after the install confirms the package can be loaded.
import folium
print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python-3.7-main

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _libgcc_mutex-0.1          |      conda_forge           3 KB  conda-forge
    _openmp_mutex-4.5          |           1_llvm           5 KB  conda-forge
    _py-xgboost-mutex-2.0      |            cpu_0           8 KB  conda-forge
    _pytorch_select-0.2        |            gpu_0           2 KB
    absl-py-0.12.0             |     pyhd8ed1ab_0          96 KB  conda-forge
    aiohttp-3.7.4              |   py37h5e8e339_0  

jupyterlab_server-1. | 25 KB     | ##################################### | 100% 
protobuf-3.17.2      | 347 KB    | ##################################### | 100% 
pillow-8.2.0         | 684 KB    | ##################################### | 100% 
mkl_fft-1.3.0        | 204 KB    | ##################################### | 100% 
libssh2-1.9.0        | 226 KB    | ##################################### | 100% 
liblapack-3.9.0      | 11 KB     | ##################################### | 100% 
attrs-21.2.0         | 44 KB     | ##################################### | 100% 
grpcio-1.38.0        | 2.2 MB    | ##################################### | 100% 
mistune-0.8.4        | 54 KB     | ##################################### | 100% 
pygments-2.9.0       | 754 KB    | ##################################### | 100% 
dataclasses-0.8      | 7 KB      | ##################################### | 100% 
urllib3-1.26.5       | 99 KB     | ##################################### | 100% 
libpng-1.6.37        | 306 K

xorg-libxdmcp-1.1.3  | 19 KB     | ##################################### | 100% 
send2trash-1.5.0     | 12 KB     | ##################################### | 100% 
mpfr-4.0.2           | 648 KB    | ##################################### | 100% 
libxkbcommon-1.0.3   | 581 KB    | ##################################### | 100% 
tornado-6.1          | 646 KB    | ##################################### | 100% 
bleach-3.3.0         | 111 KB    | ##################################### | 100% 
blinker-1.4          | 13 KB     | ##################################### | 100% 
libpq-13.3           | 2.7 MB    | ##################################### | 100% 
pyodbc-4.0.30        | 71 KB     | ##################################### | 100% 
sqlalchemy-1.4.17    | 2.2 MB    | ##################################### | 100% 
prometheus_client-0. | 46 KB     | ##################################### | 100% 
mock-4.0.3           | 51 KB     | ##################################### | 100% 
python-dateutil-2.8. | 220 K

keras-applications-1 | 30 KB     | ##################################### | 100% 
libgcc-ng-9.3.0      | 7.8 MB    | ##################################### | 100% 
ipywidgets-7.6.3     | 101 KB    | ##################################### | 100% 
brotlipy-0.7.0       | 341 KB    | ##################################### | 100% 
patsy-0.5.1          | 187 KB    | ##################################### | 100% 
pyrsistent-0.17.3    | 89 KB     | ##################################### | 100% 
tensorflow-base-1.14 | 87.6 MB   | ##################################### | 100% 
snowballstemmer-2.1. | 57 KB     | ##################################### | 100% 
notebook-6.4.0       | 6.1 MB    | ##################################### | 100% 
ncurses-6.2          | 985 KB    | ##################################### | 100% 
libzopfli-1.0.3      | 164 KB    | ##################################### | 100% 
mkl-service-2.3.0    | 54 KB     | ##################################### | 100% 
c-ares-1.17.1        | 109 K

plotly-4.14.3        | 5.9 MB    | ##################################### | 100% 
boto3-1.17.88        | 70 KB     | ##################################### | 100% 
zstd-1.4.9           | 431 KB    | ##################################### | 100% 
opt_einsum-3.3.0     | 53 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: / b'Exception while loading config file /var/pod/.ws/ax-ext/config/wscloud/jupyter_notebook_config.py\n    Traceback (most recent call last):\n      File "/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/traitlets/config/application.py", line 737, in _load_config_files\n        config = loader.load_config()\n      File "/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/traitlets/config/loader.py", line 616, in load_config\n        self._read_file_as_dict()\n      File "/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/traitlets/config/loader.py", line 648, in 

Create a map of Toronto

In [44]:
# Base map centred on the geocoded Toronto coordinates.
toronto_center = [latitude, longitude]
map_toronto = folium.Map(location=toronto_center, zoom_start=10)
map_toronto

Populate the map with markers

In [45]:
# Draw one circle marker per row of df_toronto, with a popup reading
# "<neighborhood>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

for row_lat, row_lng, row_borough, row_neighborhood in df_toronto[marker_columns].itertuples(index=False, name=None):
    popup_text = f'{row_neighborhood}, {row_borough}'
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        popup=folium.Popup(popup_text, parse_html=True),
        **marker_style,
    )
    marker.add_to(map_toronto)

map_toronto

Keep only the boroughs whose name contains "Toronto" (Downtown Toronto, East Toronto, etc.) and map their neighborhoods

In [49]:
# Restrict the data to boroughs whose name contains "Toronto" and renumber rows.
is_toronto_borough = df_toronto['Borough'].str.contains("Toronto")
df_toronto_denc = df_toronto[is_toronto_borough].reset_index(drop=True)

# A closer zoom than the full-city map, since only these boroughs are drawn.
map_toronto_denc = folium.Map(location=[latitude, longitude], zoom_start=12)

denc_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
denc_marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per remaining row, with a "<neighborhood>, <borough>" popup.
for row_lat, row_lng, row_borough, row_neighborhood in df_toronto_denc[denc_columns].itertuples(index=False, name=None):
    popup_text = f'{row_neighborhood}, {row_borough}'
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        popup=folium.Popup(popup_text, parse_html=True),
        **denc_marker_style,
    )
    marker.add_to(map_toronto_denc)

map_toronto_denc

## End of Step 3