## Clustering Neighborhoods in Toronto

# Import packages

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Download the Wikipedia page listing Canadian postal codes that start with "M"
# and pick out the first sortable wikitable on it.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the kernel from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
res = requests.get(url, timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content, "html.parser")
tables = soup.find_all('table', class_="wikitable sortable")
if not tables:
    # Fail with a descriptive message instead of an IndexError on tables[0].
    raise ValueError('No "wikitable sortable" table found at {}'.format(url))
table = tables[0]

# Header of table

In [4]:
# Collect the raw text of every header (<th>) cell in the table.
HeadListRaw = [header_cell.text for header_cell in table.find_all('th')]
HeadListRaw

['Postcode', 'Borough', 'Neighbourhood\n']

In [5]:
# Remove leading/trailing newline characters from each header label.
HeadList = [raw_header.strip('\n') for raw_header in HeadListRaw]
HeadList

['Postcode', 'Borough', 'Neighbourhood']

# Data

In [6]:
# Flatten every data (<td>) cell of the table, row by row, into a single list.
# The later slicing with a step of 3 relies on each row contributing exactly
# three cells.
ListRaw = [
    data_cell.text
    for table_row in table.find_all('tr')
    for data_cell in table_row.find_all('td')
]

# Same values with leading/trailing newline characters removed.
List = [raw_value.strip('\n') for raw_value in ListRaw]

# Build a dictionary of columns

In [7]:
# Map each of the three header names to its column: the flat cell list holds
# the values row by row, so column i is every third element starting at i.
Dict = {HeadList[column_index]: List[column_index::3] for column_index in range(3)}

In [8]:
# Build the DataFrame from the column dictionary and fix the column order.
column_order = ['Postcode', 'Borough', 'Neighbourhood']
scraped_frame = pd.DataFrame(Dict)
df = scraped_frame[column_order]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [9]:
# Sanity check: report the (rows, columns) size of the scraped table.
row_count, column_count = df.shape
print((row_count, column_count))

(289, 3)


In [1]:
# Install the geocoder package from conda-forge; it is imported by the geocoding cell below.
!conda install -c conda-forge geocoder --yes

Waiting for a Spark session to start...
Spark Initialization Done! ApplicationId = app-20180916061553-0000
Solving environment: done

## Package Plan ##

  environment location: /opt/ibm/conda/miniconda3

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-4.5.11               |           py35_0         636 KB  conda-forge
    orderedset-2.0             |           py35_0         685 KB  conda-forge
    geocoder-1.38.1            |             py_0          52 KB  conda-forge
    openssl-1.0.2p             |       h470a237_0         3.5 MB  conda-forge
    ratelim-0.1.6              |           py35_0           5 KB  conda-forge
    certifi-2018.8.24          |           py35_1         138 KB  conda-forge
    ca-certificates-2018.8.24  |       ha4d7672_0         136 KB  conda-forge
    ----------------------------------------------------------

In [None]:
import time

import geocoder

# Upper bound on lookups per postcode, so that a service which keeps returning
# nothing surfaces as an error instead of looping forever and hanging the kernel.
MAX_GEOCODE_ATTEMPTS = 5
GEOCODE_RETRY_DELAY_S = 1.0

LatList = []
LngList = []
# The same postcode appears on several rows (one row per neighbourhood), so
# remember each result and query the service only once per distinct postcode.
coords_by_postcode = {}

for Pcode in df['Postcode']:
    if Pcode not in coords_by_postcode:
        lat_lng_coords = None
        for attempt in range(MAX_GEOCODE_ATTEMPTS):
            if attempt:
                # Brief pause before retrying rather than hammering the service.
                time.sleep(GEOCODE_RETRY_DELAY_S)
            g = geocoder.google('{}, Toronto, Ontario'.format(Pcode))
            lat_lng_coords = g.latlng
            # Any falsy result (None or an empty sequence) counts as a failure.
            if lat_lng_coords:
                break
        if not lat_lng_coords:
            raise RuntimeError(
                'Could not geocode postcode {!r} after {} attempts'.format(
                    Pcode, MAX_GEOCODE_ATTEMPTS
                )
            )
        coords_by_postcode[Pcode] = lat_lng_coords
    # Lists stay aligned with df's rows: one latitude/longitude per row.
    latitude, longitude = coords_by_postcode[Pcode][0], coords_by_postcode[Pcode][1]
    LatList.append(latitude)
    LngList.append(longitude)

In [None]:
# Attach the geocoded coordinates as two new columns, one value per row.
df = df.assign(Latitude=LatList, Longitude=LngList)
df

In [11]:
# Keep only the rows whose borough name contains the word "Toronto".
is_toronto_borough = df['Borough'].str.contains('Toronto')
df_Toronto = df[is_toronto_borough]
df_Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
17,M5B,Downtown Toronto,Ryerson
18,M5B,Downtown Toronto,Garden District
34,M5C,Downtown Toronto,St. James Town


# Import packages

In [12]:
# Install geopy from conda-forge; it provides the Nominatim geocoder imported next.
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 
# requests and pandas are already imported in the notebook's first code cell;
# importing them again here is a no-op.
import requests 
import pandas as pd 
# NOTE(review): numpy, random, Image, HTML and json_normalize are not used in
# the visible part of this notebook — confirm before removing them.
import numpy as np 
import random 


from IPython.display import Image 
from IPython.core.display import HTML 
    

from pandas.io.json import json_normalize

# Install folium, pinned to version 0.5.0, then import it for the map below.
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/ibm/conda/miniconda3

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.17.0               |             py_0          49 KB  conda-forge
    conda-4.5.11               |           py35_0         636 KB  conda-forge
    openssl-1.0.2p             |       h470a237_0         3.5 MB  conda-forge
    ca-certificates-2018.8.24  |       ha4d7672_0         136 KB  conda-forge
    certifi-2018.8.24          |           py35_1         138 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0             conda-forge
    geopy:       

# Find the coordinates of Toronto

In [15]:
# Look up the latitude/longitude of Toronto; these become the map centre.
address = 'Toronto'

# Identify this application explicitly: constructing Nominatim without a
# user_agent is deprecated in geopy 1.x and rejected by later releases.
geolocator = Nominatim(user_agent='toronto_neighbourhood_clustering')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)



43.653963 -79.387207


In [18]:
# Base map centred on the latitude/longitude looked up for Toronto.
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13)

# Larger red circle marking the Toronto reference point itself.
centre_marker = folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Toronto',
    fill=True,
    fill_color='red',
    fill_opacity=0.6,
)
centre_marker.add_to(venues_map)

# One smaller blue circle per row of df_Toronto, with the neighbourhood name
# as its popup label.
toronto_points = zip(df_Toronto.Latitude, df_Toronto.Longitude, df_Toronto.Neighbourhood)
for lat, lng, label in toronto_points:
    neighbourhood_marker = folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    neighbourhood_marker.add_to(venues_map)

venues_map