## Segmenting and Clustering Neighborhoods in Toronto

### Import libraries

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Scraping from the web

In [3]:
# Wikipedia page listing Canadian postal codes that start with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [4]:
# Download the page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() turns an HTTP error response
# into an exception instead of silently parsing an error page.
res = requests.get(url, timeout=30)
res.raise_for_status()

# Parse the raw response bytes with the built-in HTML parser.
soup = BeautifulSoup(res.content, "html.parser")

In [5]:
# Keep the first <table> matched by the "wikitable sortable" class filter.
wikitables = soup.find_all('table', class_="wikitable sortable")
table = wikitables[0]

### Get header of table

In [6]:
# Raw text of every header (<th>) cell in the table, in document order.
HeadListRaw = [header_cell.text for header_cell in table.find_all('th')]
HeadListRaw

['Postcode', 'Borough', 'Neighbourhood\n']

In [7]:
# Header names with leading/trailing newline characters removed.
HeadList = [raw_header.strip('\n') for raw_header in HeadListRaw]
HeadList

['Postcode', 'Borough', 'Neighbourhood']

### Get data in table

In [8]:
# Flatten the text of every <td> cell into one list, walking the table row by row.
ListRaw = [
    data_cell.text
    for table_row in table.find_all('tr')
    for data_cell in table_row.find_all('td')
]

# Same cells with leading/trailing newline characters removed.
List = [raw_text.strip('\n') for raw_text in ListRaw]
print('')




## Create Dict

In [9]:
# Empty mapping of column name -> list of column values, filled in the next cell.
Dict = dict()

In [10]:
# The flat cell list holds three cells per table row, so column i is every
# third entry starting at offset i.
for offset in range(3):
    Dict[HeadList[offset]] = List[offset::3]

In [11]:
# Build the frame from the column mapping and fix the column order explicitly.
column_order = ['Postcode', 'Borough', 'Neighbourhood']
df = pd.DataFrame(Dict)[column_order]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [12]:
# (rows, columns) of the scraped table; a bare last expression is displayed by the notebook.
df.shape

(289, 3)


In [13]:
!conda install -c conda-forge geocoder --yes 

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geocoder                  1.38.1                     py_0    conda-forge


In [14]:
import time

import geocoder

# Retry budget per postcode. Without a bound, a geocoder that keeps failing
# (quota exhausted, missing API key, no network) would spin this cell forever.
MAX_GEOCODE_ATTEMPTS = 5
GEOCODE_RETRY_DELAY_S = 1.0

LatList = []
LngList = []

# Several rows share a postcode, so remember each postcode's coordinates and
# query the geocoding service only once per distinct postcode.
coords_by_postcode = {}

for Pcode in df['Postcode']:
    if Pcode not in coords_by_postcode:
        lat_lng_coords = None
        for attempt in range(1, MAX_GEOCODE_ATTEMPTS + 1):
            g = geocoder.google('{}, Toronto, Ontario'.format(Pcode))
            lat_lng_coords = g.latlng
            # Treat any falsy result (None or an empty list) as a failed lookup.
            if lat_lng_coords:
                break
            if attempt < MAX_GEOCODE_ATTEMPTS:
                # Back off a little longer after each failure before retrying.
                time.sleep(GEOCODE_RETRY_DELAY_S * attempt)
        else:
            # No attempt succeeded: fail loudly instead of looping indefinitely.
            raise RuntimeError(
                'Could not geocode postcode {!r} after {} attempts'.format(
                    Pcode, MAX_GEOCODE_ATTEMPTS
                )
            )
        coords_by_postcode[Pcode] = (lat_lng_coords[0], lat_lng_coords[1])

    latitude, longitude = coords_by_postcode[Pcode]
    LatList.append(latitude)
    LngList.append(longitude)

In [15]:
# Attach the geocoded coordinates as two new columns, one value per row.
for column_name, column_values in (('Latitude', LatList), ('Longitude', LngList)):
    df[column_name] = column_values
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1A,Not assigned,Not assigned,43.653226,-79.383184
1,M2A,Not assigned,Not assigned,43.653226,-79.383184
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
5,M5A,Downtown Toronto,Regent Park,43.654260,-79.360636
6,M6A,North York,Lawrence Heights,43.718518,-79.464763
7,M6A,North York,Lawrence Manor,43.718518,-79.464763
8,M7A,Queen's Park,Not assigned,43.662301,-79.389494
9,M8A,Not assigned,Not assigned,43.653226,-79.383184


## Start Visualizing

### I decided to mark only the data located in Toronto, following the approach used in the lab

In [20]:
# Keep only the rows whose borough name contains "Toronto".
is_toronto_borough = df['Borough'].str.contains('Toronto')
df_Toronto = df[is_toronto_borough]
df_Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
4,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
5,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
17,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
18,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
34,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


In [17]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.16.0                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Folium installed
Libraries imported.


In [18]:
address = 'Toronto'

# Identify this application to the Nominatim service with an explicit user agent
# rather than relying on the library default.
geolocator = Nominatim(user_agent='toronto_neighbourhood_explorer')
location = geolocator.geocode(address)

# geocode() yields no result when the address cannot be resolved; fail with a
# clear message instead of an AttributeError on the lines below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)



43.653963 -79.387207


In [19]:
# Generate a map centred on the Toronto coordinates looked up above.
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13)

# Red circle marker for the Toronto reference point itself.
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Toronto',
    fill=True,
    fill_color='red',
    fill_opacity=0.6
).add_to(venues_map)

# Coordinates were geocoded per postcode, so neighbourhoods that share a
# postcode sit at exactly the same point. Drawing one marker per row stacks
# them and hides all but the top popup, so draw one marker per coordinate
# pair and list every neighbourhood at that point in its popup.
toronto_markers = (
    df_Toronto
    .groupby(['Latitude', 'Longitude'], sort=False)['Neighbourhood']
    .apply(', '.join)
    .reset_index()
)

# Blue circle marker for each distinct neighbourhood location.
for lat, lng, label in zip(
    toronto_markers.Latitude, toronto_markers.Longitude, toronto_markers.Neighbourhood
):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill=True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

venues_map