# Segmenting and Clustering Neighborhoods in Toronto

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from unicodedata import normalize

#### Transforming the data from the wikipedia page into a pandas  dataframe

In [8]:
table = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
PC_df = table[0]
PC_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Droping cells with a borough that is Not assigned.


In [17]:
for i, borough in enumerate(PC_df['Borough']):
    if borough == 'Not assigned':
        PC_df.drop([i], inplace = True)

In [19]:
PC_df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


#### Aparently all the postal codes in the wikipedia table were unique

In [36]:
print('Size of the dataframe:', PC_df['Postal Code'].size)
PC_unique = PC_df['Postal Code'].unique()
print('Size of the dataframe with unique postal codes:', PC_unique.size)

Size of the dataframe: 103
Size of the dataframe with unique postal codes: 103


#### Aparently there was no cell with a borough and a not assigened neighbourhood. 

In [30]:
x = 0

for i in PC_df['Neighbourhood']:
    if i == 'Not assigned':
        print(i)
        x += 1
        
if x == 0:
    print('There is no Neighbourhood not assigned in the dataframe')

There is no Neighbourhood not assigned in the dataframe


#### Reseting the index

In [49]:
PC_df = PC_df.reset_index(drop=True)
PC_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


#### Printing the number of rows of the dataframe

In [50]:
print('Shape of the dataframe:', PC_df.shape)

Shape of the dataframe: (103, 3)


### Part 2

#### Installing and importing geocoder

In [42]:
!pip install geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 5.2 MB/s eta 0:00:011
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [43]:
import geocoder

#### Note: I tried using geocoder but wasn't able to get any results

In [62]:
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('M5G, Toronto, Ontario')
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

KeyboardInterrupt: 

#### Getting the data from the csv file

In [63]:
!wget -q -O 'Geospatial_Coordinates.csv' http://cocl.us/Geospatial_data

latlng_df = pd.read_csv('Geospatial_Coordinates.csv')
latlng_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merging the two dataframes

In [65]:
final_df = pd.merge(PC_df, latlng_df, on='Postal Code')
final_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509
