# Week 3 - Part 1 and 2
## Segmenting and Clustering Neighborhoods in Toronto

### importing the libraries

In [42]:
import pandas as pd
import numpy as np
import requests 

### obtaining the data and removing the cells that have borrow not assigned

In [43]:
wiki = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_page = requests.get(wiki)

wiki_raw = pd.read_html(wiki_page.content, header = 0)[0]
df = wiki_raw[wiki_raw.Neighbourhood != 'Not assigned']
df.reset_index(inplace = True)
df.head()

Unnamed: 0,index,Postal Code,Borough,Neighbourhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,5,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### grouping the neibourhoods by postal code

In [44]:

df.groupby(['Postal Code']).first()

Unnamed: 0_level_0,index,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M1B,9,Scarborough,"Malvern, Rouge"
M1C,18,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,27,Scarborough,"Guildwood, Morningside, West Hill"
M1G,36,Scarborough,Woburn
M1H,45,Scarborough,Cedarbrae
M1J,54,Scarborough,Scarborough Village
M1K,63,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
M1L,72,Scarborough,"Golden Mile, Clairlea, Oakridge"
M1M,81,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
M1N,90,Scarborough,"Birch Cliff, Cliffside West"


In [45]:
df[df['Borough'] == 'Not assigned']

Unnamed: 0,index,Postal Code,Borough,Neighbourhood


### printing the number of rows of the dataframe

In [46]:

df.shape

(103, 4)

#  Part 2 : getting the latitude and the longitude coordinates of each neighborhood

In [47]:
import geocoder # import geocoder

# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

ModuleNotFoundError: No module named 'geocoder'

## geocoder not working. Hence using the csv file

In [48]:

url = 'http://cocl.us/Geospatial_data'

In [49]:
df_geo = pd.read_csv(url)
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### checking the datatypes of the columns of both the dataframes

In [50]:
df_geo.dtypes

Postal Code     object
Latitude       float64
Longitude      float64
dtype: object

In [51]:
df.dtypes

index             int64
Postal Code      object
Borough          object
Neighbourhood    object
dtype: object

### checking the shape of both the dataframes

In [52]:
df_geo.shape

(103, 3)

In [53]:
df.shape

(103, 4)

### joining both the dataframes

In [54]:
df = df.join(df_geo.set_index('Postal Code'), on='Postal Code')
df.head()

Unnamed: 0,index,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,2,M3A,North York,Parkwoods,43.753259,-79.329656
1,3,M4A,North York,Victoria Village,43.725882,-79.315572
2,4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### creating the dataframe required by the project

In [58]:
column_names = ["Postal Code", "Borough", "Neighbourhood", "Latitude", "Longitude"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(df[df["Postal Code"]==postcode], sort=False, ignore_index=True)
    
test_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,index
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,40.0
1,M2H,North York,Hillcrest Village,43.803762,-79.363452,46.0
2,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,12.0
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,54.0
4,M4G,East York,Leaside,43.70906,-79.363452,39.0
5,M4M,East Toronto,Studio District,43.659526,-79.340923,84.0
6,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849,108.0
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,143.0
8,M9L,North York,Humber Summit,43.756303,-79.565963,80.0
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,139.0


In [59]:
test_df.drop(['index'], axis=1)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452
5,M4M,East Toronto,Studio District,43.659526,-79.340923
6,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
8,M9L,North York,Humber Summit,43.756303,-79.565963
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442
