# In this analysis, I will be utilizing the Foursquare API as well as Folium to perform clustering of Neighborhoods in Toronto. 
### First, I need to scrape the neighborhood information from Wikipedia and load it into a pandas DataFrame

In [213]:
#Importing our dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [214]:
#Parse every HTML table found on the Wikipedia page listing the "M" postal codes of Canada
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
data = pd.read_html(wiki_url)

In [215]:
#Since read_html produces a LIST of dataframes, we must index in order to capture the dataframe we are looking for.
#The isinstance guard keeps this cell safe to re-run: once `data` is already a DataFrame,
#indexing it with [0] a second time would raise a KeyError.
if isinstance(data, list):
    data = data[0]
#Only the last expression of a cell is displayed, so a bare data.head() before the print shows nothing; just report the shape.
print(data.shape)

(288, 3)


In [216]:
#Keep only the rows whose Borough is something other than 'Not assigned'
has_borough = data['Borough'].ne('Not assigned')
data = data[has_borough]

In [217]:
#Shape of data after dropping the rows with no Borough assigned
data.shape

(211, 3)

In [218]:
#Preview the first rows that remain after filtering on Borough
data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [219]:
#Collapse the table to one row per (Postcode, Borough) pair, joining that group's neighbourhoods with a comma separator.
#Selecting the 'Neighbourhood' column before aggregating keeps the grouping columns out of the aggregation
#(groupby.apply over the whole frame is deprecated in recent pandas) and replaces the lambda with a plain str.join.
#The joined column comes back named 'Neighbourhood' rather than 0.
data = data.groupby(['Postcode','Borough'])['Neighbourhood'].agg(','.join).reset_index()

In [220]:
#Relabel the three columns by position (the last one holds the comma-joined neighborhoods), then preview the result
grouped_column_names = ['Postcode','Borough','Neighborhood']
data.columns = grouped_column_names
data.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [221]:
#Shape of data after grouping to one row per Postcode/Borough pair
data.shape

(103, 3)

In [222]:
#Check which Boroughs have a Neighborhood with the value 'Not assigned'
data[data['Neighborhood'].eq('Not assigned')]

Unnamed: 0,Postcode,Borough,Neighborhood
85,M7A,Queen's Park,Not assigned


In [223]:
#Replace every 'Not assigned' Neighborhood value with the value of the Borough column from the same row.
#A single .loc assignment with a boolean mask writes into `data` itself; assigning through
#data.iloc[85].Neighborhood is chained assignment on a temporary row Series and is not guaranteed to update `data`.
#Using the mask also avoids hard-coding row position 85 and the borough name.
not_assigned_mask = data['Neighborhood'] == 'Not assigned'
data.loc[not_assigned_mask, 'Neighborhood'] = data.loc[not_assigned_mask, 'Borough']

In [224]:
#Double-check that the Neighborhood value of the row at position 85 was changed
data.iloc[85]

Postcode                 M7A
Borough         Queen's Park
Neighborhood    Queen's Park
Name: 85, dtype: object

In [225]:
#One more search to make sure no rows are left with an unassigned Neighborhood
data[data['Neighborhood'].eq('Not assigned')]

Unnamed: 0,Postcode,Borough,Neighborhood


In [227]:
#Final Data Shape
data.shape

(103, 3)

# Part 2: Adding latitude/longitude info to Dataframe

In [237]:
#Load the latitude/longitude table from a CSV file expected in the notebook's working directory
coordinates_csv = 'Geospatial_Coordinates.csv'
location = pd.read_csv(coordinates_csv)

In [243]:
#Preview the coordinates table to see its column names
location.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [250]:
#Rename 'Postcode' to 'Postal Code' so the key column carries the same name as in the coordinates table
data = data.rename(columns={'Postcode': 'Postal Code'})

In [254]:
#Concat simply glues frames together; merge brings them together based on a common column.
#No key is passed, so pandas joins on the column names the two frames share,
#and an inner join keeps only the rows whose key appears in both frames.
data = pd.merge(data, location, how='inner')

In [259]:
#Sanity check after the merge: count of missing values per column, followed by the frame's shape
missing_per_column = data.isna().sum()
print(missing_per_column)
print(data.shape)

Postal Code     0
Borough         0
Neighborhood    0
Latitude        0
Longitude       0
dtype: int64
(103, 5)


In [257]:
#Preview the merged frame, which now carries Latitude and Longitude next to each postal code
data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
