# Get Toronto Neighborhood Data

In [1]:
#Import Library
import pandas as pd

In [2]:
# Scrape every HTML table on the Wikipedia postal-code page into a list of
# DataFrames; only the first table is used from here on.
tables = pd.read_html(
    io="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
)

# Use the values in the first row of that table as its column labels.
tables[0].columns = tables[0].iloc[0]

In [3]:
# Create dataframe
# Start from a copy of the first scraped table so that `tables[0]` itself is
# not relabelled or modified, and re-running this cell always gives the same
# result. The first row is skipped because it is the row used for the column
# labels, not a data row.
df_tab = tables[0].iloc[1:].copy()
df_tab.columns = ['PostalCode', 'Borough', 'Neighborhood']

# Only process the cells that have an assigned borough.
df_tab = df_tab[df_tab['Borough'] != 'Not assigned']

# If a cell has a borough but a Not assigned neighborhood, set neighborhood name to its borough.
# Series.mask swaps in the borough row by row (aligned on the index) and
# returns a new Series. The previous in-place replace() acted on a column
# reached through a filtered frame, which can fail to update df_tab.
df_tab = df_tab.assign(
    Neighborhood=df_tab['Neighborhood'].mask(
        df_tab['Neighborhood'] == 'Not assigned',
        df_tab['Borough'],
    )
)

# Combine neighborhoods with same postal code in a single row and separate with commas (as_index=False to retain columns)
df_tab = (
    df_tab
    .groupby(['PostalCode', 'Borough'], as_index=False)['Neighborhood']
    .agg(','.join)
)
df_tab.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [4]:
# Show (rows, columns) of the cleaned and grouped neighborhood table.
df_tab.shape

(103, 3)

# Get Geospatial Data

In [5]:
# Download the CSV of latitude/longitude coordinates keyed by postal code,
# then preview its first rows.
geo_df = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')
geo_df.head(n=12)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [6]:
# Show (rows, columns) of the coordinates table.
geo_df.shape

(103, 3)

# Merge Data

In [7]:
# Give the coordinates table the same key column name as df_tab, then attach
# latitude and longitude to each neighborhood row by joining on that key.
geo_df = geo_df.rename(columns={'Postal Code': 'PostalCode'})
df_tabfinal = df_tab.merge(geo_df, on='PostalCode')
df_tabfinal.head(n=12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [8]:
# Show (rows, columns) of the merged table. pd.merge defaults to an inner
# join, so a row count lower than df_tab's would mean some postal codes had
# no matching coordinates and were dropped.
df_tabfinal.shape

(103, 5)