# Segmenting and Clustering Neighborhoods in Toronto

## Leobardo Gómez

### Part 1

In [1]:
# Read the raw postal-code table and build one row per (PostalCode, Borough).
import pandas as pd  # library to process data as dataframes

toronto_df = pd.read_csv('Toronto.csv')

# The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood.
# NOTE(review): the header is overwritten positionally, so this assumes the CSV
# has exactly these three columns in this order — confirm against the file.
toronto_df.columns = ['PostalCode', 'Borough', 'Neighborhood']

# Only process the rows that have an assigned borough; rows whose borough is
# 'Not assigned' are ignored.
toronto_assigned = (
    toronto_df[toronto_df['Borough'] != 'Not assigned']
    .reset_index(drop=True)
)

# If a row has a borough but no neighborhood (the literal 'Not assigned' or a
# missing value), the neighborhood falls back to the borough name. This is done
# per row BEFORE combining: after the join, a placeholder sitting next to a real
# neighborhood could no longer be detected by an equality test, and a missing
# value would make ', '.join raise a TypeError.
neighborhood_missing = (
    toronto_assigned['Neighborhood'].isna()
    | toronto_assigned['Neighborhood'].eq('Not assigned')
)
toronto_filled = toronto_assigned.assign(
    Neighborhood=toronto_assigned['Neighborhood'].mask(
        neighborhood_missing, toronto_assigned['Borough']
    )
)

# More than one neighborhood can exist in one postal code area. All rows that
# share a postal code and borough are combined into a single row, with the
# neighborhood names separated by a comma.
toronto_combined = (
    toronto_filled
    .groupby(['PostalCode', 'Borough'])
    .agg(Neighborhood=('Neighborhood', ', '.join))
    .reset_index()
)

toronto_combined.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [2]:
# Show the dimensions of the combined dataframe through its .shape attribute
# (an attribute, not a method); the result is a (rows, columns) tuple, so the
# first entry is the number of rows.
toronto_combined.shape

(103, 3)

### Part 2

## **Important Note**
I used the CSV file (`Geospatial_Coordinates.csv`) to get the coordinates. It was easier this way.

In [6]:
# Load the postal-code coordinate table from the local CSV file.
coordinates_df = pd.read_csv('Geospatial_Coordinates.csv')
# Overwrite the header positionally so the key column is called 'PostalCode',
# the same name used in toronto_combined.
# NOTE(review): assumes the file has exactly three columns in the order
# postal code, latitude, longitude — confirm against the CSV.
coordinates_df.columns = ['PostalCode','Latitude','Longitude']
# Preview the first rows.
coordinates_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
# Attach latitude/longitude to every postal-code row by joining on the shared
# 'PostalCode' key. The join is an inner join (the pandas default), so only
# postal codes present in both tables appear in the result.
df_merge = toronto_combined.merge(
    coordinates_df,
    how='inner',
    on='PostalCode',
)
df_merge.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
