### Segmenting and Clustering (Toronto) Part II
##### by Francisco J. O'Meany

In [1]:
import os

import pandas as pd
from project_lib import Project

In [2]:
# get the neighborhoods data file
!wget -O toronto_cluster.csv https://francisco.omeany.net/toronto_cluster.csv

--2020-01-08 06:18:01--  https://francisco.omeany.net/toronto_cluster.csv
Resolving francisco.omeany.net (francisco.omeany.net)... 192.254.157.172
Connecting to francisco.omeany.net (francisco.omeany.net)|192.254.157.172|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4916 (4.8K) [text/csv]
Saving to: ‘toronto_cluster.csv’


2020-01-08 06:18:02 (162 MB/s) - ‘toronto_cluster.csv’ saved [4916/4916]



In [3]:
# load the downloaded neighborhoods file into a dataframe and preview the first rows
cluster_csv = 'toronto_cluster.csv'
t_cluster = pd.read_csv(cluster_csv)
t_cluster.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [4]:
# get the geo data file
!wget -O Geospatial_Coordinates.csv http://cocl.us/Geospatial_data

--2020-01-08 06:18:03--  http://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 159.8.72.228, 159.8.69.24, 159.8.69.21
Connecting to cocl.us (cocl.us)|159.8.72.228|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cocl.us/Geospatial_data [following]
--2020-01-08 06:18:03--  https://cocl.us/Geospatial_data
Connecting to cocl.us (cocl.us)|159.8.72.228|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-01-08 06:18:06--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.27.197, 107.152.26.197
Connecting to ibm.box.com (ibm.box.com)|107.152.27.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-01-08 06:18:06--  https://ibm.box.co

In [5]:
# column layout of the combined neighborhoods table
column_names = [
    'PostalCode',
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# start from an empty dataframe that carries only those columns
neighborhoods = pd.DataFrame(data=None, columns=column_names)

In [6]:
# display the dataframe to check its column layout
neighborhoods

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude


In [7]:
# load the downloaded coordinates file into a dataframe and preview the first rows
geocode_csv = 'Geospatial_Coordinates.csv'
df_geocode = pd.read_csv(geocode_csv)
df_geocode.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
# Fill the dataframe: attach latitude/longitude to every neighborhood row.
#
# This is done with one keyed join instead of a row-by-row loop:
#   * postal codes are matched exactly rather than by substring search;
#   * the frame is built in a single step, so the cell does not rely on
#     DataFrame.append (removed in pandas 2.0) and does not copy the whole
#     frame once per row;
#   * `neighborhoods` is assigned, not extended, so re-running the cell gives
#     the same result instead of duplicating every row.

# One coordinate row per postal code; if the file repeats a code, keep the
# first occurrence.
geo_lookup = (
    df_geocode
    .drop_duplicates(subset='Postal Code')
    .rename(columns={'Postal Code': 'PostalCode'})
    [['PostalCode', 'Latitude', 'Longitude']]
)

# A left join keeps every row of t_cluster in its original order.
located = t_cluster[['PostalCode', 'Borough', 'Neighborhood']].merge(
    geo_lookup, on='PostalCode', how='left', validate='many_to_one'
)

# Fail with a clear message rather than writing rows without coordinates.
missing = located.loc[
    located['Latitude'].isna() | located['Longitude'].isna(), 'PostalCode'
]
if not missing.empty:
    raise ValueError(
        'No coordinates found for postal codes: {}'.format(sorted(set(missing)))
    )

# Coordinates are rounded to 6 decimals and stored as text.
neighborhoods = located.assign(
    Latitude=located['Latitude'].round(decimals=6).astype(str),
    Longitude=located['Longitude'].round(decimals=6).astype(str),
)


In [9]:
# Save dataframe as csv file to storage.
# The project id and access token are read from the environment so that no
# credential is stored in the notebook itself. Set WS_PROJECT_ID and
# WS_PROJECT_ACCESS_TOKEN before starting the kernel; a missing variable
# raises KeyError here, before anything is written.
project = Project(
    None,
    os.environ['WS_PROJECT_ID'],
    os.environ['WS_PROJECT_ACCESS_TOKEN'],
)
project.save_data(
    data=neighborhoods.to_csv(index=False),
    file_name='toronto_neighborhoods.csv',
    overwrite=True,
)

neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188712
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
