# Toronto Segmentation and Clustering

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Passing None removes the display limit, so DataFrames are shown with every column and row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle HTTP requests
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# k-means implementation used for the clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.18.1                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Libraries imported.


#### Read the Toronto Postal Code Wikipedia Page and Extract the Postal Code Table

In [2]:
# Download every HTML table found on the Wikipedia page.
wiki_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(wiki_link)

# Use the first table on the page. Row 0 is treated as the header row: it is dropped,
# the remaining rows are renumbered from 0, and explicit column names are assigned.
Toronto_zip_table = wiki_tables[0].drop(0).reset_index(drop=True)
Toronto_zip_table.columns = ['Postalcode', 'Borough', 'Neighbourhood']

#### Drop rows where Borough = 'Not assigned', and set Neighbourhood to the Borough name where Neighbourhood = 'Not assigned'

In [3]:

# Clean the postal-code table with vectorized operations instead of a row-by-row loop.

# 1) Keep only the rows whose Borough has been assigned. Boolean filtering leaves the
#    surviving rows' original index labels untouched (the index is not reset here).
has_borough = Toronto_zip_table['Borough'] != 'Not assigned'
Toronto_zip_table = Toronto_zip_table.loc[has_borough].copy()

# 2) Where the Neighbourhood is unassigned, fall back to the Borough name.
#    A single .loc assignment replaces the chained `df[col][row] = value` write, which
#    pandas may apply to a temporary copy (SettingWithCopyWarning) rather than the frame.
no_neighbourhood = Toronto_zip_table['Neighbourhood'] == 'Not assigned'
Toronto_zip_table.loc[no_neighbourhood, 'Neighbourhood'] = Toronto_zip_table.loc[no_neighbourhood, 'Borough']

Toronto_zip_table.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


#### Combine rows that share a Postal Code into one row with comma-separated Neighbourhoods

In [4]:

# Sort so neighbourhood names are joined in alphabetical order, then collapse every
# (Postalcode, Borough) pair into one row whose Neighbourhood is a comma-separated list.
Toronto_zip_table = (
    Toronto_zip_table
    .sort_values(by=['Postalcode', 'Borough', 'Neighbourhood'])
    .groupby(['Postalcode', 'Borough'])['Neighbourhood']
    .apply(lambda names: ', '.join(names.astype(str)))
    .reset_index()
)
Toronto_zip_table.head(10)


Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [5]:
# Report how many postal-code rows remain after cleaning and grouping.
print('There are {} rows in the Toronto Postal Code DataFrame'.format(len(Toronto_zip_table.index)))

There are 103 rows in the Toronto Postal Code DataFrame


#### Read Geospatial data from csv and get lat-long for Toronto postcodes

In [6]:
import sys
import types
import pandas as pd
from ibm_botocore.client import Config
import ibm_boto3

import os
from getpass import getpass

def __iter__(self):
    """Stand-in ``__iter__`` that is bound onto the downloaded object body below; returns 0."""
    return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage.
# The API key is read from the IBM_COS_API_KEY_ID environment variable (falling back to an
# interactive prompt) instead of being embedded in the notebook, so the notebook can be
# shared or committed without leaking credentials.
# NOTE(review): the key that used to be hard-coded here has been exposed and should be rotated.
ibm_cos_api_key = os.environ.get('IBM_COS_API_KEY_ID') or getpass('IBM Cloud Object Storage API key: ')

client_c9f19e9623c7413db1ee38fbae5e4995 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=ibm_cos_api_key,
    ibm_auth_endpoint="https://iam.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

# Fetch the CSV object; 'Body' is the streaming payload of the response.
body = client_c9f19e9623c7413db1ee38fbae5e4995.get_object(Bucket='torontoneighborhoodanalysis-donotdelete-pr-hby8mbtcoxk9zh',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

# Load the coordinates and rename the columns so 'Postalcode' matches the postal-code table.
geo_data = pd.read_csv(body)
geo_data.columns = ['Postalcode', 'Latitude', 'Longitude']
geo_data.head(10)

Unnamed: 0,Postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [7]:
# Attach latitude/longitude to each postal code; the inner join keeps only the
# postal codes that appear in both tables.
Toronto_lat_long_table = Toronto_zip_table.merge(geo_data, how='inner', on='Postalcode')
Toronto_lat_long_table.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
