# Segmenting and Clustering Neighborhoods in Toronto

## Import the relevant libraries

In [1]:
import pandas as pd
import numpy as np
import requests

pd.options.display.max_columns =None # full rows in output

#### get the url of requried data

In [2]:
url ='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

### Let fetch the table from url

In [3]:
df = pd.read_html(url, match ='Postcode')  # there is pandas method to extract table from html page
df[0].head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
df = df[0].copy()

## Step -1

### Ignore cells with a borough that is Not assigned.

In [5]:
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


## Step -2

### two rows will be combined into one row with the neighborhoods separated with a comma

In [6]:
df =df.groupby(['Postcode','Borough']).Neighbourhood.agg([('Neighbourhood', ', '.join)])
df.reset_index(inplace =True)

In [7]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Step -3 

### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [8]:
df[df['Neighbourhood'] == 'Not assigned'] # check the any 'Not assigned' value in 'Neighbourhood' column
        
        #OR
        
#df['Neighbourhood'].str.contains("Not assigned")

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M7A,Queen's Park,Not assigned


###### There is one column. So as by rule it replace by Borough value

In [9]:
df['Neighbourhood'].replace('Not assigned',"Queen's Park",inplace =True)

In [10]:
df.shape

(103, 3)

In [11]:

import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_c2429444175e4fc9b7ac6b4b6bfab95a = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='cOyZRNqVdTBMvZP0ZslOp1PO3PZdLSVm4SsuwmIPJGCp',
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_c2429444175e4fc9b7ac6b4b6bfab95a.get_object(Bucket='courseracapstone-donotdelete-pr-gak31r2m5bxa7c',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_1 = pd.read_csv(body)
df_data_1.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [21]:
df.rename(columns = {'Postcode':'Postal Code'}, inplace =True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Postal Code.1,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [20]:
df = pd.merge(df,df_data_1,on ='Postal Code')
df

ValueError: The column label 'Postal Code' is not unique.

In [14]:
test = kd[kd['Borough'].str.contains('Toronto')]
test

Unnamed: 0,Postcode,Borough,Neighbourhood
37,M4E,East Toronto,The Beaches
41,M4K,East Toronto,"The Danforth West, Riverdale"
42,M4L,East Toronto,"The Beaches West, India Bazaar"
43,M4M,East Toronto,Studio District
44,M4N,Central Toronto,Lawrence Park
45,M4P,Central Toronto,Davisville North
46,M4R,Central Toronto,North Toronto West
47,M4S,Central Toronto,Davisville
48,M4T,Central Toronto,"Moore Park, Summerhill East"
49,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi..."
