# Segmenting and Clustering Neighborhoods in Toronto

## Importing useful packages

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Lift pandas' display limits so frames are rendered with every row and column.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

# NOTE(review): the trailing remark on the install line says "uncomment", but the
# line is active and runs every time this cell executes.
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): newer pandas releases expose this as pd.json_normalize and deprecate
# the pandas.io.json import path -- confirm against the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# folium is pinned to 0.5.0 by the install command below.
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')


Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  25.25 MB/s
geopy-1.18.1-p 100% |################################| Time: 0:00:00  37.60 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  50.02 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  34.40 MB/s
vincent-0.4.4- 100% |###################

### Installing Wikipedia module

In [2]:
# Install the `wikipedia` package from conda-forge, then import it under the
# alias `wp`, which the scraping cell uses.
# NOTE(review): no --yes flag is passed here, unlike the installs in the first
# cell -- confirm conda does not wait on a confirmation prompt.
!conda install -c conda-forge wikipedia 
import wikipedia as wp
print('Library imported.')


Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    wikipedia: 1.4.0-py35_0 conda-forge

wikipedia-1.4. 100% |################################| Time: 0:00:00  16.23 MB/s
Library imported.


### Scraping the Wikipedia page

In [3]:
# Fetch the article's HTML through the wikipedia client and encode it as UTF-8 bytes.
html = (
    wp.page("List_of_postal_codes_of_Canada:_M")
    .html()
    .encode("UTF-8")
)

# Take the first table pandas finds on the page.
df = pd.read_html(html)[0]

# Row 0 supplies the column labels: relabel the columns from it, then drop
# that row so only data rows remain.
df = df.rename(columns=df.iloc[0]).drop(0, axis=0)
df


Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
10,M8A,Not assigned,Not assigned


In [6]:
# Report the (rows, columns) shape of the scraped table.
print(df.shape)


(289, 3)


### Data processing

### Ignore cells with a borough that is Not assigned

In [7]:
# Keep only the rows whose borough is an actual value, not the
# 'Not assigned' placeholder, and renumber the index from zero.
has_borough = df['Borough'].ne('Not assigned')
toronto_clean = df.loc[has_borough].reset_index(drop=True)
toronto_clean


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Not assigned
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


In [8]:
# Shape of the table once rows with a 'Not assigned' borough are removed.
toronto_clean.shape

(212, 3)

### Combine rows with the same postal code and separate their neighborhoods with commas

In [9]:
def foo(a):
    """Join an iterable of strings into one comma-separated string.

    Used as the aggregation function when several neighbourhood names
    are collapsed into a single cell.

    Parameters
    ----------
    a : iterable of str
        The strings to combine (for example, one group's column values).

    Returns
    -------
    str
        The items joined with ", "; an empty iterable yields "".
    """
    return ", ".join(a)

In [10]:
# One row per (Postcode, Borough) pair; the neighbourhood names of each
# group are merged into a single string by `foo`.
toronto_clean1 = (
    toronto_clean
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(foo)
    .reset_index()
)
toronto_clean1

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [11]:
# Shape after collapsing the table to one row per (Postcode, Borough) pair.
toronto_clean1.shape

(103, 3)

### If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.

In [12]:
# Find the rows whose neighbourhood is still the 'Not assigned' placeholder.
# A boolean mask selects them by index label, so this does not rely on the
# frame carrying a default 0..n-1 index and avoids visiting every row in Python.
unassigned = toronto_clean1['Neighbourhood'] == 'Not assigned'

for label in toronto_clean1.index[unassigned]:
    borough = toronto_clean1.at[label, 'Borough']
    # Print the row before and after the replacement as a visible trace.
    print(borough, toronto_clean1.at[label, 'Neighbourhood'])
    # The neighbourhood falls back to the borough name.
    toronto_clean1.at[label, 'Neighbourhood'] = borough
    print(borough, toronto_clean1.at[label, 'Neighbourhood'])

toronto_clean1

Queen's Park Not assigned
Queen's Park Queen's Park


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [13]:
# Shape check after filling placeholder neighbourhoods; only cell values
# were edited, so the shape should be unchanged.
toronto_clean1.shape

(103, 3)

### Creating DataFrame with the geographical Coordinates

In [14]:
!conda install -c conda-forge geocoder

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geocoder:   1.38.1-py_0  conda-forge
    orderedset: 2.0-py35_0   conda-forge
    ratelim:    0.1.6-py35_0 conda-forge

orderedset-2.0 100% |################################| Time: 0:00:00  56.61 MB/s
ratelim-0.1.6- 100% |################################| Time: 0:00:00  13.20 MB/s
geocoder-1.38. 100% |################################| Time: 0:00:00  44.15 MB/s


In [15]:
import geocoder # import geocoder

In [34]:
# initialize your variable to None
#lat_lng_coords = None
#g = geocoder.google('{}, Toronto, Ontario'.format('M5G'))
#g = geocoder.google('Mountain View, CA')
#lat_lng_coords = g.latlng
# loop until you get the coordinates
#while(lat_lng_coords is None):
#  g = geocoder.google('{}, Downtown Toronto,  Central Bay Street'.format('M5G'))
#  lat_lng_coords = g.latlng

#latitude = lat_lng_coords[0]
#longitude = lat_lng_coords[1]

In [35]:
# Free-form query sent to the geocoder. The string is the one this cell has
# always geocoded; it is now stored in `address` so the variable matches the
# request that is actually made.
address = ' Downtown Toronto,  Central Bay Street'

# Identify this application to Nominatim. Constructing the geocoder without
# a user agent triggers a deprecation warning in geopy 1.x and is rejected
# by later releases.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
location = geolocator.geocode(address)
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

  app.launch_new_instance()


43.6530438 -79.382659


In [21]:
# Load the postal-code coordinate table from the course-hosted CSV;
# the first line of the file is used as the header row.
coordinates = pd.read_csv(
    "https://cocl.us/Geospatial_data",
    header=0,
)
coordinates

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [30]:
# Print both shapes one after the other so the two tables can be compared
# before they are joined.
for label, frame in (('Coordinate ', coordinates), ('Toronto ', toronto_clean1)):
    print(label, frame.shape)

Coordinate  (103, 3)
Toronto  (103, 3)


In [32]:
# Inner-join the neighbourhood table with the coordinate table on the
# postal code, which is named differently on each side.
toronto_final = toronto_clean1.merge(
    coordinates,
    how='inner',
    left_on='Postcode',
    right_on='Postal Code',
)
print(toronto_final.shape)
toronto_final

(103, 6)


Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",M1L,43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",M1M,43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",M1N,43.692657,-79.264848


In [33]:
# 'Postal Code' duplicates 'Postcode' after the join, so remove it.
# drop() with errors='ignore' returns a new frame and does nothing when the
# column is already gone, so re-running this cell no longer raises KeyError.
toronto_final = toronto_final.drop('Postal Code', axis=1, errors='ignore')
toronto_final

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
