### Segmenting and Clustering Neighborhoods in Toronto

In [1]:
#reading/scraping the webpage using pandas
import pandas as pd
df=pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")[0]

In [2]:
#intial shape of the dataframe
df.shape

(287, 3)

In [3]:
#indexing the 'Not assigned' Boroughs
indexNames = df[df['Borough']=='Not assigned'].index

In [4]:
#Dropping the Indexed 'Not assigned' Boroughs
df.drop(indexNames, inplace=True)

In [5]:
#shape after dropping 'Not assigned' Boroughs
df.shape

(210, 3)

In [6]:
#Grouping by Postcode
df=df.groupby(['Postcode', 'Borough']) ['Neighbourhood'].apply (list)
df=df.sample(frac=1).reset_index()
df['Neighbourhood']=df['Neighbourhood'].str.join(',')

In [7]:
#finding the 'Not assigned' Neighbourhood 
df.loc[df['Postcode']=='M9A']

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M9A,Queen's Park,Not assigned


In [8]:
#replacing Neighbourhood value 'Not assigned' with Borough value
df.loc[(df.Postcode == 'M9A'),'Neighbourhood']=df['Borough']

### Replacing not assigned Neighbourhood 

In [9]:
#checking if the Neighbourhood value 'Not assigned' has updated with Borough value for Postcode M9A 
df.loc[df['Postcode']=='M9A']

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M9A,Queen's Park,Queen's Park


# Result

In [10]:
#shape of the dataframe
df.shape

(103, 3)

### Renaming the column

In [11]:
df2=df.rename(columns={'Postcode': 'Postal Code'})

In [12]:
df2.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern
1,M9P,Etobicoke,Westmount
2,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
3,M9W,Etobicoke,Northwest
4,M1P,Scarborough,"Dorset Park,Scarborough Town Centre,Wexford He..."


# Longitude & Latitude

In [13]:
path="http://cocl.us/Geospatial_data"
df3=pd.read_csv(path)

In [14]:
df3.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merging Dataframes

In [15]:
df_merged=pd.merge(df2,df3, on='Postal Code')

In [16]:
df_merged.head(11)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
1,M9P,Etobicoke,Westmount,43.696319,-79.532242
2,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.688905,-79.554724
3,M9W,Etobicoke,Northwest,43.706748,-79.594054
4,M1P,Scarborough,"Dorset Park,Scarborough Town Centre,Wexford He...",43.75741,-79.273304
5,M5S,Downtown Toronto,"Harbord,University of Toronto",43.662696,-79.400049
6,M3N,North York,Downsview Northwest,43.761631,-79.520999
7,M9N,York,Weston,43.706876,-79.518188
8,M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",43.686412,-79.400049
9,M9B,Etobicoke,"Cloverdale,Islington,Martin Grove,Princess Gar...",43.650943,-79.554724


In [17]:
import numpy as np
import pandas as pd
import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [18]:
#toronto_data = df_merged[df_merged['Borough'] == 'Toronto'].reset_index(drop=True)
#toronto_data.head()

In [19]:
address = 'Toronto City, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


In [20]:
# create map of Toronto using latitude and longitude values
toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Borough'], df_merged['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto)  
    
toronto