In [1]:
# Environment setup: import every library used by the notebook and install
# the two conda-forge packages (geopy, folium) that it imports.
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Lift pandas' display limits so rendered frames are not truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

# Shell command: install geopy from the conda-forge channel without prompting.
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Shell command: install folium pinned to version 0.5.0 from conda-forge without prompting.
!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [78]:
# Scrape the Wikipedia page of Canadian postal codes starting with "M".
# read_html returns every HTML table found on the page; the first one is the
# Postal Code / Borough / Neighbourhood table used by the rest of the notebook.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(url)
df = wiki_tables[0]

In [79]:
# Preview the scraped table. display.max_rows is set to None in the setup
# cell, so a bare `df` would render every row; show a bounded sample instead.
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [80]:
# Index the table by borough and discard the placeholder rows whose borough
# is 'Not assigned'.
#
# Both steps are guarded so that re-running this cell is a no-op rather than
# a KeyError: once 'Borough' has become the index it is no longer a column,
# and once the 'Not assigned' label is gone drop() would otherwise raise.
if 'Borough' in df.columns:
    df = df.set_index('Borough')
df = df.drop('Not assigned', axis=0, errors='ignore')

# Bounded preview (display.max_rows is None, so a bare `df` renders every row).
df.head(10)

Unnamed: 0_level_0,Postal Code,Neighbourhood
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1
North York,M3A,Parkwoods
North York,M4A,Victoria Village
Downtown Toronto,M5A,"Regent Park, Harbourfront"
North York,M6A,"Lawrence Manor, Lawrence Heights"
Downtown Toronto,M7A,"Queen's Park, Ontario Provincial Government"
Etobicoke,M9A,"Islington Avenue, Humber Valley Village"
Scarborough,M1B,"Malvern, Rouge"
North York,M3B,Don Mills
East York,M4B,"Parkview Hill, Woodbine Gardens"
Downtown Toronto,M5B,"Garden District, Ryerson"


## Check whether any null values are present

In [11]:
# True if at least one cell anywhere in df holds a missing value.
df.isna().to_numpy().any()

False

## Calculate the Shape of the Dataframe 

In [8]:
# Dimensions of df as a (rows, columns) tuple.
df.shape

(103, 2)

# Using the geospatial data provided, create a new dataset holding the latitude and longitude of each neighbourhood

In [16]:
# Load the postal-code -> (latitude, longitude) lookup table.
#
# The path is written relative to the current user's home directory ('~' is
# expanded by pandas when opening the file) instead of hard-coding one
# machine's account name, so the notebook runs for any user who has the CSV
# in their Downloads folder.
geo_csv_path = '~/Downloads/Geospatial_Coordinates.csv'
df1 = pd.read_csv(geo_csv_path)

# Bounded preview (display.max_rows is None, so a bare `df1` renders every row).
df1.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


## To build the required DataFrame, JOIN the two previous datasets on their postal code so that they form one single dataset

In [18]:
# Attach coordinates to every neighbourhood row: df1 is re-indexed by
# 'Postal Code' and matched against df's 'Postal Code' column (join defaults
# to a left join, so every row of df is kept). df's Borough index is
# preserved in the result.
#
# The join is performed once and assigned; a joined frame that is built but
# neither assigned nor displayed has no effect.
Dataset = df.join(df1.set_index('Postal Code'), on='Postal Code')

In [19]:
# Preview the merged table. display.max_rows is set to None in the setup
# cell, so a bare `Dataset` would render every row; show a bounded sample.
Dataset.head(10)

Unnamed: 0_level_0,Postal Code,Neighbourhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
North York,M3A,Parkwoods,43.753259,-79.329656
North York,M4A,Victoria Village,43.725882,-79.315572
Downtown Toronto,M5A,"Regent Park, Harbourfront",43.65426,-79.360636
North York,M6A,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
Downtown Toronto,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
Etobicoke,M9A,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
Scarborough,M1B,"Malvern, Rouge",43.806686,-79.194353
North York,M3B,Don Mills,43.745906,-79.352188
East York,M4B,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
Downtown Toronto,M5B,"Garden District, Ryerson",43.657162,-79.378937


# Taking Toronto as our Destination, let us create a Folium Map which will give us a closer look at each Neighbourhood.

In [81]:
def add_neighbourhood_markers(frame, target_map):
    """Add one circle marker per row of ``frame`` to ``target_map``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide 'Latitude', 'Longitude' and 'Neighbourhood' columns.
    target_map : folium.Map
        Map that receives the markers; it is modified in place.

    Returns
    -------
    folium.Map
        The same ``target_map`` object, returned for convenience.
    """
    rows = zip(frame['Latitude'], frame['Longitude'], frame['Neighbourhood'])
    for lat, lng, name in rows:
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            # parse_html=True makes the popup escape the label text, so
            # names containing quotes (e.g. "Queen's Park") render safely.
            popup=folium.Popup(name, parse_html=True),
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            # NOTE: parse_html belongs to the Popup above; it is not a
            # CircleMarker option, so it is not passed here.
        ).add_to(target_map)
    return target_map


# Centre of the map, given as a latitude/longitude pair.
latitude = 43.6532
longitude = -79.3832
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10, min_zoom=8, max_zoom=14)

# One marker per neighbourhood in the full merged dataset.
add_neighbourhood_markers(Dataset, map_toronto)

map_toronto

# Taking "Toronto" as the borough of choice, I am creating a dataset which separates out all boroughs whose name contains the word 'Toronto'

In [116]:
# Select the rows of Dataset whose Borough index label is one of the four
# boroughs listed below; .loc returns them grouped in the order of this list.
toronto_boroughs = [
    'Downtown Toronto',
    'East Toronto',
    'West Toronto',
    'Central Toronto',
]
Toronto_data = Dataset.loc[toronto_boroughs]
Toronto_data.head(40)

Unnamed: 0_level_0,Postal Code,Neighbourhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Downtown Toronto,M5A,"Regent Park, Harbourfront",43.65426,-79.360636
Downtown Toronto,M7A,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
Downtown Toronto,M5B,"Garden District, Ryerson",43.657162,-79.378937
Downtown Toronto,M5C,St. James Town,43.651494,-79.375418
Downtown Toronto,M5E,Berczy Park,43.644771,-79.373306
Downtown Toronto,M5G,Central Bay Street,43.657952,-79.387383
Downtown Toronto,M6G,Christie,43.669542,-79.422564
Downtown Toronto,M5H,"Richmond, Adelaide, King",43.650571,-79.384568
Downtown Toronto,M5J,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752
Downtown Toronto,M5K,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576


In [105]:
# Dimensions of Toronto_data as a (rows, columns) tuple.
Toronto_data.shape

(39, 4)

In [106]:
# Map of the rows in Toronto_data, one circle marker per neighbourhood.
# Centre of the map, given as a latitude/longitude pair.
latitude = 43.6532
longitude = -79.3832
map_toronto2 = folium.Map(location=[latitude, longitude], zoom_start=10, min_zoom=8, max_zoom=14)

marker_rows = zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Neighbourhood'])
for lat, lng, label in marker_rows:
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html=True makes the popup escape the label text, so names
        # containing quotes render safely.
        popup=folium.Popup(label, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE: parse_html belongs to the Popup above; it is not a
        # CircleMarker option, so it is not passed here.
    ).add_to(map_toronto2)

map_toronto2