In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


Scraping the data from Wikipedia

In [2]:
# Download the Wikipedia page listing the Canadian postal codes that start with "M".
# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() aborts on an HTTP error instead of passing an error page to the parser.
response=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',timeout=30)
response.raise_for_status()
data=response.text

In [3]:
# Parse the downloaded page text with the 'html.parser' backend.
soup = BeautifulSoup(markup=data, features='html.parser')

In [4]:
# Three independent, empty accumulators -- one per table column -- filled by the scraping loop.
postalCode, boroughList, neighborhoodList = [], [], []

In [5]:
# Walk every row of the first table on the page and copy its first three
# cells into the three accumulator lists, in column order.
for table_row in soup.find('table').find_all('tr'):
    data_cells = table_row.find_all('td')
    if not data_cells:
        # Rows without <td> cells carry no data for the lists.
        continue
    for position, target in enumerate((postalCode, boroughList, neighborhoodList)):
        target.append(data_cells[position].text.rstrip('\n'))

Building the DataFrame and dropping the "Not assigned" rows

In [6]:
# Assemble the scraped columns into one table; the keyword names become the column labels.
scraped_columns = dict(
    PostalCode=postalCode,
    Borough=boroughList,
    Neighborhood=neighborhoodList,
)
toronto_df = pd.DataFrame(data=scraped_columns)

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [7]:
# Boolean mask: True for rows whose borough is anything other than 'Not assigned'.
has_borough = toronto_df['Borough'].ne('Not assigned')
# Keep only those rows and renumber them from 0.
toronto_df_drop = toronto_df.loc[has_borough].reset_index(drop=True)
toronto_df_drop.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Group neighbourhoods that share the same postal code and borough

In [8]:
def _join_names(names):
    """Collapse the values of one group into a single comma-separated string."""
    return ", ".join(names)

# One output row per (PostalCode, Borough) pair; the remaining column is
# aggregated with _join_names.
grouped_by_code = toronto_df_drop.groupby(['PostalCode', 'Borough'], as_index=False)
toronto_df_group = grouped_by_code.agg(_join_names)

toronto_df_group.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


For a "Not assigned" neighbourhood, use the borough name instead

In [9]:
# Replace a 'Not assigned' neighbourhood with the name of its borough.
# The previous iterrows() loop assigned to the yielded row, which is a copy,
# so the DataFrame itself was never updated. Writing through .loc with a
# boolean mask modifies toronto_df_group directly.
unassigned=toronto_df_group['Neighborhood']=='Not assigned'
toronto_df_group.loc[unassigned,'Neighborhood']=toronto_df_group.loc[unassigned,'Borough']
toronto_df_group.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Building the dataframe required by the question

In [10]:
# Column layout of the requested table (kept as a notebook-level name).
column_names=['PostalCode','Borough','Neighborhood']
# Postal codes to display, in the order the assignment asks for them.
test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the growing frame on every iteration. Collect the matching slices
# first and concatenate once; iterating over test_list keeps the requested order.
matches=[toronto_df_group[toronto_df_group['PostalCode']==postcode] for postcode in test_list]
toronto=pd.concat(matches,ignore_index=True)
toronto

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Parkview Hill, Woodbine Gardens"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Wexford, Maryvale"
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har..."


In [11]:
# Sanity check: (rows, columns) of the grouped table.
toronto_df_group.shape

(103, 3)

Reading the Geospatial_Coordinates CSV file

In [12]:
# Load the coordinates table from the CSV file in the working directory.
coordinates_csv = 'Geospatial_Coordinates.csv'
coordinates = pd.read_csv(filepath_or_buffer=coordinates_csv)
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merging the two DataFrames

In [13]:
# Align the key column's name with the scraped table so the two can be merged on it.
# Rebinding the result (instead of inplace=True) avoids mutating the frame an
# earlier cell already displayed.
coordinates=coordinates.rename(columns={'Postal Code':'PostalCode'})
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
# Left join: every row of the grouped table is kept and gains the Latitude /
# Longitude recorded for its postal code.
toronto_df_new = pd.merge(
    toronto_df_group,
    coordinates,
    how='left',
    on='PostalCode',
)
toronto_df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Finally, check to make sure the coordinates are added as required by the question

In [15]:
# Base column layout of the requested table (kept as a notebook-level name);
# the rows selected below also carry Latitude and Longitude.
column_names=['PostalCode','Borough','Neighborhood']
# Postal codes to display, in the order the assignment asks for them.
test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the growing frame on every iteration. Collect the matching slices
# first and concatenate once; iterating over test_list keeps the requested order.
matches=[toronto_df_new[toronto_df_new['PostalCode']==postcode] for postcode in test_list]
toronto=pd.concat(matches,ignore_index=True)
toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452
5,M4M,East Toronto,Studio District,43.659526,-79.340923
6,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
8,M9L,North York,Humber Summit,43.756303,-79.565963
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442


Using geopy to get the latitude and longitude of Toronto

In [21]:
# Look up the coordinates of the city; they are used as the centre of the maps.
address='Toronto'
geolocator=Nominatim(user_agent='My-application')
location=geolocator.geocode(address)
# geocode() yields None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('No geocoding result returned for address {!r}'.format(address))
latitude=location.latitude
longitude=location.longitude
print('The geographical coordinates of Toronto are {},{}'.format(latitude,longitude))

The geographical coordinates of Toronto are 43.6534817,-79.3839347


Using folium to display a map centred on the above latitude and longitude

In [25]:
# Base map centred on the geocoded city coordinates.
map_toronto=folium.Map(location=[latitude,longitude],zoom_start=10)

# One circle marker per row of toronto_df_new, with a "borough,neighbourhood" popup.
for lat,lon,borough,neigh in zip(toronto_df_new['Latitude'],toronto_df_new['Longitude'],toronto_df_new['Borough'],toronto_df_new['Neighborhood']):
    label='{},{}'.format(borough,neigh)
    # parse_html=True makes the popup treat the label as plain text, so
    # characters such as apostrophes in names are escaped.
    label=folium.Popup(label,parse_html=True)
    # fill_opacity is a number (it was passed as the string '0.7'), and the
    # parse_html keyword is dropped because it is a Popup option, not a
    # CircleMarker one.
    folium.CircleMarker([lat,lon],
                        radius=5,
                        popup=label,
                        color='blue',
                        fill=True,
                        fill_color='#3186cc',
                        fill_opacity=0.7).add_to(map_toronto)
map_toronto

Filter only boroughs that contain the word Toronto

In [29]:
# Distinct borough names, in order of first appearance in the table.
borough_names = toronto_df_new['Borough'].unique().tolist()
# Keep the boroughs whose name contains "toronto", ignoring case.
borough_tor_names = [name for name in borough_names if 'toronto' in name.lower()]
borough_tor_names

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [32]:
# Boolean mask: True for rows whose borough is one of the selected Toronto boroughs.
in_toronto = toronto_df_new['Borough'].isin(borough_tor_names)
# Narrow the table to those rows and renumber them from 0.
toronto_df_new = toronto_df_new.loc[in_toronto].reset_index(drop=True)
toronto_df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [33]:
# Base map centred on the geocoded city coordinates.
map_toronto=folium.Map(location=[latitude,longitude],zoom_start=10)

# One circle marker per row of toronto_df_new, with a "borough,neighbourhood" popup.
for lat,lon,borough,neigh in zip(toronto_df_new['Latitude'],toronto_df_new['Longitude'],toronto_df_new['Borough'],toronto_df_new['Neighborhood']):
    label='{},{}'.format(borough,neigh)
    # parse_html=True makes the popup treat the label as plain text, so
    # characters such as apostrophes in names are escaped.
    label=folium.Popup(label,parse_html=True)
    # fill_opacity is a number (it was passed as the string '0.7'), and the
    # parse_html keyword is dropped because it is a Popup option, not a
    # CircleMarker one.
    folium.CircleMarker([lat,lon],
                        radius=5,
                        popup=label,
                        color='blue',
                        fill=True,
                        fill_color='#3186cc',
                        fill_opacity=0.7).add_to(map_toronto)
map_toronto