In [8]:
import requests # library to handle HTTP requests
import pandas as pd # library for data analysis
import numpy as np # library for scientific calculations (in this case in a vectorized manner)
import random # library for random number generation

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # module to convert address in to latitude and longitude

# libraries for displaying images
from IPython.display import Image
from IPython.core.display import HTML

# Converting json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('folium, and geopy installed')
print('Libraries imported')

Solving environment: done


  current version: 4.4.10
  latest version: 4.8.0

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.

Solving environment: done


  current version: 4.4.10
  latest version: 4.8.0

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.

folium, and geopy installed
Libraries imported


### Converting the table on the Wikipedia page to JSON format

In [9]:
from bs4 import BeautifulSoup as bs

In [10]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than letting it surface
# later as a parsing failure.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()

# Parse the HTML and collect every <table> element on the page.
soup = bs(response.content,'lxml')
table = soup.find_all('table')

# read_html returns a list of DataFrames; serialize the first one as JSON records.
df = pd.read_html(str(table))
print(df[0].to_json(orient='records'))

[{"Postcode":"M1A","Borough":"Not assigned","Neighborhood":"Not assigned"},{"Postcode":"M2A","Borough":"Not assigned","Neighborhood":"Not assigned"},{"Postcode":"M3A","Borough":"North York","Neighborhood":"Parkwoods"},{"Postcode":"M4A","Borough":"North York","Neighborhood":"Victoria Village"},{"Postcode":"M5A","Borough":"Downtown Toronto","Neighborhood":"Harbourfront"},{"Postcode":"M6A","Borough":"North York","Neighborhood":"Lawrence Heights"},{"Postcode":"M6A","Borough":"North York","Neighborhood":"Lawrence Manor"},{"Postcode":"M7A","Borough":"Queen's Park","Neighborhood":"Not assigned"},{"Postcode":"M8A","Borough":"Not assigned","Neighborhood":"Not assigned"},{"Postcode":"M9A","Borough":"Downtown Toronto","Neighborhood":"Queen's Park"},{"Postcode":"M1B","Borough":"Scarborough","Neighborhood":"Rouge"},{"Postcode":"M1B","Borough":"Scarborough","Neighborhood":"Malvern"},{"Postcode":"M2B","Borough":"Not assigned","Neighborhood":"Not assigned"},{"Postcode":"M3B","Borough":"North York","Ne

##### Define columns for the dataframe

In [11]:
# Column labels for the neighbourhood table, and an empty frame that uses them.
col_names = [
    'PostalCode',
    'Borough',
    'Neighborhood',
]
df_neigh = pd.DataFrame(columns=col_names)

In [12]:
# Fetch the Wikipedia page; fail fast on a stalled connection (timeout) or an
# HTTP error status (raise_for_status) instead of parsing a bad response.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()

# Keep only the first <table> on the page and load it into a DataFrame.
soup = bs(response.content,'lxml')
table = soup.find_all('table')[0]
df = pd.read_html(str(table))[0]

# Plain-list copies of the three columns.
PostalCode = df["Postcode"].tolist()
Borough = df["Borough"].tolist()
Neighborhood = df["Neighborhood"].tolist()

In [13]:
# Preview the first rows (up to 15) of the parsed postal-code table.
df.head(15)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Queen's Park,Not assigned
8,M8A,Not assigned,Not assigned
9,M9A,Downtown Toronto,Queen's Park


#### Removing rows with a 'Not assigned' value in the Borough column

In [14]:
# Keep only the rows whose Borough is something other than 'Not assigned',
# then renumber the index from zero.
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough].reset_index(drop=True)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Queen's Park,Not assigned
6,M9A,Downtown Toronto,Queen's Park
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North


#### Combining neighborhoods with the same postal code

In [15]:
# One row per (Postcode, Borough) pair, in first-seen order (sort=False);
# the remaining column values of each group are joined with commas.
df1 = (
    df.groupby(['Postcode', 'Borough'], sort=False)
      .agg(','.join)
      .reset_index()
)
df1.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned
5,M9A,Downtown Toronto,Queen's Park
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


#### Replacing Neighborhood column 'Not assigned' value with Borough

In [16]:
# Show the rows whose Neighborhood is still 'Not assigned'.
df1.loc[df1['Neighborhood'] == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighborhood
4,M7A,Queen's Park,Not assigned


In [17]:
# Where Neighborhood is 'Not assigned', fall back to that row's own Borough.
# Only the Neighborhood column is touched, and the replacement value comes from
# the data rather than a hard-coded borough name, so this stays correct if more
# than one row (or a different borough) is unassigned.
neighborhood_missing = df1['Neighborhood'] == 'Not assigned'
df1 = df1.assign(
    Neighborhood=df1['Neighborhood'].mask(neighborhood_missing, df1['Borough'])
)

In [18]:
# Preview the first 10 rows after the Neighborhood replacement.
df1.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Downtown Toronto,Queen's Park
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [19]:
# (rows, columns) of the grouped table.
df1.shape

(103, 3)

In [20]:
# Load the per-postal-code latitude/longitude CSV straight from its URL.
geospatial_csv_url = 'http://cocl.us/Geospatial_data'
coordinate_df = pd.read_csv(geospatial_csv_url)
coordinate_df.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [21]:
# Rename 'Postal Code' to 'Postcode' so the key column matches the one in df1.
postcode_column_map = {'Postal Code': 'Postcode'}
coordinate_df = coordinate_df.rename(columns=postcode_column_map)
coordinate_df.head(7)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029


In [22]:
# Attach Latitude/Longitude to each postal-code row by joining on 'Postcode'.
coordinate_df1 = df1.merge(coordinate_df, on='Postcode')
coordinate_df1.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [24]:
# (rows, columns) of the merged table.
coordinate_df1.shape

(103, 5)

### Visualizing the Data

In [25]:
## Visualizing neighborhoods

# Convert to latitude and longitude 
from geopy.geocoders import Nominatim

# Import visualization framework
import folium

In [None]:

# Init "Toronto" setting to geopy
addr = 'Toronto'
geolocator = Nominatim(user_agent="Toronto")
to_loc   = geolocator.geocode(addr)

# geocode() yields no location when the lookup finds nothing; stop with a clear
# message instead of an AttributeError on the next line.
if to_loc is None:
    raise ValueError(f"Could not geocode {addr!r}; cannot centre the map")

# Get Toronto's latitude and longitude and centre the base map on it
to_lat, to_lgt   = to_loc.latitude, to_loc.longitude
to_map = folium.Map(location=[to_lat, to_lgt], zoom_start=10)

# Add one circle marker per postal code.
# Iterate the merged frame (coordinate_df1): it is the one that carries
# Borough and Neighborhood alongside Latitude/Longitude. The raw
# coordinate_df has only Postcode/Latitude/Longitude, so building the label
# from it raises KeyError.
for ix, row in coordinate_df1.iterrows():
    label = folium.Popup(f"{row['Borough']}, {row['Neighborhood']}", parse_html=True)
    # NOTE(review): parse_html is a Popup option; whether CircleMarker uses it
    # at all is unclear from here -- confirm against the installed folium version.
    folium.CircleMarker(
        [row['Latitude'], row['Longitude']],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_opacity=0.9,
        parse_html=False).add_to(to_map)
to_map