# Part one: Toronto Neighbourghood Data

In [6]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests
import geopy

### Scraping Data
Using beautifulsouls package to scrape data from the Toronto neighbourhood Wikipedia page. The data is taken from the table on the webpage.

In [7]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = bs(source,'lxml')
table = soup.find('table',{'class':'wikitable sortable'})

A new csv file is created containing the data from the scaraped webpage.

In [8]:
f = open('Toronto_Data.csv', 'w')
    
area = ""
district = ""
town = ""
    
for row in table.findAll("tr"):
    cells = row.findAll("td")
    if len(cells) == 3:
        area = cells[0].find(text=True)
        district = cells[1].findAll(text=True)
        town = cells[2].find(text=True)
        
    for x in range(len(district)):
        postcode_list = district[x].split(",")
        for i in range(len(postcode_list)):
            write_to_file = area.lstrip('\n').strip() + "," + postcode_list[i].lstrip('\n').strip() + "," + town.lstrip('\n').strip() +  "\n"
            f.write(write_to_file)
 
f.close()

### Loading in CSV file
Creating Dataframe containing three columns (Postcode, Borough and Neighbourhood).

In [9]:
Toronto = pd.read_csv("Toronto_Data.csv") 
df = pd.DataFrame(Toronto)
df.columns = ['PostCode', 'Borough','Neighbourhood']
df.head()

Unnamed: 0,PostCode,Borough,Neighbourhood
0,M2A,Not assigned,Not assigned
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M5A,Downtown Toronto,Regent Park


Dropping the Not assigned Boroughs.

In [10]:
df1 = df[df.Borough != 'Not assigned'].reset_index(drop=True)
df1.head()

Unnamed: 0,PostCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


Grouping the Borughs with multiple neighbourhoods.

In [11]:
df2 = df1.groupby(['PostCode','Borough'])['Neighbourhood'].apply(','.join).reset_index()
df2.head()

Unnamed: 0,PostCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Replacing values of Not assigned neighbourhood with borough.

In [12]:
NA_rows = df2.Neighbourhood == 'Not assigned'
df2.loc[NA_rows, 'Neighbourhood'] = df2.loc[NA_rows, 'Borough']
df2[NA_rows]

Unnamed: 0,PostCode,Borough,Neighbourhood
85,M7A,Queen's Park,Queen's Park


Cleaned Toronto data has 3 columns and 103 rows.

In [13]:
df_cleaned = df2 
df_cleaned.shape

(103, 3)

## Part 2: Geo Data for the Postcodes

!pip install wget
conda install -c conda-forge geocoder

Downloading File

In [20]:
url = 'http://cocl.us/Geospatial_data/toronto_coordinates.csv'
file = w.download(url)
file

  0% [                                                                                ]    0 / 2891100% [................................................................................] 2891 / 2891

'Geospatial_Coordinates (5).csv'

In [19]:
import wget as w
import geocoder

Reading in doownloaded coords from downloaded file.

In [52]:
Geo_Data = pd.read_csv('Geospatial_Coordinates.csv') 
Geo_df = pd.DataFrame(Geo_Data)
print(Geo_df.shape)
Geo_Data.head()

(103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merging the Postcodes from the Toronto Dataframe with the latitude and longitude data from the Geo data.

In [56]:
toronto_df_temp = df_cleaned.set_index('PostCode')
Geo_temp = Geo_Data.set_index('Postal Code')
toronto_df_coors = pd.concat([toronto_df_temp, Geo_temp], axis=1, join='inner')

toronto_df_coors.index.name = 'PostalCode'
toronto_df_coors.reset_index(inplace=True)

print(toronto_df_coors.shape)
toronto_df_coors.head()

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
