# IBM Capstone Project

This notebook is used mainly for the capstone project of the last course of IBM's 'Professional Data Scientist' certificate.

## Reading the Table Data

In [1]:
import pandas as pd
import numpy as np

Let's import data from Wikipedia into a table using the BeautifulSoup library.

In [2]:
import requests
# Download the raw HTML of the Wikipedia postal-code page.
# NOTE: despite its name, `website_url` holds the page's HTML text, not a URL.
# A timeout prevents the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (4xx/5xx) here instead of letting an
# error page be parsed as if it were the real article.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
website_url = response.text

In [3]:
from bs4 import BeautifulSoup
# Parse the downloaded HTML and locate the first table whose class attribute is
# 'wikitable sortable'.
soup = BeautifulSoup(website_url, 'lxml')
My_table = soup.find('table', {'class': 'wikitable sortable'})

# find() returns None when nothing matches; fail here with a clear message rather
# than with an AttributeError in a later cell.
if My_table is None:
    raise ValueError("Could not find a 'wikitable sortable' table in the downloaded page")

In [4]:
# Turn the scraped HTML table into a DataFrame of (Postcode, Borough, Neighborhood).
My_table_rows = My_table.find_all('tr')

# One list of cell texts per <tr>. A distinct name for the inner variable avoids
# shadowing the row variable inside the comprehension.
table_records = [[cell.text for cell in tr.find_all('td')] for tr in My_table_rows]

# The first row is skipped — presumably the header row, which has no <td> cells.
postal_codes = pd.DataFrame(table_records[1:], columns=["Postcode", "Borough", "Neighborhood"])

# Remove newline characters from the neighborhood names in one vectorised call.
# Element-wise `postal_codes.Neighborhood[i] = ...` is chained assignment, which
# triggers SettingWithCopyWarning and has no effect under pandas Copy-on-Write.
postal_codes["Neighborhood"] = postal_codes["Neighborhood"].str.replace('\n', '', regex=False)

# Drop rows whose borough is not assigned (the original row index is kept).
postal_codes = postal_codes[postal_codes.Borough != 'Not assigned']
postal_codes.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [5]:
# Summary of the filtered table: overall shape first, then the number of
# distinct values in each of the three columns, one count per line.
print(postal_codes.shape)
for column_name in ('Postcode', 'Borough', 'Neighborhood'):
    distinct_values = postal_codes[column_name].unique()
    print(len(distinct_values))

(212, 3)
103
11
210


Now let's group the neighborhoods that share the same postcode.

In [6]:
# Collapse the table to one row per postcode:
#   - Borough: the first borough seen for that postcode
#   - Neighborhood: all neighborhoods of that postcode joined with ', '
# A single groupby/agg keeps each joined string attached to its own postcode,
# instead of relying on two separately computed results lining up by position.
# sort=False preserves first-appearance order; as_index=False yields a fresh 0..n-1 index.
postal_codes = (
    postal_codes
    .groupby('Postcode', sort=False, as_index=False)
    .agg({'Borough': 'first', 'Neighborhood': ', '.join})
)
print(postal_codes.shape)
postal_codes.head()

(103, 3)


Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Not assigned


Let's also fill the unassigned neighborhoods with the borough value.

In [7]:
# Wherever the neighborhood is the placeholder 'Not assigned', use the borough
# name instead. A boolean mask with .loc writes to the frame itself; element-wise
# `postal_codes.Neighborhood[i] = ...` is chained assignment, which raises
# SettingWithCopyWarning and does not update the frame under Copy-on-Write.
unassigned = postal_codes['Neighborhood'] == 'Not assigned'
postal_codes.loc[unassigned, 'Neighborhood'] = postal_codes.loc[unassigned, 'Borough']

postal_codes.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [8]:
# Print the (rows, columns) shape of postal_codes as a quick size check.
print(postal_codes.shape)

(103, 3)


## Adding Coordinates

Now let's add the latitude and longitude of the corresponding postal codes.

In [9]:
# Load the coordinate table from a CSV file located relative to the current
# working directory. The next cell reads its 'Postal Code', 'Latitude' and
# 'Longitude' columns.
Geospatial_Coordinates = pd.read_csv('Geospatial_Coordinates.csv')

In [10]:
# Attach latitude/longitude to every postcode.
# Indexing the coordinate table by postal code turns each lookup into a vectorised
# map, instead of re-filtering the whole table once per row and calling float() on
# a one-element Series (deprecated in recent pandas).
coords_by_postcode = Geospatial_Coordinates.set_index('Postal Code')[['Latitude', 'Longitude']]
latitude = postal_codes['Postcode'].map(coords_by_postcode['Latitude'])
longitude = postal_codes['Postcode'].map(coords_by_postcode['Longitude'])

# A postcode without a match comes back as NaN; stop with a clear message listing
# the offending codes rather than silently carrying missing coordinates forward.
unmatched = postal_codes.loc[latitude.isna() | longitude.isna(), 'Postcode']
if not unmatched.empty:
    raise ValueError("No coordinates found for postcodes: {}".format(unmatched.tolist()))

# Assign only after validation so a failed run leaves postal_codes untouched.
# Re-running the cell simply overwrites the two columns.
postal_codes['Latitude'] = latitude.astype(float)
postal_codes['Longitude'] = longitude.astype(float)

postal_codes.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
