## 1. Import libraries

In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

## 2. Import the data from Wikipedia

In [17]:
# Download the Wikipedia page that lists Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() fails loudly on an HTTP error instead of silently
# handing an error page to the HTML parser.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
data = response.text

In [18]:
# Parse the downloaded HTML text with Python's built-in 'html.parser' backend.
soup = BeautifulSoup(markup=data, features='html.parser')

In [19]:
# One accumulator list per table column, all starting out empty.
PostalCode, borough, neighborhood = [], [], []

In [20]:
# Collect the first three cells of every data row in the first <table> on the page.
table = soup.find('table')
if table is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError('No <table> element found in the downloaded page')

# Start from empty lists so re-running this cell does not append duplicate rows.
for column_values in (PostalCode, borough, neighborhood):
    column_values.clear()

for row in table.find_all('tr'):
    cell = row.find_all('td')
    # Rows with no <td> cells were already skipped; rows with only one or two
    # cells are skipped as well, since indexing cell[2] would raise IndexError.
    if len(cell) >= 3:
        PostalCode.append(cell[0].text)
        borough.append(cell[1].text)
        neighborhood.append(cell[2].text)

In [21]:
# Assemble the three scraped lists into one table (one column per list)
# and preview the first rows.
Toronto = pd.DataFrame(dict(PostalCode=PostalCode, borough=borough, neighborhood=neighborhood))
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


## 3. Remove the trailing newline character (`\n`) from the PostalCode, borough and neighborhood columns

In [22]:
# Remove surrounding whitespace (including the trailing '\n' left by the HTML
# cells) from every text column.
# Unlike slicing off the last character, .str.strip() never removes real data
# when a value has no trailing newline, and re-running this cell is harmless.
for column in ['PostalCode', 'borough', 'neighborhood']:
    # astype(str) keeps the original behaviour of coercing each value to text.
    Toronto[column] = Toronto[column].astype(str).str.strip()
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## 4. Drop the rows whose borough is "Not assigned"

In [23]:
# Flag the rows whose borough is exactly 'Not assigned', then remove those
# rows from Toronto in place by their index labels.
is_unassigned = Toronto['borough'] == 'Not assigned'
Toronto.drop(index=Toronto.loc[is_unassigned].index, inplace=True)
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## 5. Combine the neighborhoods that share the same postal code and borough

In [24]:
# Gather the neighborhood values of each (PostalCode, borough) pair, join them
# into one comma-separated string, and turn the group keys back into columns.
neighborhoods_by_code = Toronto.groupby(['PostalCode', 'borough'])['neighborhood']
Toronto = neighborhoods_by_code.apply(', '.join).reset_index()
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## 6. If a neighborhood is not assigned, use the borough name as the neighborhood

In [25]:
# Give rows without a neighborhood the name of their borough.
# The previous loop used `==` (a comparison whose result was discarded) on a
# row copy produced by iterrows(), so it never modified the table. A boolean
# mask with .loc writes the values into Toronto itself.
# An empty string is treated as missing too, because the scraped table shows
# unassigned neighborhoods as empty cells.
missing_neighborhood = Toronto['neighborhood'].isin(['Not assigned', ''])
Toronto.loc[missing_neighborhood, 'neighborhood'] = Toronto.loc[missing_neighborhood, 'borough']
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## 7. Shape

In [26]:
# Display the table dimensions as a (rows, columns) tuple.
Toronto.shape

(103, 3)

## 8. Load the coordinates from CSV file, provided by Coursera

In [27]:
# Read the postal-code coordinates straight from the hosted CSV file
# and preview the first rows.
geospatial_csv_url = 'http://cocl.us/Geospatial_data'
coordinates = pd.read_csv(geospatial_csv_url)
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## 9. Rename postal code column

In [28]:
# Rename 'Postal Code' to 'PostalCode' in place so the column name matches
# the key column used by the Toronto table.
column_renames = {'Postal Code': 'PostalCode'}
coordinates.rename(columns=column_renames, inplace=True)
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## 10. Merge the 2 tables 

In [29]:
# Left join: keep every row of Toronto and attach the Latitude/Longitude of
# the coordinates row that has the same PostalCode.
Toronto_coord = Toronto.merge(coordinates, how='left', on='PostalCode')
Toronto_coord.head()

Unnamed: 0,PostalCode,borough,neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
