# Neighborhoods in Toronto - part 1
- Build a dataframe of Toronto postal codes, giving the borough and neighborhood name(s) for each postal code.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Wikipedia's "List of postal codes of Canada: M" page.
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html parses the page's HTML tables into a list of DataFrames; keep the
# first one, taking its first row as the column header.
toronto_df = pd.read_html(io=link, header=0)[0]

print(toronto_df.shape)
toronto_df.head()

(180, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [3]:
# Relabel the columns by position; this assumes the scraped table has exactly
# these three columns in this order (a length mismatch raises ValueError).
toronto_df.columns = ['PostalCode', 'Borough', 'Neighborhood']

In [4]:
# Per-column summary (count, unique, top, freq for these text columns).
toronto_df.describe()

Unnamed: 0,PostalCode,Borough,Neighborhood
count,180,180,180
unique,180,11,100
top,M2R,Not assigned,Not assigned
freq,1,77,77


In [5]:
# Number of rows per borough, including the 'Not assigned' placeholder.
toronto_df['Borough'].value_counts()

Not assigned        77
North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Name: Borough, dtype: int64

### Clean the data
- Ignore rows whose borough is **Not assigned**.
- If a row has a borough but a **Not assigned** neighborhood, the neighborhood is set to the borough name.

#### Drop rows with a borough that is "Not assigned"

In [6]:
# Keep only the rows whose borough is not the 'Not assigned' placeholder, then
# renumber the index from zero so the removed rows leave no gaps.
toronto_df_drop = (
    toronto_df
    .loc[toronto_df['Borough'].ne('Not assigned')]
    .reset_index(drop=True)
)
toronto_df_drop.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### Is there any 'Not assigned' Neighborhood?

In [7]:
# Confirm that no remaining row still uses the 'Not assigned' placeholder as
# its neighborhood; the message is printed only when none is found.
if not toronto_df_drop['Neighborhood'].eq('Not assigned').any():
    print('All Neighborhoods are assigned')

All Neighborhoods are assigned


In [8]:
# Apply the second cleaning rule explicitly: wherever the neighborhood is the
# 'Not assigned' placeholder, use the row's borough name instead. When no such
# row exists this leaves the data unchanged, but the rule is no longer silently
# skipped if the source table changes. assign() returns a new frame, so
# toronto_df_drop itself is not modified.
toronto_df_cleaned = toronto_df_drop.assign(
    Neighborhood=toronto_df_drop['Neighborhood'].mask(
        toronto_df_drop['Neighborhood'] == 'Not assigned',
        toronto_df_drop['Borough'],
    )
)

#### Check that the result matches the sample required by the assignment

In [9]:
# Build a small frame holding only the sample postal codes, in the order listed.
column_names = ["PostalCode", "Borough", "Neighborhood"]

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

# An inner merge keyed on the sample list selects the matching rows in a single
# pass and keeps the order of test_list; codes absent from the cleaned frame
# are simply skipped. This replaces growing the frame with DataFrame.append in
# a loop, which was deprecated in pandas 1.4 and removed in pandas 2.0.
test_df = (
    pd.DataFrame({"PostalCode": test_list})
    .merge(toronto_df_cleaned, on="PostalCode", how="inner")
    .loc[:, column_names]
)

test_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Parkview Hill, Woodbine Gardens"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Wexford, Maryvale"
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har..."


#### Print the number of rows of the cleaned dataframe

In [10]:
# (rows, columns) of the cleaned dataframe.
toronto_df_cleaned.shape

(103, 3)

In [11]:
# Write the cleaned dataframe to a CSV file, omitting the index column.
toronto_df_cleaned.to_csv('Toronto_cleaned.csv', index=False)