In [1]:
import pandas as pd

In [2]:
# Parse the HTML tables found on the Wikipedia page and keep the first one.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df = pd.read_html(io=url)[0]
df.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


**Data Frame Initial Shape**

In [3]:
# (rows, columns) of the table as loaded, before any cleaning step has run.
df.shape

(288, 3)

## 1. Data Preprocessing Steps

**1.1 : Renaming Columns**

In [4]:
# Replace the scraped header with the three column names used from here on.
# The assignment is positional: first, second and third column, in that order.
df.columns = pd.Index(['PostalCode', 'Borough', 'Neighborhood'])

**1.2 : Ignore cells with a borough that is Not assigned.**

In [5]:
# Keep only the rows whose Borough is something other than 'Not assigned',
# then show the remaining (rows, columns).
df = df.loc[df['Borough'].ne('Not assigned')]
df.shape

(211, 3)

**1.3: Assign Borough to Neighborhood if Neighborhood is not assigned**

**1.3.1 : Check for such data**

In [6]:
# List the rows whose Neighborhood still holds the 'Not assigned' placeholder.
df[df['Neighborhood'].eq('Not assigned')]

Unnamed: 0,PostalCode,Borough,Neighborhood
8,M7A,Queen's Park,Not assigned


**1.3.2 : Checking the replacement for such data**

In [7]:
# Preview only: Borough is selected twice so that the second copy shows the
# value that would stand in for Neighborhood on the affected rows.
df.loc[df['Neighborhood'].eq('Not assigned'), ['PostalCode', 'Borough', 'Borough']]

Unnamed: 0,PostalCode,Borough,Borough.1
8,M7A,Queen's Park,Queen's Park


**1.3.3 : Replacing**

In [8]:
# Where Neighborhood is 'Not assigned', copy the row's Borough into it.
# Only the Neighborhood column is written, and the right-hand side is aligned
# on the row index, so the result does not depend on column order or on how
# pandas treats a frame with a duplicated 'Borough' label.
neighborhood_missing = df['Neighborhood'] == 'Not assigned'
df.loc[neighborhood_missing, 'Neighborhood'] = df.loc[neighborhood_missing, 'Borough']

**1.3.4 : Check after replacement whether it was done correctly**

In [9]:
# Any row listed here still holds the placeholder; an empty frame means none remain.
df.loc[df['Neighborhood'].eq('Not assigned')]

Unnamed: 0,PostalCode,Borough,Neighborhood


**Replacement is done properly**

**1.4 : Combining all the Neighborhoods, grouped by PostalCode & Borough**

In [10]:
# Collapse the rows of each (PostalCode, Borough) pair into a single row whose
# Neighborhood is a comma-separated list of the distinct, non-null names.
# Series.unique() removes duplicates while keeping first-appearance order, so the
# joined text is identical on every run; iterating a set of strings is not,
# because string hashing is randomized per interpreter start.
df = df.groupby(['PostalCode', 'Borough'], as_index=False).agg(
    lambda names: ', '.join(names.dropna().unique())
)

In [11]:
# Preview of the grouped frame (head() returns the first 5 rows by default).
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union"
2,M1E,Scarborough,"Morningside, West Hill, Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


**1.5: Checking the Final Shape of Dataframe**

In [12]:
# (rows, columns) of the frame after filtering and grouping.
df.shape

(103, 3)

## 2. Geo Spatial Data addition to existing Dataframe

**2.1 Load the given CSV to GeoSpatial Dataframe**

In [13]:
# Read the geospatial CSV into its own frame, separate from df.
df_geo_spatial = pd.read_csv(filepath_or_buffer='https://cocl.us/Geospatial_data')

In [14]:
# Peek at the first two rows to see the column names of the coordinate table.
df_geo_spatial.head(n=2)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497


**2.2 Join & Create the required Data Frame**

In [15]:
# Attach coordinates to each postal code. The key is spelled 'PostalCode' on
# the left and 'Postal Code' on the right, so both names are passed explicitly.
# The join type is left at the pandas default, and only the five columns
# listed below are kept (this drops the right-hand 'Postal Code' key column).
result_df = df.merge(
    df_geo_spatial,
    left_on='PostalCode',
    right_on='Postal Code',
).loc[:, ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']]
result_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill, Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
