# Wikipedia List of Postal Codes of Canada - Toronto with Latitude and Longitude Details

## Import Necessary Libraries

In [1]:
import pandas as pd

In [2]:
import numpy as np

## Read the Wikipedia HTML Page and Store It in a pandas DataFrame

In [3]:
# read_html parses every <table> on the page and returns a list of DataFrames.
df=pd.read_html('http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
# The first table on the page holds the Postcode / Borough / Neighbourhood data.
dataframe=df[0]
# Preview only the first rows instead of rendering the whole table.
dataframe.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [4]:
# List the column labels of the scraped table.
dataframe.columns

Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')

In [5]:
# Distinct Borough values, including the 'Not assigned' placeholder.
dataframe['Borough'].unique()

array(['Not assigned', 'North York', 'Downtown Toronto', "Queen's Park",
       'Etobicoke', 'Scarborough', 'East York', 'York', 'East Toronto',
       'West Toronto', 'Central Toronto', 'Mississauga'], dtype=object)

## Replace the 'Not assigned' Values with NaN Using NumPy

In [6]:
# Turn the 'Not assigned' placeholder into NaN so the pandas missing-value tools apply.
# Rebinding (instead of inplace=True) leaves the originally parsed table untouched.
dataframe=dataframe.replace('Not assigned',np.nan)

In [7]:
# Preview the first rows after the replacement rather than rendering the whole frame.
dataframe.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,
9,M8A,,


## Drop the Rows That Have NaN Values in the Borough Column

In [8]:
# Remove every row whose Borough is NaN; rows with only a missing Neighbourhood are kept.
# The drop happens in place, so `dataframe` remains the same object afterwards.
dataframe.dropna(subset=['Borough'],axis=0,inplace=True)

In [9]:
# (rows, columns) remaining after dropping rows with a missing Borough.
dataframe.shape

(211, 3)

### Fill NaN Values in the Neighbourhood Column with the Borough Value from the Same Row

In [10]:
# Fill missing Neighbourhood entries with the Borough from the same row.
# Assign the result back to the column: calling fillna(inplace=True) on the
# Series returned by dataframe['Neighbourhood'] is chained assignment, which
# warns on pandas 2.x and leaves `dataframe` unchanged under Copy-on-Write.
dataframe['Neighbourhood']=dataframe['Neighbourhood'].fillna(dataframe['Borough'])

In [11]:
# Filling values neither adds nor removes rows, so the shape should be unchanged.
dataframe.shape

(211, 3)

In [12]:
# Preview the cleaned rows rather than rendering the whole frame.
dataframe.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


## Combine Function to Join the Values in the Neighbourhood Column

In [13]:
def combine(data):
    """Join the distinct Neighbourhood values of ``data`` into one string.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``data['Neighbourhood']`` provides a ``.unique()`` method
        (for example one group produced by a pandas ``groupby``).

    Returns
    -------
    str
        The unique values, in order of first appearance, each converted with
        ``str`` and separated by a comma (no space after the comma).
    """
    distinct_names=data['Neighbourhood'].unique()
    return ','.join(str(name) for name in distinct_names)

In [14]:
# One row per (Postcode, Borough) with its neighbourhoods joined by combine().
# Selecting [['Neighbourhood']] keeps each group a DataFrame (which combine()
# expects) while excluding the grouping columns, so newer pandas does not emit
# the "apply operated on the grouping columns" DeprecationWarning.
# The resulting Series is unnamed, so to_frame() labels its single column 0.
df=dataframe.groupby(['Postcode','Borough'])[['Neighbourhood']].apply(combine).to_frame()

In [15]:
# Move the Postcode/Borough index levels produced by the groupby back into ordinary columns.
df=df.reset_index()

In [16]:
# Inspect the column labels; the aggregated column is still labelled 0 at this point.
df.columns

Index(['Postcode', 'Borough', 0], dtype='object')

In [17]:
# Give the aggregated column (labelled 0 by to_frame) its proper name.
df=df.rename(columns={0:'Neighbourhood'})

In [18]:
# Preview the first five grouped rows.
df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### List the Names That Appear as Both a Borough and a Neighbourhood

In [19]:
# Names that occur both as a Borough value and as a (combined) Neighbourhood value.
set(df['Borough']) & set(df['Neighbourhood'])

{'East Toronto', "Queen's Park"}

## Final Data Frame - Result Data Frame

In [20]:
# Preview the first ten rows of the grouped result.
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


## Number of Rows and Columns in the Final Data Frame

In [22]:
# (rows, columns) of the grouped frame: one row per Postcode/Borough pair.
df.shape

(103, 3)

## Question 2 

### Use CSV file of Geospatial Data and Create a Data Frame

In [24]:
# Load the CSV that maps each postal code to a latitude/longitude pair.
df2=pd.read_csv('https://cocl.us/Geospatial_data')
# Preview the first rows to check the column names before merging.
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Inner Join the Two Data Frames df and df2 on Postcode

In [49]:
# Align the key column name with `df` so both frames can be merged on 'Postcode'.
df2=df2.rename(columns={'Postal Code':'Postcode'})

In [50]:
# Inner join on Postcode: only postal codes present in both frames are kept.
Needed_Data_Frame=df.merge(df2,how='inner',on='Postcode')

In [51]:
# Order the merged rows alphabetically by Borough (ascending is the default).
Needed_Data_Frame=Needed_Data_Frame.sort_values(by='Borough')

In [57]:
# Renumber the rows 0..n-1 after sorting; the previous labels move into a column named 'index'.
Needed_Data_Frame=Needed_Data_Frame.reset_index()

In [60]:
# Drop the helper 'index' column left behind by reset_index().
# errors='ignore' keeps the cell re-runnable: `del` raises KeyError once the column is gone.
Needed_Data_Frame=Needed_Data_Frame.drop(columns='index',errors='ignore')

## The Resultant Data Frame with Latitude and Longitude Details

In [61]:
# Preview the merged frame, now sorted by Borough.
Needed_Data_Frame.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4S,Central Toronto,Davisville,43.704324,-79.38879
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M5P,Central Toronto,"Forest Hill North,Forest Hill West",43.696948,-79.411307
4,M5R,Central Toronto,"The Annex,North Midtown,Yorkville",43.67271,-79.405678


## Number of Rows and Columns in the Toronto Neighbourhoods Data Frame with Latitude and Longitude Details

In [62]:
# (rows, columns) of the merged frame.
Needed_Data_Frame.shape

(103, 5)