In [1]:
import pandas as pd

### Get the table for wikipedia

In [2]:
web_url=pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
web_url

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

### Transform it into a pandas dataframe

In [3]:
neighborhood=pd.DataFrame(web_url[0])
neighborhood

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


### Ignore cells with a borough that is Not assigned

In [4]:
df=neighborhood[~neighborhood.Borough.isin(['Not assigned'])]
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

In [5]:
df1=df['Neighbourhood'].replace('Not assigned',df['Borough'])
df1

2                                              Parkwoods
3                                       Victoria Village
4                              Regent Park, Harbourfront
5                       Lawrence Manor, Lawrence Heights
6            Queen's Park, Ontario Provincial Government
                             ...                        
160        The Kingsway, Montgomery Road, Old Mill North
165                                 Church and Wellesley
168    Business reply mail Processing Centre, South C...
169    Old Mill South, King's Mill Park, Sunnylea, Hu...
178    Mimico NW, The Queensway West, South of Bloor,...
Name: Neighbourhood, Length: 103, dtype: object

### Updated pandas dataframe

In [6]:
nei_df=pd.DataFrame({'Postal Code':df['Postal Code'],'Borough':df['Borough'],'Neighbourhood':df1.values})
nei_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


### More than one neighborhood can exist in one postal code area

In [7]:
nei_df1 = nei_df.groupby(by='Postal Code').apply(lambda x:','.join(x['Neighbourhood']))
nei_df1

Postal Code
M1B                                       Malvern, Rouge
M1C               Rouge Hill, Port Union, Highland Creek
M1E                    Guildwood, Morningside, West Hill
M1G                                               Woburn
M1H                                            Cedarbrae
                             ...                        
M9N                                               Weston
M9P                                            Westmount
M9R    Kingsview Village, St. Phillips, Martin Grove ...
M9V    South Steeles, Silverstone, Humbergate, Jamest...
M9W                  Northwest, West Humber - Clairville
Length: 103, dtype: object

### Updated dataframe

In [8]:
neighbourhood_df=pd.DataFrame({'Postal Code':nei_df1.index,'Borough':df['Borough'],'Neighbourhood':nei_df1.values})
neighbourhood_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M1B,North York,"Malvern, Rouge"
3,M1C,North York,"Rouge Hill, Port Union, Highland Creek"
4,M1E,Downtown Toronto,"Guildwood, Morningside, West Hill"
5,M1G,North York,Woburn
6,M1H,Downtown Toronto,Cedarbrae
...,...,...,...
160,M9N,Etobicoke,Weston
165,M9P,Downtown Toronto,Westmount
168,M9R,East Toronto,"Kingsview Village, St. Phillips, Martin Grove ..."
169,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


### The shape of the dataframe

In [9]:
df_shape=neighbourhood_df.shape
df_shape

(103, 3)

In [10]:
print('Number of rows of the data frame is:',df_shape[0])
print('Number of columns of the data frame is:',df_shape[1])

Number of rows of the data frame is: 103
Number of columns of the data frame is: 3


In [11]:
lat_lng=pd.read_csv('https://cocl.us/Geospatial_data')
lat_lng

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [12]:
neighbourhood_lat_lng=pd.merge(neighbourhood_df,lat_lng,how='inner',on=['Postal Code','Postal Code'])

In [13]:
neighbourhood_lat_lng

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,North York,"Malvern, Rouge",43.806686,-79.194353
1,M1C,North York,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Downtown Toronto,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,North York,Woburn,43.770992,-79.216917
4,M1H,Downtown Toronto,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,Etobicoke,Weston,43.706876,-79.518188
99,M9P,Downtown Toronto,Westmount,43.696319,-79.532242
100,M9R,East Toronto,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
