### Scraping Toronto postal codes from Wikipedia into a dataframe

In [2]:
import numpy as np
import pandas as pd

from bs4 import BeautifulSoup 
import requests
!pip install folium
import folium

print("Libraries imported")

# Download the Wikipedia page listing the Canadian postal codes that start with "M".
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of parsing an error page.
wiki_response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
wiki_response.raise_for_status()
wiki_data = wiki_response.text
soupObj = BeautifulSoup(wiki_data, 'html.parser')  # parsed document tree used by the next cell







Libraries imported


In [3]:
# Navigate the HTML document structure using Beautiful Soup and collect one
# (postal code, borough, neighborhood) triple per table row.
# The resulting toronto_df is used throughout the assignment.
postalCodeList = []
boroughList = []
neighborhoodList = []

# The postal code table is taken to be the first <table> element on the page.
table_found = soupObj.find_all('table')[0]

for row in table_found.find_all('tr'):
    cells = row.find_all('td')
    # Rows without <td> cells (e.g. a header row made of <th>) are skipped.
    # Requiring three cells, rather than "more than zero", also skips short
    # rows that would otherwise raise IndexError on cells[1] / cells[2].
    if len(cells) >= 3:
        # Drop the trailing newline that ends each cell's text.
        postalCodeList.append(cells[0].text.rstrip("\n"))
        boroughList.append(cells[1].text.rstrip("\n"))
        neighborhoodList.append(cells[2].text.rstrip("\n"))


toronto_df = pd.DataFrame({"PostalCode": postalCodeList,
                           "Borough": boroughList,
                           "Neighborhood": neighborhoodList})

toronto_df.head()


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# Keep only the rows whose borough has been assigned (drop "Not assigned"
# boroughs) and renumber the index from 0.
toronto_df_dropna = (
    toronto_df
    .loc[toronto_df["Borough"].ne("Not assigned")]
    .reset_index(drop=True)
)
toronto_df_dropna.head()









Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
# Collapse rows that share the same postal code and borough into a single row
# whose Neighborhood is the comma-separated list of that group's neighborhoods.
# as_index=False keeps PostalCode and Borough as ordinary columns.
toronto_df_grouped = (
    toronto_df_dropna
    .groupby(["PostalCode", "Borough"], as_index=False)
    .agg(lambda names: ", ".join(names))
)

# Only the last expression of a cell is displayed, so the frame is shown once here.
toronto_df_grouped

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [7]:
# If a row has a borough but its neighborhood is "Not assigned", use the
# borough name as the neighborhood. The same mask selects both the target
# cells and the source values, so the assignment is aligned row by row.
unassigned = toronto_df_grouped["Neighborhood"] == "Not assigned"
toronto_df_grouped.loc[unassigned, "Neighborhood"] = toronto_df_grouped.loc[unassigned, "Borough"]

# Only the last expression of a cell is displayed, so the frame is shown once here.
toronto_df_grouped


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [8]:
# Use the .shape attribute (not a method) to show the dataframe's dimensions
# as a (rows, columns) tuple; as the last expression of the cell it is displayed.
toronto_df_grouped.shape

(103, 3)

### Creating dataframe with latitude and longitude data from CSV

In [9]:
# Load the geospatial CSV straight from its URL into a dataframe
# (requires network access when the cell runs).
lat_long_file = pd.read_csv('https://cocl.us/Geospatial_data')
# Display the (rows, columns) of the loaded frame as a quick sanity check.
lat_long_file.shape


(103, 3)

### Merge toronto_df_grouped and lat_long_file dataframes


In [11]:
# Give the coordinate table the same key column name as toronto_df_grouped,
# then left-join on it so every grouped postal code row is kept and picks up
# the columns of its matching row in lat_long_file.
lat_long_file.rename(inplace=True, columns={'Postal Code': 'PostalCode'})
final_df = toronto_df_grouped.merge(lat_long_file, how="left", on="PostalCode")
final_df


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
