# Part 1: Scraping of Data on Toronto Neighborhoods from Wikipedia

#### Installing and importing libraries


In [1]:
!pip install bs4
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup



#### Request the web page and parse it with BeautifulSoup

In [2]:
# Wikipedia page listing the Toronto ("M") postal codes.
link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of silently
# parsing an error page further down.
toronto_response = requests.get(link, timeout=30)
toronto_response.raise_for_status()
toronto_wiki = toronto_response.text

# NOTE: the "html5lib" parser is provided by the separate html5lib package;
# BeautifulSoup cannot build this parser if that package is not installed.
toronto_bs = BeautifulSoup(toronto_wiki, "html5lib")


#### Extract the table 

In [3]:
# Take the first <table> in the parsed document.
# NOTE(review): assumes the postal-code grid is the first table on the page - confirm
# if the page layout changes.
toronto_table_bs = toronto_bs.find("table")

# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in the extraction step.
if toronto_table_bs is None:
    raise ValueError("No <table> element found in the downloaded page; the page layout may have changed.")

#### Extract data from the table and append to dataframe

In [4]:
def _parse_postal_cell(cell):
    """Convert one <td> cell of the postal-code table into a row record.

    The postal code is read from the cell's <b> tag. Among the cell's
    hyperlinks (<a href=...>), the first one is taken as the borough and all
    remaining ones are joined with ", " to form the neighborhood. When there is
    no neighborhood link, the neighborhood falls back to the borough name
    (which is itself "" when the cell has no links at all).

    Returns:
        A dict with keys "Postal Code", "Borough" and "Neighborhood", or None
        when the cell has no <b> tag and therefore no postal code to read.
    """
    code_tag = cell.b
    if code_tag is None:
        # Not a postal-code cell; skip it instead of raising AttributeError.
        return None

    link_texts = [str(anchor.string) for anchor in cell.find_all("a", href=True)]
    borough = link_texts[0] if link_texts else ""
    # An empty join result means "no neighborhood listed": reuse the borough.
    neighborhood = ", ".join(link_texts[1:]) or borough

    return {
        "Postal Code": str(code_tag.string),
        "Borough": borough,
        "Neighborhood": neighborhood,
    }


# Collect plain dict records first and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
toronto_records = []
for row in toronto_table_bs.find_all("tr"):
    for cell in row.find_all("td"):
        record = _parse_postal_cell(cell)
        if record is not None:
            toronto_records.append(record)

# Passing `columns` keeps the expected schema even if no records were found.
toronto_neigh_data = pd.DataFrame(
    toronto_records, columns=["Postal Code", "Borough", "Neighborhood"]
)
    

#### Remove rows with empty cells from the dataframe and reset the index


In [5]:
# Empty strings are turned into NaN so that dropna() can discard every row
# containing one; the index is then renumbered from 0 without keeping the old one.
toronto_neigh_data = (
    toronto_neigh_data
    .replace("", np.nan)
    .dropna(axis=0)
    .reset_index(drop=True)
)

In [6]:
# Preview up to the first 20 rows of the cleaned table.
toronto_neigh_data.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [7]:
# (number of rows, number of columns) of the cleaned table.
toronto_neigh_data.shape

(101, 3)

# Part 2: Obtaining the Latitudes and Longitudes of the Postal Codes

#### Install and import all required libraries

In [8]:
!pip install folium
import folium 

!pip install geocoder
import geocoder 



#### Get latitudes and longitudes for each postal code using geocoder

In [9]:
# DISABLED - kept for reference only; nothing in this cell is executed.
# The function below would look up the coordinates of one postal code with
# geocoder.google and return them as (latitude, longitude).
# NOTE: its while-loop has no retry limit, so it keeps re-querying for as long
# as g.latlng is None.
#def get_lat_lng(toronto):

    # initialize your variable to None
    #lat_lng_coords = None

    # loop until you get the coordinates
    #while(lat_lng_coords is None):
        #g = geocoder.google('{}, Toronto, Ontario'.format(toronto))
        #lat_lng_coords = g.latlng

    #latitude = lat_lng_coords[0]
    #longitude = lat_lng_coords[1]
    
    #return latitude, longitude
    

#### Get latitudes and longitudes for each postal code from the CSV file, because the geocoder was taking too long

In [17]:
# Read the per-postal-code coordinates from the CSV file in the working
# directory, then display the resulting frame.
lat_lng_df = pd.read_csv(filepath_or_buffer="Geospatial_Coordinates.csv")
lat_lng_df

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [18]:
# Attach coordinates to each neighborhood row by matching on "Postal Code".
# This is an inner join: a postal code present in only one of the two frames
# is left out of the result.
toronto_merged = pd.merge(
    toronto_neigh_data,
    lat_lng_df,
    how="inner",
    on="Postal Code",
)
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
...,...,...,...,...,...
96,M8X,Etobicoke,"The Kingsway, Old Mill",43.653654,-79.506944
97,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
98,M7Y,Business reply mail,Business reply mail,43.662744,-79.321558
99,M8Y,Etobicoke,"Old Mill, Sunnylea, Humber Bay, Mimico, The Qu...",43.636258,-79.498509
