### Importamos las librerías necesarias para realizar web scraping

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

### Descargamos los datos del enlace https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M por medio de BeautifulSoup

In [2]:
# Wikipedia page that lists the Canadian postal codes starting with "M".
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# A timeout keeps the notebook from hanging forever on an unresponsive server.
page = requests.get(URL, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()

# Parse the raw response body with the standard-library HTML parser.
soup = BeautifulSoup(page.content, "html.parser")

### Se extrae la tabla que contiene solamente los datos de códigos postales de Toronto

In [3]:
# Attribute access by tag name returns the first <table> element of the page.
table = soup.table

In [4]:
# Collect, for every table row (<tr>), the <p> elements it contains;
# each <p> holds the text of one postal-code cell.
row_cells = [row.find_all('p') for row in table.find_all('tr')]

# Derive the column indices from the widest row instead of hard-coding 9,
# so the cell keeps working if the table layout changes.
ncol = list(range(max((len(cells) for cells in row_cells), default=0)))

# Walk the table column by column (every row of column 0, then column 1, ...).
# Rows that have no cell in the current column (e.g. a header row without <p>)
# are skipped rather than raising IndexError.
file = [
    cells[i].getText()
    for i in ncol
    for cells in row_cells
    if i < len(cells)
]

# One scraped cell text per DataFrame row, stored in column 0.
df = pd.DataFrame(file)

### Luego, configuramos el dataframe solicitado en la tarea

In [23]:
# Column 0 holds the raw text of each scraped cell: the first three characters
# are the postal code, followed by the borough and, inside parentheses, the
# neighbourhoods separated by " / ".
raw_cell = df[0]

# Postal code column that stays in the final table. It is created explicitly
# here so the notebook no longer relies on a column left over from an earlier
# kernel session (Restart & Run All previously lost the postal code after the merge).
df["PostalCode"] = raw_cell.str[0:3]

# Everything after the postal code and before the first "(" is the borough.
df["Borough"] = raw_cell.str[3:].str.split("(", n=1, expand=True)[0]

# Text between "(" and ")" is the neighbourhood list; cells without
# parentheses yield an empty string.
df["Neigh"] = (
    raw_cell.str.rsplit("(", n=2, expand=True)[1]
    .str.split(")", n=1, expand=True)[0]
    .fillna("")
)

# Use commas instead of " / " as separator; regex=False makes the literal
# replacement explicit regardless of the pandas version's default.
df["Neighbourhood"] = df["Neigh"].str.replace(" / ", ",", regex=False)

# Copy of the postal code under the name used as join key when merging with
# the coordinates table; that merge drops this column afterwards.
df["Postal Code"] = df["PostalCode"]

In [24]:
# Keep only the postal codes that have a borough assigned. The scraped text can
# carry surrounding whitespace (such as a trailing newline), so the comparison is
# done on the stripped value instead of depending on an exact "\n" suffix.
is_assigned = df["Borough"].str.strip() != "Not assigned"

# Drop the raw scraped text (column 0) and the intermediate "Neigh" helper column.
postalcode_list = df.loc[is_assigned].drop(columns=[0, "Neigh"])

In [25]:
# Preview the first five rows of the filtered table.
postalcode_list.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Postal Code
1,M1B,Scarborough,"Malvern,Rouge",M1B
2,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",M1C
3,M1E,Scarborough,"Guildwood,Morningside,West Hill",M1E
4,M1G,Scarborough,Woburn,M1G
5,M1H,Scarborough,Cedarbrae,M1H


In [26]:
# Dimensions of the filtered table as a (rows, columns) tuple.
postalcode_list.shape

(103, 4)

### Descargamos el archivo de internet con el enlace dado, que nos proporciona coordenadas geográficas

In [27]:
# Internet location of the CSV file with the geographic coordinates.
url = 'https://cocl.us/Geospatial_data'

# Load the remote CSV straight into a DataFrame.
Geospatial_Coordinates = pd.read_csv(filepath_or_buffer=url)

### Anexamos las coordenadas geográficas a nuestro dataframe por medio del código postal

In [29]:
# Attach the coordinates to every postal code row by joining on the shared
# "Postal Code" column, then discard that join key from the result.
coord_list = (
    postalcode_list
    .merge(Geospatial_Coordinates, on="Postal Code")
    .drop(columns="Postal Code")
)

coord_list.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
