In [9]:
# For getting Wikipedia page from URL through get request
import requests

# For scraping information from the HTML source
!pip install bs4
from bs4 import BeautifulSoup

# To create the DataFrame for neighborhood data
import pandas as pd
# Display every column and every row of a DataFrame (None removes pandas' display limit)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# For handling arrays and vectors
import numpy as np

# Suppress warnings
import warnings
# NOTE(review): 'ignore' with no category silences every warning raised after this point,
# including deprecation notices from the libraries used below
warnings.filterwarnings('ignore')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bs4
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: bs4
  Building wheel for bs4 (setup.py) ... [?25l[?25hdone
  Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1270 sha256=39f6da0e42e0f9b0aa7db5c7a85916dcb7323f68bf66034b7d42a74c760c57bb
  Stored in directory: /root/.cache/pip/wheels/73/2b/cb/099980278a0c9a3e57ff1a89875ec07bfa0b6fcbebb9a8cad3
Successfully built bs4
Installing collected packages: bs4
Successfully installed bs4-0.0.1


In [29]:
# Download the HTML source of the Wikipedia page.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
source.raise_for_status()
soup = BeautifulSoup(source.text, 'lxml')

# Records collected from the table: one dict per assigned postal code
table_contents = []

# The postal-code grid is read from the first <table> element of the page
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

# Walk over every <td> cell of the table (find_all is the current name of findAll)
for row in table.find_all('td'):
    # Skip cells that lack the <p>/<span> elements the parsing below relies on
    if row.p is None or row.span is None:
        continue

    span_text = row.span.text

    # Skip postal codes whose borough is not assigned
    if span_text == 'Not assigned':
        continue

    # The text before the first '(' is the borough; the text after it is the
    # neighborhood list, with ' /' separators turned into commas.
    parts = span_text.split('(')
    borough = parts[0]
    if len(parts) > 1:
        neighborhood = parts[1].strip(')').replace(' /', ',').replace(')', ' ').strip(' ')
    else:
        # No parenthesised neighborhood list in this cell: reuse the borough name
        # instead of failing with an IndexError
        neighborhood = borough

    # Add the postal code, borough and neighborhood to the records
    cell = {
        'Postal Code': row.p.text[:3],  # first three characters of the <p> text
        'Borough': borough,
        'Neighborhood': neighborhood,
    }
    table_contents.append(cell)

# Build the DataFrame; naming the columns keeps them present even when no row was parsed
toronto_DF = pd.DataFrame(table_contents, columns=['Postal Code', 'Borough', 'Neighborhood'])

# Replace the borough strings that came out concatenated with cleaner names
toronto_DF['Borough'] = toronto_DF['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto', 
                                                     'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business', 
                                                     'EtobicokeNorthwest':'Etobicoke Northwest',
                                                     'East YorkEast Toronto':'East York/East Toronto', 
                                                     'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

# Number of rows collected (one row per assigned postal code)
print('There are {} neighborhoods in Toronto, Ontario.\n'.format(toronto_DF.shape[0]))

# First 10 rows
toronto_DF.head(10)

There are 103 neighborhoods in Toronto, Ontario.



Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [30]:
# Load the geospatial data from the CSV file
geospatial_data = pd.read_csv('/content/Geospatial_Coordinates.csv')

# Index the coordinates by postal code so they can be joined on that key
coords_by_code = geospatial_data.set_index('Postal Code')

# Attach the geospatial columns to each row, matching on the postal code.
# Columns left over from a previous run of this cell are dropped first: without
# that, re-running the cell fails because join() rejects overlapping column names.
toronto_DF = (
    toronto_DF
    .drop(columns=coords_by_code.columns, errors='ignore')
    .join(coords_by_code, on='Postal Code')
)

# Check the resulting DataFrame
toronto_DF.head(10)



Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [32]:
# Keep only the rows whose borough name contains 'Toronto'
is_toronto_borough = toronto_DF['Borough'].str.contains('Toronto')
toronto_filtered = toronto_DF.loc[is_toronto_borough]



In [33]:
import folium
from folium.plugins import MarkerCluster

# Map centre: mean latitude/longitude over every row of toronto_DF
# (the full frame, not only the filtered subset plotted below)
map_center = [toronto_DF['Latitude'].mean(), toronto_DF['Longitude'].mean()]
toronto_map = folium.Map(location=map_center, zoom_start=11)

# Marker cluster layer that will hold the neighborhood markers
marker_cluster = MarkerCluster().add_to(toronto_map)

# One marker per row of the filtered frame, with the neighborhood name as popup
marker_columns = toronto_filtered[['Latitude', 'Longitude', 'Neighborhood']]
for lat, lng, label in marker_columns.itertuples(index=False, name=None):
    folium.Marker(location=[lat, lng], popup=label).add_to(marker_cluster)

# Display the map
toronto_map
