# Neighborhods of Toronto

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.3

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Libraries imported.


In [3]:

pip install beautifulsoup4


Note: you may need to restart the kernel to use updated packages.


In [4]:
from bs4 import BeautifulSoup
 
 
req = requests.get('https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=890001695')
soup = BeautifulSoup(req.text, "html.parser")

table=soup.find('table', attrs={'class':'wikitable sortable'})

#get headers:
headers=table.findAll('th')
for i, head in enumerate(headers): headers[i]=str(headers[i]).replace("<th>","").replace("</th>","").replace("\n","")

#Para buscar todos los tr sin encabezado
rows=table.findAll('tr')
rows=rows[1:len(rows)]

In [5]:
headers

['Postcode', 'Borough', 'Neighbourhood']

In [6]:
# Quitar los tags
for i, row in enumerate(rows): rows[i] = str(rows[i]).replace("\n</td></tr>","").replace("<tr>\n<td>","")

In [7]:


# dataframe
df_canada=pd.DataFrame(rows)
df_canada[headers] = df_canada[0].str.split("</td>\n<td>", n = 2, expand = True) 
df_canada.drop(columns=[0],inplace=True)



In [8]:
df_canada.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Parkwoods"" title=""Parkwoods"">Pa..."
3,M4A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Victoria_Village"" title=""Victor..."
4,M5A,"<a href=""/wiki/Downtown_Toronto"" title=""Downto...","<a href=""/wiki/Harbourfront_(Toronto)"" title=""..."


In [9]:
df_canada = df_canada.drop(df_canada[(df_canada.Borough == "Not assigned")].index)
df_canada.Neighbourhood.replace("Not assigned", df_canada.Borough, inplace=True)

df_canada.Neighbourhood.fillna(df_canada.Borough, inplace=True)
df_canada=df_canada.drop_duplicates()

df_canada.update(
    df_canada.Neighbourhood.loc[
        lambda x: x.str.contains('title')
    ].str.extract('title=\"([^\"]*)',expand=False))

df_canada.update(
    df_canada.Borough.loc[
        lambda x: x.str.contains('title')
    ].str.extract('title=\"([^\"]*)',expand=False))

df_canada.update(
    df_canada.Neighbourhood.loc[
        lambda x: x.str.contains('Toronto')
    ].str.replace(", Toronto",""))
df_canada.update(
    df_canada.Neighbourhood.loc[
        lambda x: x.str.contains('Toronto')
    ].str.replace("\(Toronto\)",""))

df_toronto = pd.DataFrame({'Postcode':df_canada.Postcode.unique()})
df_toronto['Borough']=pd.DataFrame(list(set(df_canada['Borough'].loc[df_canada['Postcode'] == x['Postcode']])) for i, x in df_toronto.iterrows())
df_toronto['Neighborhood']=pd.Series(list(set(df_canada['Neighbourhood'].loc[df_canada['Postcode'] == x['Postcode']])) for i, x in df_toronto.iterrows())
df_toronto['Neighborhood']=df_toronto['Neighborhood'].apply(lambda x: ', '.join(x))
df_toronto.dtypes

df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront , Regent Park"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park (Toronto),Queen's Park


In [10]:
dfll= pd.read_csv("http://cocl.us/Geospatial_data")
dfll.rename(columns={'Postal Code':'Postcode'}, inplace=True)
dfll.set_index("Postcode")
df_toronto.set_index("Postcode")
toronto_data=pd.merge(df_toronto, dfll)
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront , Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park (Toronto),Queen's Park,43.662301,-79.389494


In [11]:


address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, ON, Canada are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto, ON, Canada are 43.653963, -79.387207.


In [14]:


map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

map_toronto



In [15]:

# Marcas en mapa
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
map_toronto