#### Importing libraries

In [23]:
import pandas as pd
import requests 
from bs4 import BeautifulSoup
import geocoder
import folium
from geopy.geocoders import Nominatim

import random 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline 
from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs

import matplotlib.cm as cm
import matplotlib.colors as colors

#### Scraping data from the website

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
html_data = requests.get(url).content
soup = BeautifulSoup(html_data,"html5lib")
t = soup.title

In [3]:
tables = soup.find("table")

#### Creating a data frame with the required information

In [4]:
table_contents = []

In [5]:
for row in tables.findAll('td'):
    cell = {}
    if 'Not assigned' in row.span.text:
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

In [6]:
df = pd.DataFrame(table_contents)
df.sort_values(by = 'PostalCode', inplace = True)

In [7]:
geo_data = pd.read_csv('/Users/test/Downloads/Geospatial_Coordinates.csv')

In [8]:
df['Latitude'] = geo_data['Latitude']
df['Longitude'] = geo_data['Longitude']

In [9]:
df.sort_index(inplace = True)

In [11]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="on_explorer")
location = geolocator.geocode(address)
tor_lat = location.latitude
tor_long = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(tor_lat, tor_long))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [13]:
map_toronto = folium.Map(location=[tor_lat, tor_long], zoom_start=12)

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [14]:
toronto_data = df[df['Borough'].str.contains('Toronto')].reset_index(drop=True)

In [17]:
map_borough = folium.Map(location=[tor_lat, tor_long], zoom_start=12)

# add markers to map
for lat, lng, bor, label in zip(toronto_data['Latitude'], toronto_data['Longitude'],toronto_data['Borough'],toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        parse_html=False).add_to(map_borough)  
    
map_borough

In [18]:
tor_grouped = toronto_data.groupby('Neighborhood').mean().reset_index()

In [20]:
kclusters = 5

tor_cluster = tor_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 1, 1, 4, 0, 3, 1, 3, 1, 3], dtype=int32)