#### Installing and Loading Libraries

In [1]:
!pip install bs4

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [2]:
!pip install geocoder

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [3]:
!pip install folium 

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [4]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import geocoder
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

import folium # map rendering library

## Web-scraping: Transforming the Wikipedia page into a pandas dataframe

#### Retrieving URL

In [5]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_data = requests.get(url).text

Parse the html data using beautiful_soup.

In [6]:
beautiful_soup = BeautifulSoup(html_data, 'html5lib')

#### Extracting tables with the Beautifulsoap package

In [7]:
tables = beautiful_soup.find_all('table')

In [8]:
len(tables)

3

In [9]:
tables

[<table cellpadding="2" cellspacing="0" rules="all" style="width:100%; border-collapse:collapse; border:1px solid #ccc;">
 
 <tbody><tr>
 <td style="width:11%; vertical-align:top; color:#ccc;">
 <p><b>M1A</b><br/><span style="font-size:85%;"><i>Not assigned</i></span>
 </p>
 </td>
 <td style="width:11%; vertical-align:top; color:#ccc;">
 <p><b>M2A</b><br/><span style="font-size:85%;"><i>Not assigned</i></span>
 </p>
 </td>
 <td style="width:11%; vertical-align:top;">
 <p><b>M3A</b><br/><span style="font-size:85%;"><a href="/wiki/North_York" title="North York">North York</a><br/>(<a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>)</span>
 </p>
 </td>
 <td style="width:11%; vertical-align:top;">
 <p><b>M4A</b><br/><span style="font-size:85%;"><a href="/wiki/North_York" title="North York">North York</a><br/>(<a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>)</span>
 </p>
 </td>
 <td style="width:11%; vertical-align:top;">
 <p><b>M5A</b><br/><span style=

In [10]:
for index,table in enumerate(tables):
    if ("M3A" in str(table)):
        table_index = index

#print(table_index)

In [11]:
print(table_index)

0


In [12]:
print(tables[table_index].prettify())

<table cellpadding="2" cellspacing="0" rules="all" style="width:100%; border-collapse:collapse; border:1px solid #ccc;">
 <tbody>
  <tr>
   <td style="width:11%; vertical-align:top; color:#ccc;">
    <p>
     <b>
      M1A
     </b>
     <br/>
     <span style="font-size:85%;">
      <i>
       Not assigned
      </i>
     </span>
    </p>
   </td>
   <td style="width:11%; vertical-align:top; color:#ccc;">
    <p>
     <b>
      M2A
     </b>
     <br/>
     <span style="font-size:85%;">
      <i>
       Not assigned
      </i>
     </span>
    </p>
   </td>
   <td style="width:11%; vertical-align:top;">
    <p>
     <b>
      M3A
     </b>
     <br/>
     <span style="font-size:85%;">
      <a href="/wiki/North_York" title="North York">
       North York
      </a>
      <br/>
      (
      <a href="/wiki/Parkwoods" title="Parkwoods">
       Parkwoods
      </a>
      )
     </span>
    </p>
   </td>
   <td style="width:11%; vertical-align:top;">
    <p>
     <b>
      M4A
     </b>
 

#### Creating list with postal code, borough and neighborhood

In [13]:
# OLD CODE
# neighborhoods_table = pd.DataFrame(columns=["PostalCode", "Borough", "Neighborhood"])

# for row in tables[table_index].tbody.find_all("tr"):
#    col = row.find_all("td")
#    date =col[0].text
#    revenue = col[1].text.replace("$", "").replace(",", "")
#    tesla_revenue = tesla_revenue.append({"Date":date, "Revenue":revenue}, ignore_index=True)

In [14]:
table_contents=[]

for row in tables[table_index].tbody.find_all('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

In [15]:
print(table_contents)

[{'PostalCode': 'M3A', 'Borough': 'North York', 'Neighborhood': 'Parkwoods'}, {'PostalCode': 'M4A', 'Borough': 'North York', 'Neighborhood': 'Victoria Village'}, {'PostalCode': 'M5A', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Regent Park, Harbourfront'}, {'PostalCode': 'M6A', 'Borough': 'North York', 'Neighborhood': 'Lawrence Manor, Lawrence Heights'}, {'PostalCode': 'M7A', 'Borough': "Queen's Park", 'Neighborhood': 'Ontario Provincial Government'}, {'PostalCode': 'M9A', 'Borough': 'Etobicoke', 'Neighborhood': 'Islington Avenue'}, {'PostalCode': 'M1B', 'Borough': 'Scarborough', 'Neighborhood': 'Malvern, Rouge'}, {'PostalCode': 'M3B', 'Borough': 'North York', 'Neighborhood': 'Don Mills North'}, {'PostalCode': 'M4B', 'Borough': 'East York', 'Neighborhood': 'Parkview Hill, Woodbine Gardens'}, {'PostalCode': 'M5B', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Garden District, Ryerson'}, {'PostalCode': 'M6B', 'Borough': 'North York', 'Neighborhood': 'Glencairn'}, {'PostalCode': 'M9

#### Creating and cleaning dataframe

In [16]:
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

In [17]:
df.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [18]:
df[df.iloc[:, 2] == "Not assigned"]

Unnamed: 0,PostalCode,Borough,Neighborhood


In [19]:
df.shape

(103, 3)

## Adding geo-location data (latitude and longitude)

In [20]:
latitude=[]
longitude=[]
for code in df['PostalCode']:
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
    #print(code, g.latlng)
    while (g.latlng is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
        print(code, g.latlng)
    latlng = g.latlng
    latitude.append(latlng[0])
    longitude.append(latlng[1])

In [21]:
df['Latitude'] = latitude
df['Longitude'] = longitude

In [22]:
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto Business,Enclave of M4L,43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


## Clustering boroughs

#### Accessing FourSquare information and creating dataframe with venue information (name, latitude, longitude, category)

In [23]:
# hide-cell
CLIENT_ID = '2A4NU1T32EZAHSQQJVKD5NOOARCZIPHNNRNJQYWMNKPAMINS' # your Foursquare ID
CLIENT_SECRET = 'J2PJIIOUGNSPY1O4LBBFYOEPBSID42KVP3OKWEGZIVPDJ2V2' # your Foursquare Secret
ACCESS_TOKEN = 'UUNUQJE1YRKM0FOKXND2GWI2I3NPS3PHEDXMGZ2SKRWYL2YO' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2A4NU1T32EZAHSQQJVKD5NOOARCZIPHNNRNJQYWMNKPAMINS
CLIENT_SECRET:J2PJIIOUGNSPY1O4LBBFYOEPBSID42KVP3OKWEGZIVPDJ2V2


In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East
The Danforth

In [26]:
print(toronto_venues.shape)
toronto_venues.head()

(2246, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75245,-79.32991,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.75245,-79.32991,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Parkwoods,43.75245,-79.32991,Towns On The Ravine,43.754754,-79.332552,Hotel
4,Victoria Village,43.73057,-79.31306,Wigmore Park,43.731023,-79.310771,Park


In [27]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,14,14,14,14,14,14
"Alderwood, Long Branch",4,4,4,4,4,4
"Bathurst Manor, Wilson Heights, Downsview North",2,2,2,2,2,2
Bayview Village,5,5,5,5,5,5
"Bedford Park, Lawrence Manor East",18,18,18,18,18,18
...,...,...,...,...,...,...
"Willowdale, Newtonbrook",20,20,20,20,20,20
Woburn,5,5,5,5,5,5
Woodbine Heights,20,20,20,20,20,20
York Mills West,3,3,3,3,3,3


#### Creating table with dummy variables grouped by neighborhood

In [28]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

In [29]:
#toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
toronto_onehot = toronto_onehot.drop('Neighborhood', axis = 1)

In [30]:
toronto_onehot = pd.concat([toronto_venues['Neighborhood'], toronto_onehot], axis=1, join="inner")
toronto_onehot

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2241,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2242,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2243,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2244,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
toronto_onehot['Neighborhood']

0                                               Parkwoods
1                                               Parkwoods
2                                               Parkwoods
3                                               Parkwoods
4                                        Victoria Village
                              ...                        
2241    Mimico NW, The Queensway West, South of Bloor,...
2242    Mimico NW, The Queensway West, South of Bloor,...
2243    Mimico NW, The Queensway West, South of Bloor,...
2244    Mimico NW, The Queensway West, South of Bloor,...
2245    Mimico NW, The Queensway West, South of Bloor,...
Name: Neighborhood, Length: 2246, dtype: object

In [32]:
toronto_onehot.shape

(2246, 256)

In [33]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,"Willowdale, Newtonbrook",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
97,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
98,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
99,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


In [34]:
toronto_grouped.shape

(101, 256)

#### Cluster analysis of neighborhoods

In [35]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 1, 3, 3, 3, 3, 3, 3], dtype=int32)

#### Creating a dataframe with top-10 most commong venues

In [36]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [37]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Hong Kong Restaurant,Newsagent,Shopping Mall,Shanghai Restaurant,Supermarket,Sushi Restaurant,Bakery,Badminton Court,Discount Store
1,"Alderwood, Long Branch",Convenience Store,Performing Arts Venue,Pub,Farm,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Yoga Studio
2,"Bathurst Manor, Wilson Heights, Downsview North",Men's Store,Lawyer,Yoga Studio,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market
3,Bayview Village,Trail,Construction & Landscaping,Golf Driving Range,Park,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Pharmacy,Butcher,Liquor Store,Café,Sports Club,Restaurant,Sushi Restaurant,Pub


#### Adding clustering lables to top-10 venues dataframe

In [38]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,1.0,Food & Drink Shop,Hotel,Fast Food Restaurant,Park,Fish Market,Fish & Chips Shop,Field,Flea Market,Eastern European Restaurant,Farmers Market
1,M4A,North York,Victoria Village,43.73057,-79.31306,1.0,Nail Salon,Park,Grocery Store,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Yoga Studio
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,3.0,Coffee Shop,Greek Restaurant,Pub,Bakery,Restaurant,Event Space,Italian Restaurant,Thai Restaurant,Distribution Center,Discount Store
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,3.0,Clothing Store,Restaurant,Coffee Shop,Toy / Game Store,Furniture / Home Store,Fast Food Restaurant,Bookstore,Men's Store,Café,Metro Station
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188,3.0,Coffee Shop,Burrito Place,Falafel Restaurant,Persian Restaurant,Theater,Burger Joint,Sandwich Place,Café,Sushi Restaurant,Bar


In [39]:
type(latitude)

list

## Visualizing the results - creating a map

#### Getting coordinates of Toronto (latitude, longitude)

In [40]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Creating the map

In [41]:
toronto_merged = toronto_merged.dropna()
toronto_merged = toronto_merged.reset_index(drop=True)

In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    cluster = int(cluster)
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Distribution of clusters - Cluster 3 most common

In [43]:
toronto_merged['Cluster Labels'].value_counts()

3.0    76
1.0    18
2.0     3
0.0     3
4.0     1
Name: Cluster Labels, dtype: int64

#### Table with Cluster 3 venues shows common attributes such as coffee shops, restaurants and cafés

In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,3.0,Coffee Shop,Greek Restaurant,Pub,Bakery,Restaurant,Event Space,Italian Restaurant,Thai Restaurant,Distribution Center,Discount Store
3,North York,3.0,Clothing Store,Restaurant,Coffee Shop,Toy / Game Store,Furniture / Home Store,Fast Food Restaurant,Bookstore,Men's Store,Café,Metro Station
4,Queen's Park,3.0,Coffee Shop,Burrito Place,Falafel Restaurant,Persian Restaurant,Theater,Burger Joint,Sandwich Place,Café,Sushi Restaurant,Bar
5,Etobicoke,3.0,Pharmacy,Café,Home Service,Park,Skating Rink,Shopping Mall,Bank,Grocery Store,Elementary School,Escape Room
8,East York,3.0,Pizza Place,Breakfast Spot,Fast Food Restaurant,Bank,Flea Market,Rock Climbing Spot,Intersection,Café,Athletics & Sports,Gym / Fitness Center
...,...,...,...,...,...,...,...,...,...,...,...,...
95,Downtown Toronto,3.0,Coffee Shop,Sandwich Place,Café,Hotel,Restaurant,Japanese Restaurant,Bank,Gym,Deli / Bodega,Asian Restaurant
97,Downtown Toronto,3.0,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Gym,Grocery Store,Mediterranean Restaurant,Pub,Fast Food Restaurant
98,East Toronto Business,3.0,Coffee Shop,Café,Gym,Restaurant,Sushi Restaurant,Asian Restaurant,Hotel,Vegetarian / Vegan Restaurant,Thai Restaurant,Sandwich Place
99,Etobicoke,3.0,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Sushi Restaurant,Bank,Italian Restaurant,Flower Shop,Park,Dessert Shop,Escape Room


#### Table with Cluster 1 venues shows common attributes such as Shops, parks and event spaces

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1.0,Food & Drink Shop,Hotel,Fast Food Restaurant,Park,Fish Market,Fish & Chips Shop,Field,Flea Market,Eastern European Restaurant,Farmers Market
1,North York,1.0,Nail Salon,Park,Grocery Store,Falafel Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Yoga Studio
7,North York,1.0,Soccer Field,Gas Station,Park,Burger Joint,Yoga Studio,Farm,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
16,York,1.0,Hockey Arena,Trail,Field,Park,Grocery Store,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space
17,Etobicoke,1.0,Shopping Mall,Park,Grocery Store,Carpet Store,College Rec Center,Electronics Store,Fish & Chips Shop,Fast Food Restaurant,Field,Farmers Market
18,Scarborough,1.0,Construction & Landscaping,Park,Gym / Fitness Center,Dry Cleaner,Cupcake Shop,Dance Studio,Flea Market,Fish Market,Fish & Chips Shop,Field
22,Scarborough,1.0,Soccer Field,Park,Coffee Shop,Korean BBQ Restaurant,Business Service,Farm,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
32,Scarborough,1.0,Spa,Park,Grocery Store,Restaurant,Indian Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Fish & Chips Shop
35,East York/East Toronto,1.0,Intersection,Recreation Center,Playground,Park,Yoga Studio,Elementary School,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant
38,Scarborough,1.0,Chinese Restaurant,Park,Coffee Shop,Discount Store,Farm,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market
