In [1]:
from bs4 import BeautifulSoup as BSp
import lxml
#import html5lib alternative to lxml
import requests
import pandas as pd

### 1. Get text from wikipedia website for scraping

In [2]:
#from html file for offline work
#with open('Cantest.html') as file:
#    soup = BSp(file,'lxml')


#from URL
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(URL).text
soup = BSp(source, 'lxml')
#print(soup.prettify())

### 2. Scraping
- find table
- extract all rows
- produce a clean list through spliting and slicing

In [3]:
#isolating table
table = soup.find('table')
rows = table.tbody.text
rows

"\nPostcode\nBorough\nNeighbourhood\n\n\nM1A\nNot assigned\nNot assigned\n\n\nM2A\nNot assigned\nNot assigned\n\n\nM3A\nNorth York\nParkwoods\n\n\nM4A\nNorth York\nVictoria Village\n\n\nM5A\nDowntown Toronto\nHarbourfront\n\n\nM5A\nDowntown Toronto\nRegent Park\n\n\nM6A\nNorth York\nLawrence Heights\n\n\nM6A\nNorth York\nLawrence Manor\n\n\nM7A\nQueen's Park\nNot assigned\n\n\nM8A\nNot assigned\nNot assigned\n\n\nM9A\nEtobicoke\nIslington Avenue\n\n\nM1B\nScarborough\nRouge\n\n\nM1B\nScarborough\nMalvern\n\n\nM2B\nNot assigned\nNot assigned\n\n\nM3B\nNorth York\nDon Mills North\n\n\nM4B\nEast York\nWoodbine Gardens\n\n\nM4B\nEast York\nParkview Hill\n\n\nM5B\nDowntown Toronto\nRyerson\n\n\nM5B\nDowntown Toronto\nGarden District\n\n\nM6B\nNorth York\nGlencairn\n\n\nM7B\nNot assigned\nNot assigned\n\n\nM8B\nNot assigned\nNot assigned\n\n\nM9B\nEtobicoke\nCloverdale\n\n\nM9B\nEtobicoke\nIslington\n\n\nM9B\nEtobicoke\nMartin Grove\n\n\nM9B\nEtobicoke\nPrincess Gardens\n\n\nM9B\nEtobicoke\n

In [4]:
# create list by spliting the string
rowList = rows.split('\n')

# clean the list
while '' in rowList:
    rowList.remove('')

#remove header
rowList = rowList[3:]
rowList

['M1A',
 'Not assigned',
 'Not assigned',
 'M2A',
 'Not assigned',
 'Not assigned',
 'M3A',
 'North York',
 'Parkwoods',
 'M4A',
 'North York',
 'Victoria Village',
 'M5A',
 'Downtown Toronto',
 'Harbourfront',
 'M5A',
 'Downtown Toronto',
 'Regent Park',
 'M6A',
 'North York',
 'Lawrence Heights',
 'M6A',
 'North York',
 'Lawrence Manor',
 'M7A',
 "Queen's Park",
 'Not assigned',
 'M8A',
 'Not assigned',
 'Not assigned',
 'M9A',
 'Etobicoke',
 'Islington Avenue',
 'M1B',
 'Scarborough',
 'Rouge',
 'M1B',
 'Scarborough',
 'Malvern',
 'M2B',
 'Not assigned',
 'Not assigned',
 'M3B',
 'North York',
 'Don Mills North',
 'M4B',
 'East York',
 'Woodbine Gardens',
 'M4B',
 'East York',
 'Parkview Hill',
 'M5B',
 'Downtown Toronto',
 'Ryerson',
 'M5B',
 'Downtown Toronto',
 'Garden District',
 'M6B',
 'North York',
 'Glencairn',
 'M7B',
 'Not assigned',
 'Not assigned',
 'M8B',
 'Not assigned',
 'Not assigned',
 'M9B',
 'Etobicoke',
 'Cloverdale',
 'M9B',
 'Etobicoke',
 'Islington',
 'M9B',
 

### 3. produce dataframe

In [5]:
#create list for later columns

postcode = rowList[0::3]
Borough = rowList[1::3]
Neighb = rowList[2::3]

#create a dictionary and from that the dataframe
dfDict = {'Postcode':postcode,'Borough':Borough,'Neighborhood':Neighb}
df = pd.DataFrame.from_dict(dfDict)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [6]:
#remove "Not assigned" Boroughs
df = df[df.Borough != "Not assigned"]

#join Neighborhoods by postcode
df = df.groupby(['Postcode', 'Borough'], as_index = False)['Neighborhood'].agg(list)
df['Neighborhood'] = df['Neighborhood'].str.join(', ')

#replace "Not assigned" Neighborhood with Borough
dfTest = df.copy()
dfTest.iloc[:, 1:3] = dfTest.iloc[:, 1:3].where(df['Neighborhood'] != "Not assigned", df['Borough'], axis = 0)
df = dfTest
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [7]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


# Part 2: fetching coordinates

used the csv file, because geocoder didn't finish for eternity

In [8]:
#load csv file with coordinates
dfCoords = pd.read_csv('Geospatial_Coordinates.csv')
dfCoords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
#join dataframes and drop additional Postal Codes column
dfNew = pd.concat([df, dfCoords], axis = 1)
dfFull = dfNew.drop(["Postal Code"], axis = 1)
dfFull.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Part 3: Foursquare exloring

In [10]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

In [11]:
CLIENT_ID = 'IHZMA...LQDY5W' # your Foursquare ID
CLIENT_SECRET = 'BYGY...4B1CM2' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30

In [12]:
# create map of Toronto
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dfFull['Latitude'], dfFull['Longitude'], dfFull['Borough'], dfFull['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

### Get all the venues for all Boroughs

In [13]:
# define a function to retrieve all nearby venues

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [14]:
# create a dataframe using the function

Toronto_venues = getNearbyVenues(names=dfFull['Borough'].unique(),
                                   latitudes=dfFull['Latitude'],
                                   longitudes=dfFull['Longitude']
                                  )

print(Toronto_venues.shape)
Toronto_venues.head()

Scarborough
North York
East York
East Toronto
Central Toronto
Downtown Toronto
York
West Toronto
Queen's Park
Mississauga
Etobicoke
(49, 7)


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Scarborough,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,North York,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,North York,43.784535,-79.160497,Affordable Toronto Movers,43.787919,-79.162977,Moving Target
3,East York,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,East York,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


#### Analyze each neighborhood employing one hot encoding

In [15]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Borough'] = Toronto_venues['Borough'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()
# Toronto_onehot.shape

Unnamed: 0,Borough,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Convenience Store,Department Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Hakka Restaurant,Hobby Shop,Indian Restaurant,Intersection,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Motel,Moving Target,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Skating Rink,Soccer Field,Thai Restaurant,Vietnamese Restaurant
0,Scarborough,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,North York,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,North York,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,East York,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,East York,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [16]:
Toronto_grouped = Toronto_onehot.groupby('Borough').mean().reset_index()
Toronto_grouped

Unnamed: 0,Borough,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Convenience Store,Department Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Hakka Restaurant,Hobby Shop,Indian Restaurant,Intersection,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Motel,Moving Target,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Skating Rink,Soccer Field,Thai Restaurant,Vietnamese Restaurant
0,Central Toronto,0.0,0.125,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0
1,Downtown Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,East York,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
5,Mississauga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
6,North York,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Queen's Park,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Scarborough,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,West Toronto,0.0,0.0,0.111111,0.0,0.0,0.0,0.222222,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0


In [17]:
# check the top venues in each Borough

num_top_venues = 5

for hood in Toronto_grouped['Borough']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Borough'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
                venue  freq
0              Lounge  0.12
1              Bakery  0.12
2                Bank  0.12
3     Thai Restaurant  0.12
4  Athletics & Sports  0.12


----Downtown Toronto----
                 venue  freq
0           Playground   0.5
1    Convenience Store   0.5
2  American Restaurant   0.0
3        Moving Target   0.0
4    Korean Restaurant   0.0


----East Toronto----
                       venue  freq
0                Coffee Shop  0.50
1          Korean Restaurant  0.25
2                   Pharmacy  0.25
3        American Restaurant  0.00
4  Latin American Restaurant  0.00


----East York----
                 venue  freq
0       Medical Center  0.14
1       Breakfast Spot  0.14
2  Rental Car Location  0.14
3          Pizza Place  0.14
4    Electronics Store  0.14


----Etobicoke----
                       venue  freq
0          Indian Restaurant  0.33
1      Vietnamese Restaurant  0.17
2                  Pet Store  0.17
3  Latin American Re

#### create a new dataframe for clustering k-means

In [18]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
boroughs_venues_sorted = pd.DataFrame(columns=columns)
boroughs_venues_sorted['Borough'] = Toronto_grouped['Borough']

for ind in np.arange(Toronto_grouped.shape[0]):
    boroughs_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

boroughs_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Hakka Restaurant,Athletics & Sports,Bakery,Bank,Thai Restaurant,Fried Chicken Joint,Caribbean Restaurant,Lounge,College Stadium,Fast Food Restaurant
1,Downtown Toronto,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
2,East Toronto,Coffee Shop,Korean Restaurant,Pharmacy,Vietnamese Restaurant,Caribbean Restaurant,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium
3,East York,Medical Center,Electronics Store,Rental Car Location,Pizza Place,Intersection,Breakfast Spot,Mexican Restaurant,Vietnamese Restaurant,Department Store,Convenience Store
4,Etobicoke,Indian Restaurant,Vietnamese Restaurant,Chinese Restaurant,Latin American Restaurant,Pet Store,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium


#### Cluster the Boroughs

In [19]:
# set number of clusters
kclusters = 5
Toronto_grouped_clustering = Toronto_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# add clustering labels
boroughs_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
Toronto_merged = dfFull

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(boroughs_venues_sorted.set_index('Borough'), on='Borough')
Toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant


### Create a map to see the clusters

In [20]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Borough'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Let's see what the clusters comprise
#### Cluster 1

In [21]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
35,East York,0,Medical Center,Electronics Store,Rental Car Location,Pizza Place,Intersection,Breakfast Spot,Mexican Restaurant,Vietnamese Restaurant,Department Store,Convenience Store
36,East York,0,Medical Center,Electronics Store,Rental Car Location,Pizza Place,Intersection,Breakfast Spot,Mexican Restaurant,Vietnamese Restaurant,Department Store,Convenience Store
37,East Toronto,0,Coffee Shop,Korean Restaurant,Pharmacy,Vietnamese Restaurant,Caribbean Restaurant,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium
38,East York,0,Medical Center,Electronics Store,Rental Car Location,Pizza Place,Intersection,Breakfast Spot,Mexican Restaurant,Vietnamese Restaurant,Department Store,Convenience Store
39,East York,0,Medical Center,Electronics Store,Rental Car Location,Pizza Place,Intersection,Breakfast Spot,Mexican Restaurant,Vietnamese Restaurant,Department Store,Convenience Store
40,East York,0,Medical Center,Electronics Store,Rental Car Location,Pizza Place,Intersection,Breakfast Spot,Mexican Restaurant,Vietnamese Restaurant,Department Store,Convenience Store
41,East Toronto,0,Coffee Shop,Korean Restaurant,Pharmacy,Vietnamese Restaurant,Caribbean Restaurant,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium
42,East Toronto,0,Coffee Shop,Korean Restaurant,Pharmacy,Vietnamese Restaurant,Caribbean Restaurant,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium
43,East Toronto,0,Coffee Shop,Korean Restaurant,Pharmacy,Vietnamese Restaurant,Caribbean Restaurant,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium
44,Central Toronto,0,Hakka Restaurant,Athletics & Sports,Bakery,Bank,Thai Restaurant,Fried Chicken Joint,Caribbean Restaurant,Lounge,College Stadium,Fast Food Restaurant


Most of Toronto, but pretty different 1st most venues. Might be improvable.

#### Cluster 2

In [22]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
51,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
52,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
53,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
54,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
55,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
56,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
57,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
58,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop
59,Downtown Toronto,1,Playground,Convenience Store,Vietnamese Restaurant,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,College Stadium,Coffee Shop


#### Cluster 3

In [23]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
1,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
2,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
3,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
4,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
5,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
6,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
7,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
8,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant
9,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Fried Chicken Joint,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant


#### Cluster 4

In [24]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,Queen's Park,3,American Restaurant,Motel,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop,Chinese Restaurant


#### Cluster 5

In [25]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
18,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
19,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
20,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
21,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
22,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
23,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
24,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
25,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
26,North York,4,Moving Target,Bar,Caribbean Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Department Store,Convenience Store,College Stadium,Coffee Shop
