# Task 1: Web scraping

In [5]:
!conda install beautifulsoup4 --y
!conda install lxml --y

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.



In [214]:
import requests
from bs4 import BeautifulSoup
import lxml
import pandas as pd
import numpy as np

In [215]:
canada_postcode_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [216]:
web_document = requests.get(canada_postcode_url)

In [217]:
soup = BeautifulSoup(web_document.text, 'lxml')

In [218]:
# Fortunate in that the first table is the table we want
# So the find function will give it to us without needing to loop
table = soup.find('table')

In [219]:
# Get Column names
column_names = []
th_tags = table.find_all('th') 
if len(th_tags) > 0 and len(column_names) == 0:
    for th in th_tags:
        column_names.append(th.get_text().strip())
        
column_names

['Postcode', 'Borough', 'Neighbourhood']

In [243]:
# Create an empty data frame using column names already scraped
df = pd.DataFrame(columns=column_names)
previous_postcode =''

# For each scraped row, append to the data frame where the postcode is unique
row_tags = table.find_all('tr')
for row in row_tags[1:]:
    row_entries = row.find_all('td')
    tmp = []
    for ii, entry in enumerate(row_entries):
        tmp.append(entry.get_text().strip()) 
    
    # If we have a borough but no neighbourhood, we make the neighbourhood = borough
    if tmp[-1]=='Not assigned' and tmp[-2] != 'Not assigned':
        tmp[-1] = tmp[-2]
        
    new_postcode = tmp[0]
    if len(df) > 0:
 
        # If the current row has a borough previously written to the data frame
        # update the neighbourhood of the row already in the data frame
        new_postcode = tmp[0]
        previous_postcode = df.iloc[-1,0]
        
        if new_postcode == previous_postcode:
            df.iloc[-1,-1] = tmp[-1] + ", " + df.iloc[-1,-1]
            
        else:
            df = df.append(pd.DataFrame([tmp], columns=df.columns))
            
    else:

        df = df.append(pd.DataFrame([tmp], columns=df.columns))
        
df.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
0,M2A,Not assigned,Not assigned
0,M3A,North York,Parkwoods
0,M4A,North York,Victoria Village
0,M5A,Downtown Toronto,"Regent Park, Harbourfront"
0,M6A,North York,"Lawrence Manor, Lawrence Heights"
0,M7A,Queen's Park,Queen's Park
0,M8A,Not assigned,Not assigned
0,M9A,Etobicoke,Islington Avenue
0,M1B,Scarborough,"Malvern, Rouge"


In [244]:
# Ignore all unassigned Boroughs
df = df[df['Borough'] != 'Not assigned']

# Clean up
df.rename(columns={'Postcode':'PostalCode', 'Neighbourhood': 'Neighborhood'}, inplace=True)
df.reset_index(inplace=True)
df.drop('index', axis=1, inplace=True)
df.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [222]:
df.shape

(103, 3)

# Task 2: Latitude and Longitude values

In [224]:
# Question 2
!conda install -c conda-forge geocoder --y

Solving environment: done

## Package Plan ##

  environment location: /anaconda3

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ratelim-0.1.6              |           py36_0           5 KB  conda-forge
    geocoder-1.38.1            |             py_0          52 KB  conda-forge
    conda-4.5.11               |           py36_0         625 KB  conda-forge
    certifi-2018.4.16          |           py36_0         142 KB  conda-forge
    orderedset-2.0.1           |           py36_0          74 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         899 KB

The following NEW packages will be INSTALLED:

    geocoder:   1.38.1-py_0      conda-forge
    orderedset: 2.0.1-py36_0     conda-forge
    ratelim:    0.1.6-py36_0     conda-forge

The following packages wil

In [256]:
import geocoder

In [257]:
tmp_lat_long = pd.DataFrame(columns=['Latitude', 'Longitude'])
post_codes = df['PostalCode'].values   

for ii in range(len(post_codes)):
    print(ii)
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g= geocoder.google('{}, Toronto, Ontario'.format(post_codes[ii]))
        lat_lng_coords = g.latlng

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    tmp_lat_long = tmp_lat_long.append(pd.DataFrame([[latitude, longitude]], columns=tmp_lat_long.columns))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102


In [265]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Queen's Park


In [279]:
tmp_lat_long.reset_index(inplace=True)
tmp_lat_long.drop('index', axis=1, inplace=True)
tmp_lat_long.head(20)

Unnamed: 0,Latitude,Longitude
0,43.753259,-79.329656
1,43.725882,-79.315572
2,43.65426,-79.360636
3,43.718518,-79.464763
4,43.662301,-79.389494
5,43.667856,-79.532242
6,43.806686,-79.194353
7,43.745906,-79.352188
8,43.706397,-79.309937
9,43.657162,-79.378937


In [281]:
df_merged = pd.concat([df, tmp_lat_long], axis=1)

In [282]:
df_merged.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Task 3: Cluster analysis
The goal is to review similarity between neighborhoods which are in boroughs containing 'Toronto' in the name

In [310]:
# Using Boroughs which have the word 'Toronto' in the name
boroughs_all = df_merged['Borough'].unique()

In [311]:
boroughs_with_toronto_in_name = boroughs_all[['Toronto' in item for item in boroughs_all]]
boroughs_with_toronto_in_name

array(['Downtown Toronto', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

In [312]:
df_toronto_in_name = df_merged[df_merged['Borough'].isin(boroughs_with_toronto_in_name)]
df_toronto_in_name.reset_index(inplace=True)
df_toronto_in_name = df_toronto_in_name.drop('index', axis=1)
df_toronto_in_name.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [315]:
# Hidden cell for credentials

In [316]:
# Function used in labs to pull down venues near a given location
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    
    VERSION = '20180605' # Foursquare API version
    LIMIT = 100

    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [317]:
#toronto_venues = getNearbyVenues(names=df_toronto_in_name['Neighborhood'],
#                                   latitudes=df_toronto_in_name['Latitude'],
#                                   longitudes=df_toronto_in_name['Longitude']
#                                  )
toronto_venues = pd.read_csv("toronto_venues.csv")
toronto_venues.drop('Unnamed: 0', axis=1, inplace=True)
toronto_venues.head()

Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, King, Adelaide
Dufferin, Dovercourt Village
Union Station, Toronto Islands, Harbourfront East
Trinity, Little Portugal
Riverdale, The Danforth West
Toronto Dominion Centre, Design Exchange
Parkdale Village, Exhibition Place, Brockton
India Bazaar, The Beaches West
Victoria Hotel, Commerce Court
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill West, Forest Hill North
The Junction South, High Park
North Toronto West
Yorkville, North Midtown, The Annex
Roncesvalles, Parkdale
Davisville
University of Toronto, Harbord
Swansea, Runnymede
Summerhill East, Moore Park
Kensington Market, Grange Park, Chinatown
Summerhill West, South Hill, Rathnelly, Forest Hill SE, Deer Park
South Niagara, Railway Lands, King and Spadina, Harbourfront West, Island airport, Bathurst Quay, CN Tower
Rosedale
Stn A PO Boxes 25 The Esplanade
St. James Town, Cabbagetown
Und

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
3,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
4,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo YMCA,43.653191,-79.357947,Gym / Fitness Center


In [320]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,55,55,55,55,55,55
Business reply mail Processing Centre969 Eastern,16,16,16,16,16,16
Central Bay Street,85,85,85,85,85,85
Christie,16,16,16,16,16,16
Church and Wellesley,86,86,86,86,86,86
Davisville,36,36,36,36,36,36
Davisville North,7,7,7,7,7,7
"Dufferin, Dovercourt Village",18,18,18,18,18,18
"Forest Hill West, Forest Hill North",4,4,4,4,4,4
"Garden District, Ryerson",100,100,100,100,100,100


In [328]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 231 uniques categories.


In [329]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
# Neighborhood was recorded as a venue so changing the name
toronto_onehot['Neighborhood_List'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood_List,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [330]:
toronto_grouped = toronto_onehot.groupby('Neighborhood_List').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood_List,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Business reply mail Processing Centre969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,...,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.0,0.011628,0.011628,0.0,0.0,0.0,0.0,0.0,0.011628,...,0.0,0.0,0.0,0.011628,0.011628,0.011628,0.0,0.0,0.011628,0.011628
5,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Dufferin, Dovercourt Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Forest Hill West, Forest Hill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0


In [331]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [332]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood_List']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood_List'] = toronto_grouped['Neighborhood_List']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood_List,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Beer Bar,Bakery,Steakhouse,Cheese Shop,Seafood Restaurant,Café,Farmers Market
1,Business reply mail Processing Centre969 Eastern,Park,Skate Park,Smoke Shop,Light Rail Station,Spa,Farmers Market,Fast Food Restaurant,Brewery,Burrito Place,Restaurant
2,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Japanese Restaurant,Bar,Burger Joint,Sushi Restaurant,Spa,Indian Restaurant,Ice Cream Shop
3,Christie,Grocery Store,Café,Park,Nightclub,Diner,Baby Store,Italian Restaurant,Convenience Store,Restaurant,Athletics & Sports
4,Church and Wellesley,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Gay Bar,Restaurant,Burger Joint,Men's Store,Café,Mediterranean Restaurant,Bubble Tea Shop
5,Davisville,Dessert Shop,Sandwich Place,Italian Restaurant,Café,Coffee Shop,Sushi Restaurant,Seafood Restaurant,Pizza Place,Diner,Burger Joint
6,Davisville North,Hotel,Sandwich Place,Park,Burger Joint,Breakfast Spot,Dance Studio,Food & Drink Shop,Yoga Studio,Doner Restaurant,Donut Shop
7,"Dufferin, Dovercourt Village",Supermarket,Bakery,Fast Food Restaurant,Pharmacy,Pizza Place,Music Venue,Middle Eastern Restaurant,Café,Discount Store,Brewery
8,"Forest Hill West, Forest Hill North",Park,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
9,"Garden District, Ryerson",Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Bar,Middle Eastern Restaurant,Pizza Place,Diner


In [372]:
from sklearn.cluster import KMeans
import folium # map rendering library
import matplotlib.cm as cm
import matplotlib.colors as colors

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood_List', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 2, 0], dtype=int32)

In [373]:
toronto_merged = df_toronto_in_name

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood_List'), on='Neighborhood')

toronto_merged.head(10) # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Café,Bakery,Mexican Restaurant,Pub,Theater,Breakfast Spot,Farmers Market,Shoe Store
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Bar,Middle Eastern Restaurant,Pizza Place,Diner
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Restaurant,Café,Clothing Store,Hotel,Cosmetics Shop,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Neighborhood,Coffee Shop,Pub,Dance Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Restaurant,Beer Bar,Bakery,Steakhouse,Cheese Shop,Seafood Restaurant,Café,Farmers Market
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Café,Japanese Restaurant,Bar,Burger Joint,Sushi Restaurant,Spa,Indian Restaurant,Ice Cream Shop
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Nightclub,Diner,Baby Store,Italian Restaurant,Convenience Store,Restaurant,Athletics & Sports
7,M5H,Downtown Toronto,"Richmond, King, Adelaide",43.650571,-79.384568,0,Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Bar,Bakery,Restaurant,Hotel,Burger Joint
8,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,2,Supermarket,Bakery,Fast Food Restaurant,Pharmacy,Pizza Place,Music Venue,Middle Eastern Restaurant,Café,Discount Store,Brewery
9,M5J,Downtown Toronto,"Union Station, Toronto Islands, Harbourfront East",43.640816,-79.381752,0,Coffee Shop,Hotel,Café,Pizza Place,Aquarium,Scenic Lookout,Sports Bar,Italian Restaurant,Brewery,Fried Chicken Joint


In [402]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11, width=800, height=600)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
# Saving just in case map does not render in github
map_clusters.save("toronto_clusers.html")
map_clusters       

In [375]:
print("Cluster 0")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, 
toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Cluster 0


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",0,Coffee Shop,Park,Café,Bakery,Mexican Restaurant,Pub,Theater,Breakfast Spot,Farmers Market,Shoe Store
1,"Garden District, Ryerson",0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant,Italian Restaurant,Bar,Middle Eastern Restaurant,Pizza Place,Diner
2,St. James Town,0,Coffee Shop,Restaurant,Café,Clothing Store,Hotel,Cosmetics Shop,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant
3,The Beaches,0,Neighborhood,Coffee Shop,Pub,Dance Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
4,Berczy Park,0,Coffee Shop,Cocktail Bar,Restaurant,Beer Bar,Bakery,Steakhouse,Cheese Shop,Seafood Restaurant,Café,Farmers Market
5,Central Bay Street,0,Coffee Shop,Italian Restaurant,Café,Japanese Restaurant,Bar,Burger Joint,Sushi Restaurant,Spa,Indian Restaurant,Ice Cream Shop
6,Christie,0,Grocery Store,Café,Park,Nightclub,Diner,Baby Store,Italian Restaurant,Convenience Store,Restaurant,Athletics & Sports
7,"Richmond, King, Adelaide",0,Coffee Shop,Café,Steakhouse,Thai Restaurant,American Restaurant,Bar,Bakery,Restaurant,Hotel,Burger Joint
9,"Union Station, Toronto Islands, Harbourfront East",0,Coffee Shop,Hotel,Café,Pizza Place,Aquarium,Scenic Lookout,Sports Bar,Italian Restaurant,Brewery,Fried Chicken Joint
10,"Trinity, Little Portugal",0,Bar,Café,Coffee Shop,Restaurant,Pizza Place,Asian Restaurant,French Restaurant,Cocktail Bar,Vietnamese Restaurant,Bakery


In [376]:
print("Cluster 1")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, 
toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Cluster 1


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,"Forest Hill West, Forest Hill North",1,Park,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


In [377]:
print("Cluster 2")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, 
toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Cluster 2


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Dufferin, Dovercourt Village",2,Supermarket,Bakery,Fast Food Restaurant,Pharmacy,Pizza Place,Music Venue,Middle Eastern Restaurant,Café,Discount Store,Brewery
19,Davisville North,2,Hotel,Sandwich Place,Park,Burger Joint,Breakfast Spot,Dance Studio,Food & Drink Shop,Yoga Studio,Doner Restaurant,Donut Shop


In [378]:
print("Cluster 3")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, 
toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Cluster 3


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,"Toronto Dominion Centre, Design Exchange",3,Coffee Shop,Hotel,Café,American Restaurant,Deli / Bodega,Gastropub,Italian Restaurant,Restaurant,Sports Bar,Bakery


In [379]:
print("Cluster 4")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, 
toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Cluster 4


Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,"University of Toronto, Harbord",4,Café,Bookstore,Restaurant,Japanese Restaurant,Bar,Italian Restaurant,Bakery,Coffee Shop,Sandwich Place,Nightclub
