# PART 1 - Creation of the Data Frame

### BeautifulSoup - Scraping the table from the website

In [47]:
import requests
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None

import matplotlib.pyplot
import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_content = requests.get(url).text

### Definition of the html content

In [3]:
soup = BeautifulSoup(html_content, 'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"91040cad-018f-48bc-a488-21fce7d2c332","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":967921175,"wgRevisionId":967921175,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toron

### Location of class ‘wikitable sortable’ in the HTML script

In [4]:
my_table = soup.find('table',{'class':'wikitable sortable'})
value=[]
tr = my_table.findAll(['tr'])
my_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighborhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<td>

### Storing the data in a .csv format

In [48]:
import csv

csvfile = open("canada.csv",'wt',newline='',encoding='utf-8')
writer = csv.writer(csvfile)  
try:   
        for cell in tr:
            th = cell.find_all('th')
            th_data = [col.text.strip('\n') for col in th]
            td = cell.find_all('td')
            row = [i.text.replace('\n','') for i in td]
            writer.writerow(th_data+row)      
        
finally:   
    csvfile.close()

### Loading the .csv file

In [6]:
df = pd.read_csv("/Users/Sam/Desktop/Projects/Coursera_Capstone/canada.csv")
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Dropping all cells in 'Borough' that contain 'Not assigned'

In [7]:
indexName = df[df['Borough'] == 'Not assigned'].index
df.drop(indexName, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Reset the index and remane & sort the 'Postal Code' column 

In [8]:
df.reset_index(inplace = True, drop = True)
df.rename(columns={'Postal Code':'PostalCode'}, inplace = True)
df.sort_values(by=['PostalCode'])
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Dataframe's shape

In [9]:
df.shape

(103, 3)

# PART 2 - Latitude and longitude coordinates of each neighborhood

### Installing geocoder in order to get the coordinates of each neighborhood

In [10]:
!pip install geocoder
import geocoder



### Recalling the DataFrame

In [11]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Looking for the coordinates and trying if it works 

In [12]:
def get_latilong(postal_code):
    lati_long_coords = None
    while(lati_long_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lati_long_coords = g.latlng
    return lati_long_coords

get_latilong('M5G')

[43.65609000000006, -79.38492999999994]

### Applying the code to the whole DataFrame

In [13]:
postal_codes = df['PostalCode']    
coords = [ get_latilong(postal_code) for postal_code in postal_codes.tolist() ]

df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']

In [14]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75188,-79.33036
1,M4A,North York,Victoria Village,43.73042,-79.31282
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65514,-79.36265
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72321,-79.45141
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66449,-79.39302


# PART 3 - Creating a map

In [15]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim 

Collecting package metadata (current_repodata.json): done
Solving environment: | 
  - anaconda/osx-64::ca-certificates-2020.1.1-0, anaconda/osx-64::openssl-1.1.1d-h1de35cc_4
  - anaconda/osx-64::openssl-1.1.1d-h1de35cc_4, defaults/osx-64::ca-certificates-2020.1.1-0
  - anaconda/osx-64::ca-certificates-2020.1.1-0, defaults/osx-64::openssl-1.1.1d-h1de35cc_4
  - defaults/osx-64::ca-certificates-2020.1.1-0, defaults/osx-64::openssl-1.1.1d-h1de35ccdone

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: - 
  - anaconda/osx-64::ca-certificates-2020.1.1-0, anaconda/osx-64::openssl-1.1.1d-h1de35cc_4
  - anaconda/osx-64::ca-certificates-2020.1.1-0, defaults/osx-64::openssl-1.1.1d-h1de35cc_4
  - anaconda/osx-64::openssl-1.1.1d-h1de35cc_4, defaults/osx-64::ca-certificates-2020.1.1-0
  - defaults/osx-64::ca-certificates-2020.1.1-0, defaults/osx-64::openssl-1.1.1d-h1de35ccdone

# All requested packages already installed.



### Looking for Toronto's coordinates

In [16]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="Canada_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Visualising Toronto's map

In [17]:
map_toronto = folium.Map(location=[43.6534817, -79.3839347])
map_toronto

### Visualising Toronto's neighborhoods on the map

In [18]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='#cfbf19',
        fill=True,
        fill_color='#edda09',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Exploring and clustering the neighborhoods 

*I chose to explore Downtown Toronto*

In [49]:
downtown = df.loc[df['Borough'] == 'Downtown Toronto']
downtown

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65514,-79.36265
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66449,-79.39302
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.65736,-79.37818
15,M5C,Downtown Toronto,St. James Town,43.65143,-79.37557
20,M5E,Downtown Toronto,Berczy Park,43.64531,-79.37368
24,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493
25,M6G,Downtown Toronto,Christie,43.66878,-79.42071
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6497,-79.38258
36,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.64285,-79.3804
42,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.6471,-79.38153


In [20]:
address = 'Downtown Toronto, Canada'

geolocator = Nominatim(user_agent="Downtown_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6563221, -79.3809161.


### Creating a map of Downtown Toronto and its neighborhoods

In [21]:

map_downtown = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(downtown['Latitude'], downtown['Longitude'], downtown['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown

### Utilizing the Foursquare API to explore the neighborhoods and segment them

In [22]:
CLIENT_ID = 'V5WOYHS1O4OLSZUUFHCWGP0ALRYU42M3ERDG0MK1K2BPJPTM'
CLIENT_SECRET = 'TZRFTNUMUKOZC2QXOXOYOA2KGPGJ4OPLVPTYCEU1UXOQOO1E'
VERSION = '20180605'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: V5WOYHS1O4OLSZUUFHCWGP0ALRYU42M3ERDG0MK1K2BPJPTM
CLIENT_SECRET:TZRFTNUMUKOZC2QXOXOYOA2KGPGJ4OPLVPTYCEU1UXOQOO1E


In [24]:
def getNearbyVenues(names, latitudes,longitudes, radius=500, limit=100):
    
    venues_list=[]
    for name, lat, lng in zip(names,latitudes,longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Writing the code to run the above function on each neighborhood and create a new dataframe called downtown_venues

In [50]:
downtown_venues = getNearbyVenues(names=downtown['Neighborhood'],
                                   latitudes=downtown['Latitude'],
                                   longitudes=downtown['Longitude'],
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


### Checking the size of the resulting dataframe

In [51]:
print(downtown_venues.shape)
downtown_venues.head()

(1237, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65514,-79.36265,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65514,-79.36265,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65514,-79.36265,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,"Regent Park, Harbourfront",43.65514,-79.36265,The Yoga Lounge,43.655515,-79.364955,Yoga Studio
4,"Regent Park, Harbourfront",43.65514,-79.36265,Body Blitz Spa East,43.654735,-79.359874,Spa


### Checking how many venues were returned for each neighborhood

In [52]:
downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,67,67,67,67,67,67
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",70,70,70,70,70,70
Central Bay Street,68,68,68,68,68,68
Christie,10,10,10,10,10,10
Church and Wellesley,73,73,73,73,73,73
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Harbourfront East, Union Station, Toronto Islands",51,51,51,51,51,51
"Kensington Market, Chinatown, Grange Park",49,49,49,49,49,49


### Unique categories 

In [28]:
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 191 uniques categories.


In [29]:
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

downtown_onehot


Unnamed: 0,Yoga Studio,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Baby Store,Bagel Shop,...,Theme Restaurant,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1232,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1233,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1234,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1235,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()

### Printing each neighborhood along with the top 5 most common venues

In [31]:
top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(top_venues))
    print('\n')

----Berczy Park----
            venue  freq
0     Coffee Shop  0.07
1    Cocktail Bar  0.04
2      Restaurant  0.04
3            Café  0.03
4  Farmers Market  0.03


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0         Coffee Shop  0.09
1  Italian Restaurant  0.07
2                 Bar  0.04
3                Café  0.04
4           Speakeasy  0.03


----Central Bay Street----
                       venue  freq
0                Coffee Shop  0.10
1             Clothing Store  0.09
2                      Plaza  0.03
3  Middle Eastern Restaurant  0.03
4             Cosmetics Shop  0.03


----Christie----
                venue  freq
0                Café   0.3
1       Grocery Store   0.2
2          Playground   0.1
3  Italian Restaurant   0.1
4         Candy Store   0.1


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.12
1  Japanese Restaurant  0.05
2      

### Putting these information into a dataframe and display the top 10 venues for each neighborhood

In [55]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [56]:
top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Cheese Shop,Pharmacy,Seafood Restaurant,Café,Beer Bar,Hotel,Farmers Market
1,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Italian Restaurant,Bar,Café,Restaurant,Speakeasy,Gym / Fitness Center,Pizza Place,Electronics Store,Park
2,Central Bay Street,Coffee Shop,Clothing Store,Hotel,Plaza,Bubble Tea Shop,Electronics Store,Cosmetics Shop,Sandwich Place,Middle Eastern Restaurant,Pizza Place
3,Christie,Café,Grocery Store,Coffee Shop,Playground,Candy Store,Baby Store,Italian Restaurant,Donut Shop,Farm,Falafel Restaurant
4,Church and Wellesley,Coffee Shop,Restaurant,Japanese Restaurant,Sushi Restaurant,Gay Bar,Pub,Café,Hotel,Dance Studio,Men's Store
5,"Commerce Court, Victoria Hotel",Coffee Shop,Restaurant,Hotel,Café,American Restaurant,Japanese Restaurant,Gym,Italian Restaurant,Seafood Restaurant,Deli / Bodega
6,"First Canadian Place, Underground city",Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gym,Deli / Bodega,Japanese Restaurant,Asian Restaurant,Seafood Restaurant
7,"Garden District, Ryerson",Coffee Shop,Clothing Store,Hotel,Italian Restaurant,Japanese Restaurant,Café,Cosmetics Shop,Fast Food Restaurant,Bubble Tea Shop,Ramen Restaurant
8,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Hotel,Japanese Restaurant,Plaza,Park,Deli / Bodega,Boat or Ferry,Ice Cream Shop,Lake,Electronics Store
9,"Kensington Market, Chinatown, Grange Park",Café,Coffee Shop,Mexican Restaurant,Burger Joint,Gaming Cafe,Bakery,Pizza Place,Park,Vegetarian / Vegan Restaurant,Grocery Store


### Clustering Neighborhoods

In [34]:
from sklearn.cluster import KMeans

kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 3, 1, 4, 4, 1, 4, 1], dtype=int32)

In [65]:
#Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
downtown_merged = downtown

# add clustering labels
downtown_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')


downtown_merged # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65514,-79.36265,1,Coffee Shop,Breakfast Spot,Yoga Studio,Bakery,Health Food Store,Italian Restaurant,Food Truck,Event Space,Electronics Store,Distribution Center
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66449,-79.39302,1,Coffee Shop,Café,Sandwich Place,Park,College Theater,Salon / Barbershop,Restaurant,Chinese Restaurant,Pub,Clothing Store
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.65736,-79.37818,1,Coffee Shop,Clothing Store,Hotel,Italian Restaurant,Japanese Restaurant,Café,Cosmetics Shop,Fast Food Restaurant,Bubble Tea Shop,Ramen Restaurant
15,M5C,Downtown Toronto,St. James Town,43.65143,-79.37557,3,Café,Seafood Restaurant,Coffee Shop,Restaurant,Clothing Store,American Restaurant,Cocktail Bar,Cosmetics Shop,Bakery,Breakfast Spot
20,M5E,Downtown Toronto,Berczy Park,43.64531,-79.37368,1,Coffee Shop,Restaurant,Cocktail Bar,Cheese Shop,Pharmacy,Seafood Restaurant,Café,Beer Bar,Hotel,Farmers Market
24,M5G,Downtown Toronto,Central Bay Street,43.65609,-79.38493,4,Coffee Shop,Clothing Store,Hotel,Plaza,Bubble Tea Shop,Electronics Store,Cosmetics Shop,Sandwich Place,Middle Eastern Restaurant,Pizza Place
25,M6G,Downtown Toronto,Christie,43.66878,-79.42071,4,Café,Grocery Store,Coffee Shop,Playground,Candy Store,Baby Store,Italian Restaurant,Donut Shop,Farm,Falafel Restaurant
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6497,-79.38258,1,Hotel,Café,Coffee Shop,Japanese Restaurant,Restaurant,Gym,American Restaurant,Steakhouse,Salad Place,Asian Restaurant
36,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.64285,-79.3804,4,Coffee Shop,Hotel,Japanese Restaurant,Plaza,Park,Deli / Bodega,Boat or Ferry,Ice Cream Shop,Lake,Electronics Store
42,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.6471,-79.38153,1,Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Seafood Restaurant,American Restaurant,Salad Place,Japanese Restaurant,Concert Hall


In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [66]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, 
                    downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
80,"University of Toronto, Harbord",0,Café,Restaurant,Coffee Shop,Bakery,Japanese Restaurant,Bookstore,Bar,Gym,French Restaurant,Comfort Food Restaurant


### Cluster 2

In [67]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, 
                    downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Regent Park, Harbourfront",1,Coffee Shop,Breakfast Spot,Yoga Studio,Bakery,Health Food Store,Italian Restaurant,Food Truck,Event Space,Electronics Store,Distribution Center
4,"Queen's Park, Ontario Provincial Government",1,Coffee Shop,Café,Sandwich Place,Park,College Theater,Salon / Barbershop,Restaurant,Chinese Restaurant,Pub,Clothing Store
9,"Garden District, Ryerson",1,Coffee Shop,Clothing Store,Hotel,Italian Restaurant,Japanese Restaurant,Café,Cosmetics Shop,Fast Food Restaurant,Bubble Tea Shop,Ramen Restaurant
20,Berczy Park,1,Coffee Shop,Restaurant,Cocktail Bar,Cheese Shop,Pharmacy,Seafood Restaurant,Café,Beer Bar,Hotel,Farmers Market
30,"Richmond, Adelaide, King",1,Hotel,Café,Coffee Shop,Japanese Restaurant,Restaurant,Gym,American Restaurant,Steakhouse,Salad Place,Asian Restaurant
42,"Toronto Dominion Centre, Design Exchange",1,Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Seafood Restaurant,American Restaurant,Salad Place,Japanese Restaurant,Concert Hall
48,"Commerce Court, Victoria Hotel",1,Coffee Shop,Restaurant,Hotel,Café,American Restaurant,Japanese Restaurant,Gym,Italian Restaurant,Seafood Restaurant,Deli / Bodega
84,"Kensington Market, Chinatown, Grange Park",1,Café,Coffee Shop,Mexican Restaurant,Burger Joint,Gaming Cafe,Bakery,Pizza Place,Park,Vegetarian / Vegan Restaurant,Grocery Store
91,Rosedale,1,Park,Shop & Service,Playground,Bike Trail,Campground,Tennis Court,Women's Store,Donut Shop,Falafel Restaurant,Event Space
92,Stn A PO Boxes,1,Coffee Shop,Hotel,Restaurant,Café,Mediterranean Restaurant,Burrito Place,Salon / Barbershop,Sandwich Place,Gym,Seafood Restaurant


### Cluster 3

In [68]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, 
                    downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
87,"CN Tower, King and Spadina, Railway Lands, Har...",2,Coffee Shop,Italian Restaurant,Bar,Café,Restaurant,Speakeasy,Gym / Fitness Center,Pizza Place,Electronics Store,Park


### Cluster 4

In [69]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, 
                            downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,St. James Town,3,Café,Seafood Restaurant,Coffee Shop,Restaurant,Clothing Store,American Restaurant,Cocktail Bar,Cosmetics Shop,Bakery,Breakfast Spot


### Cluster 5

In [70]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, 
                            downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,Central Bay Street,4,Coffee Shop,Clothing Store,Hotel,Plaza,Bubble Tea Shop,Electronics Store,Cosmetics Shop,Sandwich Place,Middle Eastern Restaurant,Pizza Place
25,Christie,4,Café,Grocery Store,Coffee Shop,Playground,Candy Store,Baby Store,Italian Restaurant,Donut Shop,Farm,Falafel Restaurant
36,"Harbourfront East, Union Station, Toronto Islands",4,Coffee Shop,Hotel,Japanese Restaurant,Plaza,Park,Deli / Bodega,Boat or Ferry,Ice Cream Shop,Lake,Electronics Store
97,"First Canadian Place, Underground city",4,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gym,Deli / Bodega,Japanese Restaurant,Asian Restaurant,Seafood Restaurant
