## Segmenting and Clustering Neighborhoods in Toronto

# Part 1 of Week 3 Assignment

### Data Preparation

#### 1. Download initial Packages for data scrapping


In [5]:
## Packages
!pip install bs4
import pandas as pd
from bs4 import BeautifulSoup
import requests



### From Wikipedia Page we parse the information

In [6]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
soup = requests.get(url).text
soup = BeautifulSoup(soup,'html.parser')
## Find all table elements & Create a DF
table = soup.find_all('table')[0]
df = pd.read_html(str(table))
neighborhood = pd.DataFrame(df[0])

In [7]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

# print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


# Part 2 of Week 3 Assignment

### Aggregate Coordinates info to the Data

In [8]:
!pip install geopy
!conda install -c conda-forge geocoder --yes 

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [9]:
import geocoder # import geocoder

latitude=[]
longitude=[]
for code in df['PostalCode']:
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
    print(code, g.latlng)
    while (g.latlng is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
        print(code, g.latlng)
    latlng = g.latlng
    latitude.append(latlng[0])
    longitude.append(latlng[1])


M3A [43.75245000000007, -79.32990999999998]
M4A [43.73057000000006, -79.31305999999995]
M5A [43.65512000000007, -79.36263999999994]
M6A [43.72327000000007, -79.45041999999995]
M7A [43.66253000000006, -79.39187999999996]
M9A [43.662630000000036, -79.52830999999998]
M1B [43.811390000000074, -79.19661999999994]
M3B [43.74923000000007, -79.36185999999998]
M4B [43.70718000000005, -79.31191999999999]
M5B [43.65739000000008, -79.37803999999994]
M6B [43.70687000000004, -79.44811999999996]
M9B [43.65034000000003, -79.55361999999997]
M1C [43.78574000000003, -79.15874999999994]
M3C [43.72168000000005, -79.34351999999996]
M4C [43.68970000000007, -79.30681999999996]
M5C [43.65215000000006, -79.37586999999996]
M6C [43.69211000000007, -79.43035999999995]
M9C [43.64857000000006, -79.57824999999997]
M1E [43.765750000000025, -79.17469999999997]
M4E [43.67709000000008, -79.29546999999997]
M5E [43.64536000000004, -79.37305999999995]
M6E [43.68784000000005, -79.45045999999996]
M1G [43.76812000000007, -79.2

In [10]:
df['Latitude']=latitude
df['Longitude']=longitude
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188


# Part 3 of Week 3 Assignment

## Explore Toronto Neighborhood and Clusters

In [11]:
## Install necessary Libraries
!conda install -c conda-forge folium=0.5.0 --yes 
!conda install -c conda-forge geopy --yes

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [12]:
### Adquire necessary addt'l packages
import json
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium # map rendering library


In [13]:
### Examine initially boroughs and neighborhoods
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]))

The dataframe has 15 boroughs and 103 neighborhoods.


In [14]:
### Create a Map of Toronto Area with Data superimposed to showcase neighborhoods

### first we create the instance for Geocoder for Toronto Coordinates
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  

map_Toronto


### We will refocus our analysis to the Boroughs of Toronto (East, West and Downtown Areas)

In [15]:
df_tor = df[df['Borough'].str.contains("Toronto")].reset_index(drop=True)
df_tor.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804
2,M5C,Downtown Toronto,St. James Town,43.65215,-79.37587
3,M4E,East Toronto,The Beaches,43.67709,-79.29547
4,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306


In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_tor['Borough'].unique()),
        df_tor.shape[0]))

The dataframe has 7 boroughs and 39 neighborhoods.


### We will reiterate our Graphic map only considering these Areas

In [17]:
# create map of Toronto using latitude and longitude values
map_Toronto2 = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_tor['Latitude'], df_tor['Longitude'], df_tor['Borough'], df_tor['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto2)  

map_Toronto2

### Define Foursquare Version and Credentials

In [18]:
CLIENT_ID = 'XOWRBRMUUP5YS1RKR0VHUJWCDTDRZ1NOCBCBPKTB20JO2H3V' # your Foursquare ID
CLIENT_SECRET = '22BQEDAKYLZ1B5PHKFUJGWBUK4Q3SRCFBGQCRPIOP4ZLPAW3' # your Foursquare Secret
VERSION = '20210617' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: XOWRBRMUUP5YS1RKR0VHUJWCDTDRZ1NOCBCBPKTB20JO2H3V
CLIENT_SECRET:22BQEDAKYLZ1B5PHKFUJGWBUK4Q3SRCFBGQCRPIOP4ZLPAW3


### we will focus our analysis around the areas of Toronto Boroughs and its neighborhoods

#### we will define our function to get top nearby Venues on our neighborhoods

In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Toronto_venues = getNearbyVenues(names=df_tor['Neighborhood'],
                                   latitudes=df_tor['Latitude'],
                                   longitudes=df_tor['Longitude']
                                  )

Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Enclave of M5E
St. James Town, Cabbagetown
First Canadi

### Analysis of resulting Dataframe containing Venues and grouping by neighborhoods  


In [20]:
Toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65512,-79.36264,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park, Harbourfront",43.65512,-79.36264,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park, Harbourfront",43.65512,-79.36264,Souvlaki Express,43.655584,-79.364438,Greek Restaurant
3,"Regent Park, Harbourfront",43.65512,-79.36264,Berkeley Church,43.655123,-79.365873,Event Space
4,"Regent Park, Harbourfront",43.65512,-79.36264,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot


In [21]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,47,47,47,47,47,47
"Brockton, Parkdale Village, Exhibition Place",80,80,80,80,80,80
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",74,74,74,74,74,74
Central Bay Street,55,55,55,55,55,55
Christie,8,8,8,8,8,8
Church and Wellesley,70,70,70,70,70,70
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,20,20,20,20,20,20
Davisville North,9,9,9,9,9,9
"Dufferin, Dovercourt Village",17,17,17,17,17,17


## Neighborhood Analysis (Data preparation)

In [22]:
print('The dataframe has {} Neighborhoods and {} Venues.'.format(
        len(Toronto_venues['Neighborhood'].unique()),
        Toronto_venues.shape[0]), "and", 'There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

The dataframe has 38 Neighborhoods and 1590 Venues. and There are 213 uniques categories.


In [23]:
# We apply one hot encoding, to prepare data for analysis and simplify the view of the venues categories
Toronto = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto['Neighborhoods'] = Toronto_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [Toronto.columns[-1]] + list(Toronto.columns[:-1])
Toronto = Toronto[fixed_columns]

Toronto.head()

Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### We group rows by neighborhood and by taking the mean of the frequency of occurrence of each category to see the occurrence

In [24]:
Tor_gp = Toronto.groupby('Neighborhoods').mean().reset_index()
Tor_gp

Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.021277,0.0,0.021277,0.0,0.0,...,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,...,0.0125,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0125
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,...,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514
3,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.018182,0.018182,0.018182,0.0,0.0,0.0
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Church and Wellesley,0.0,0.014286,0.014286,0.014286,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0.0
6,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.02,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0
7,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville North,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Dufferin, Dovercourt Village",0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
Tor_gp.shape

(38, 214)

### Once we have defined our Data frame, let's focus on Top Venues to guide our analysis

In [26]:
### We def a function to sort the data in descending order 
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [27]:
import numpy as np
### We define the limit target for Venues
num_top_venues = 20
### whe choose an ordinal indicator
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
tor_srt = pd.DataFrame(columns=columns)
tor_srt['Neighborhood'] = Tor_gp['Neighborhoods']

for ind in np.arange(Tor_gp.shape[0]):
    tor_srt.iloc[ind, 1:] = return_most_common_venues(Tor_gp.iloc[ind, :], num_top_venues)

tor_srt.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Berczy Park,Cocktail Bar,Sandwich Place,Bakery,Farmers Market,Coffee Shop,Seafood Restaurant,Beer Bar,Vegetarian / Vegan Restaurant,Sporting Goods Shop,...,Molecular Gastronomy Restaurant,Spa,Greek Restaurant,Movie Theater,Supermarket,Museum,Comfort Food Restaurant,Restaurant,French Restaurant,Diner
1,"Brockton, Parkdale Village, Exhibition Place",Bar,Sandwich Place,Coffee Shop,Restaurant,Café,Japanese Restaurant,Pizza Place,Furniture / Home Store,Breakfast Spot,...,Gift Shop,Arts & Crafts Store,Vegetarian / Vegan Restaurant,Bakery,Italian Restaurant,Falafel Restaurant,French Restaurant,Department Store,Ethiopian Restaurant,Gym
2,"CN Tower, King and Spadina, Railway Lands, Har...",Italian Restaurant,Coffee Shop,Sandwich Place,Café,French Restaurant,Park,Bar,Speakeasy,Pizza Place,...,Restaurant,Spa,Bakery,Gym / Fitness Center,Pharmacy,Falafel Restaurant,Ramen Restaurant,Pub,Recreation Center,Seafood Restaurant
3,Central Bay Street,Coffee Shop,Clothing Store,Pizza Place,Sushi Restaurant,Sandwich Place,Bank,Restaurant,Cosmetics Shop,Middle Eastern Restaurant,...,Plaza,Park,Diner,Bubble Tea Shop,Burger Joint,Modern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Music Venue,Shoe Store
4,Christie,Café,Grocery Store,Italian Restaurant,Baby Store,Coffee Shop,Accessories Store,Moroccan Restaurant,Movie Theater,Moving Target,...,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Opera House,Molecular Gastronomy Restaurant,Optical Shop,Organic Grocery


### Once we have established our venue data we can create Clusters for our neighborhoods to evaluate the area

##### We Run K-Means to create Clusters

In [28]:
# set number of clusters according to the graphical distribution we saw East, West, Nort and Downtown
kclusters = 4

Tor_clst = Tor_gp.drop('Neighborhoods', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Tor_clst)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [29]:
# add clustering labels
tor_srt.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_data = df_tor

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_data = toronto_data.join(tor_srt.set_index('Neighborhood'), on='Neighborhood')

### We discover a discrepancy on the data from Geocoder/Wikipedia and Foursquare for this case we decided to drop the column and change the data type to INT
toronto_data.dropna(subset=["Cluster Labels"], axis=0, inplace=True)
toronto_data["Cluster Labels"]= toronto_data["Cluster Labels"].astype("int")

# reset index, because we droped two rows
toronto_data.reset_index(drop=True, inplace=True)

toronto_data.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,0,Coffee Shop,Distribution Center,Event Space,Spa,...,Bakery,Health Food Store,Thrift / Vintage Store,Wine Shop,Discount Store,Electronics Store,Italian Restaurant,Organic Grocery,Pet Store,Moving Target
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.65739,-79.37804,0,Coffee Shop,Sandwich Place,Clothing Store,Hotel,...,Bank,Theater,Falafel Restaurant,Burger Joint,Bookstore,Ramen Restaurant,Bar,Restaurant,Spa,Italian Restaurant
2,M5C,Downtown Toronto,St. James Town,43.65215,-79.37587,0,Coffee Shop,Italian Restaurant,Cocktail Bar,Café,...,Music Venue,New American Restaurant,Moroccan Restaurant,Steakhouse,Monument / Landmark,Candy Store,Creperie,Distribution Center,Shopping Mall,Molecular Gastronomy Restaurant
3,M4E,East Toronto,The Beaches,43.67709,-79.29547,0,Pub,Health Food Store,Neighborhood,Accessories Store,...,Museum,Music Store,Music Venue,New American Restaurant,Opera House,Noodle House,Mobile Phone Shop,Optical Shop,Organic Grocery,Other Nightlife
4,M5E,Downtown Toronto,Berczy Park,43.64536,-79.37306,0,Cocktail Bar,Sandwich Place,Bakery,Farmers Market,...,Molecular Gastronomy Restaurant,Spa,Greek Restaurant,Movie Theater,Supermarket,Museum,Comfort Food Restaurant,Restaurant,French Restaurant,Diner


### With the new data frame we have the analysis including Venues, Top Common that can be joined with our Clusters and Graphically show in a Map


In [32]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood'], toronto_data['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Clusters Analysis

#### Cluster 1 

In [33]:
toronto_data.loc[toronto_data['Cluster Labels'] == 0, toronto_data.columns[[1] + list(range(5, toronto_data.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Distribution Center,Event Space,Spa,Breakfast Spot,Sandwich Place,Greek Restaurant,Restaurant,...,Bakery,Health Food Store,Thrift / Vintage Store,Wine Shop,Discount Store,Electronics Store,Italian Restaurant,Organic Grocery,Pet Store,Moving Target
1,Downtown Toronto,0,Coffee Shop,Sandwich Place,Clothing Store,Hotel,Café,Japanese Restaurant,Middle Eastern Restaurant,Cosmetics Shop,...,Bank,Theater,Falafel Restaurant,Burger Joint,Bookstore,Ramen Restaurant,Bar,Restaurant,Spa,Italian Restaurant
2,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Cocktail Bar,Café,Cosmetics Shop,Restaurant,Clothing Store,Diner,...,Music Venue,New American Restaurant,Moroccan Restaurant,Steakhouse,Monument / Landmark,Candy Store,Creperie,Distribution Center,Shopping Mall,Molecular Gastronomy Restaurant
3,East Toronto,0,Pub,Health Food Store,Neighborhood,Accessories Store,Nightclub,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,...,Museum,Music Store,Music Venue,New American Restaurant,Opera House,Noodle House,Mobile Phone Shop,Optical Shop,Organic Grocery,Other Nightlife
4,Downtown Toronto,0,Cocktail Bar,Sandwich Place,Bakery,Farmers Market,Coffee Shop,Seafood Restaurant,Beer Bar,Vegetarian / Vegan Restaurant,...,Molecular Gastronomy Restaurant,Spa,Greek Restaurant,Movie Theater,Supermarket,Museum,Comfort Food Restaurant,Restaurant,French Restaurant,Diner
5,Downtown Toronto,0,Coffee Shop,Clothing Store,Pizza Place,Sushi Restaurant,Sandwich Place,Bank,Restaurant,Cosmetics Shop,...,Plaza,Park,Diner,Bubble Tea Shop,Burger Joint,Modern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Music Venue,Shoe Store
6,Downtown Toronto,0,Café,Grocery Store,Italian Restaurant,Baby Store,Coffee Shop,Accessories Store,Moroccan Restaurant,Movie Theater,...,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Opera House,Molecular Gastronomy Restaurant,Optical Shop,Organic Grocery
7,Downtown Toronto,0,Coffee Shop,Café,Sandwich Place,Hotel,Japanese Restaurant,Sushi Restaurant,Asian Restaurant,Restaurant,...,Gym,Steakhouse,Gastropub,American Restaurant,Pizza Place,Concert Hall,Cocktail Bar,Greek Restaurant,Soup Place,Gym / Fitness Center
8,West Toronto,0,Grocery Store,Park,Pizza Place,Bakery,Bus Line,Middle Eastern Restaurant,Brazilian Restaurant,Smoke Shop,...,Bank,Furniture / Home Store,Pet Store,Art Gallery,Pharmacy,Organic Grocery,Moroccan Restaurant,Pilates Studio,Movie Theater,Moving Target
10,Downtown Toronto,0,Coffee Shop,Sandwich Place,Japanese Restaurant,Aquarium,Hotel,Boat or Ferry,Park,Bank,...,Café,Spa,Clothing Store,Cocktail Bar,Salad Place,Restaurant,Pizza Place,Plaza,Electronics Store,Bubble Tea Shop


In [34]:
toronto_data.loc[toronto_data['Cluster Labels'] == 1, toronto_data.columns[[1] + list(range(5, toronto_data.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
9,East York/East Toronto,1,Intersection,Park,Accessories Store,Nightclub,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,...,Music Store,Music Venue,Neighborhood,New American Restaurant,Opera House,Noodle House,Mobile Phone Shop,Optical Shop,Organic Grocery,Other Nightlife
20,Central Toronto,1,Park,Accessories Store,Nightclub,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,...,Music Venue,Neighborhood,New American Restaurant,Noodle House,Mobile Phone Shop,Opera House,Optical Shop,Organic Grocery,Other Nightlife,Performing Arts Venue
32,Downtown Toronto,1,Park,Playground,Bike Trail,New American Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,...,Museum,Music Store,Music Venue,Neighborhood,Accessories Store,Mobile Phone Shop,Noodle House,Opera House,Optical Shop,Organic Grocery


In [35]:
toronto_data.loc[toronto_data['Cluster Labels'] == 2, toronto_data.columns[[1] + list(range(5, toronto_data.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
18,Central Toronto,2,Bus Line,Swim School,Accessories Store,New American Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,...,Music Store,Music Venue,Neighborhood,Nightclub,Mobile Phone Shop,Noodle House,Opera House,Optical Shop,Organic Grocery,Other Nightlife


In [36]:
toronto_data.loc[toronto_data['Cluster Labels'] == 3, toronto_data.columns[[1] + list(range(5, toronto_data.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
21,West Toronto,3,Convenience Store,Residential Building (Apartment / Condo),Accessories Store,Nightclub,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,...,Music Store,Music Venue,Neighborhood,New American Restaurant,Opera House,Noodle House,Pizza Place,Optical Shop,Organic Grocery,Other Nightlife
