# PART 1

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

# Web scraping the wikipedia page to get the table using BeautifulSoup
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
soup = BeautifulSoup(source.text, 'lxml')
contentTable = soup.find('table', class_='wikitable sortable')
rows = contentTable.find_all('tr')

# 3 Lists for the 3 columns in the table
A=[]
B=[]
C=[]

# For each row on the table captured, each column value is stored in corresponding list created above
for row in rows:
    cells=row.findAll('td')
    if len(cells)==3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

In [2]:
# Make the three lists into a Pandas dataframe
df2=pd.DataFrame(A,columns=['PostalCode'])
df2['Borough']=B
df2['Neighborhood']=C

In [3]:
# Remove the Boroughs with 'Not assigned' values
df2 = df2[df2.Borough != 'Not assigned']

df2['Neighborhood'] = np.where(df2['Neighborhood'] == 'Not assigned\n', df2['Borough'], df2['Neighborhood'])

In [4]:
grouped = pd.DataFrame(df2.groupby('PostalCode')['Neighborhood'].apply(list))

In [5]:
#pd.concat([df4, a], join='inner')
grouped.head(10)

Unnamed: 0_level_0,Neighborhood
PostalCode,Unnamed: 1_level_1
M1B,"[Rouge, Malvern]"
M1C,"[Highland Creek, Rouge Hill, Port Union]"
M1E,"[Guildwood , Morningside, West Hill]"
M1G,[Woburn]
M1H,[Cedarbrae ]
M1J,[Scarborough Village]
M1K,"[East Birchmount Park , Ionview, Kennedy Park]"
M1L,"[Clairlea, Golden Mile, Oakridge]"
M1M,"[Cliffcrest, Cliffside, Scarborough Village We..."
M1N,"[Birch Cliff, Cliffside West ]"


In [6]:
df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [7]:
df2.drop(columns='Neighborhood', inplace=True)
df2.drop_duplicates(inplace=True)

In [8]:
final_df = df2.join(grouped, on='PostalCode').reset_index(drop=True)

In [9]:
# Assumption: The postal codes are unique. This is because I grouped the data earlier using the postal codes.
# The following tests the PostalCode column to confirm there are indeed no duplicates
duplicateRows = final_df[final_df.duplicated(['PostalCode'])]

duplicateRows

Unnamed: 0,PostalCode,Borough,Neighborhood


### Hence the final table:

In [10]:
final_df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,[Parkwoods]
1,M4A,North York,[Victoria Village]
2,M5A,Downtown Toronto,"[Harbourfront, Regent Park]"
3,M6A,North York,"[Lawrence Heights, Lawrence Manor]"
4,M7A,Queen's Park,[Queen's Park]
5,M9A,Etobicoke,[Islington Avenue]
6,M1B,Scarborough,"[Rouge, Malvern]"
7,M3B,North York,[Don Mills North ]
8,M4B,East York,"[Woodbine Gardens, Parkview Hill]"
9,M5B,Downtown Toronto,"[Ryerson , Garden District ]"


In [11]:
final_df.shape

(103, 3)

# PART 2

In [12]:
import geocoder 

As we can see, Geocoder cannot be imported. With no way of installing the package, I have opted to use the Geospatial_Coordinates CSV file.

In [14]:
import types
import pandas as pd

path = '/Users/philip1/Downloads/Geospatial_Coordinates.csv'

df_geo = pd.read_csv(path)
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
df_geo.rename(columns={'Postal Code':'PostalCode'}, inplace=True)
df_geo.set_index('PostalCode');
finaldf2 = pd.merge(final_df, df_geo, on='PostalCode')

In [16]:
finaldf2.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,[Parkwoods],43.753259,-79.329656
1,M4A,North York,[Victoria Village],43.725882,-79.315572
2,M5A,Downtown Toronto,"[Harbourfront, Regent Park]",43.65426,-79.360636
3,M6A,North York,"[Lawrence Heights, Lawrence Manor]",43.718518,-79.464763
4,M7A,Queen's Park,[Queen's Park],43.662301,-79.389494
5,M9A,Etobicoke,[Islington Avenue],43.667856,-79.532242
6,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
7,M3B,North York,[Don Mills North ],43.745906,-79.352188
8,M4B,East York,"[Woodbine Gardens, Parkview Hill]",43.706397,-79.309937
9,M5B,Downtown Toronto,"[Ryerson , Garden District ]",43.657162,-79.378937


In [17]:
finaldf2.shape

(103, 5)

In [18]:
# Check if there is any Null value which would indicate a mismatch.
finaldf2.isnull().values.any()

False

# PART 3

In [19]:
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium

In [20]:
latitude = finaldf2['Latitude']
longitude = finaldf2['Longitude']
map_toronto = folium.Map(location=[latitude[0], longitude[0]], zoom_start=10)

# All boroughs show up with red markers, while the ones of interest (containing Toronto) show up in blue.
for lat,lng,borough,postcode in zip(finaldf2['Latitude'], finaldf2['Longitude'], finaldf2['Borough'], finaldf2['PostalCode']):
    diff = None
    if 'Toronto' in borough:
        diff = 'blue'
    else:
        diff = 'red'
               
    label = f'{borough},{postcode}'
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat,lng],
                        radius = 5,
                        popup=label,
                        color=diff,
                        fill=True,
                        fill_color='diff',
                        fill_opacity=0.7,
                        parse_html=False).add_to(map_toronto)
                
map_toronto

In [21]:
CLIENT_ID = 'NSTWZCLW4JJ1AXCVCISMJJ5EB4EJI2M31ZYTMCSXNSNPAETY' # your Foursquare ID
CLIENT_SECRET = 'UY3POCAX1IQ22N52KKFNJ210BSK4IJHFSJ21X05RL0YHNXYH' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: NSTWZCLW4JJ1AXCVCISMJJ5EB4EJI2M31ZYTMCSXNSNPAETY
CLIENT_SECRET:UY3POCAX1IQ22N52KKFNJ210BSK4IJHFSJ21X05RL0YHNXYH


In [22]:
borough_lat = finaldf2.loc[0, 'Latitude']
borough_lng = finaldf2.loc[0, 'Longitude']
borough_name = finaldf2.loc[0, 'Borough']
borough_code = finaldf2.loc[0, 'PostalCode']

## Venue Exploration
#### Top 50 venues in the first borough, within a 600m radius

In [23]:
import requests
import json
from pandas.io.json import json_normalize

In [24]:
LIMIT = 50
radius = 600

URL = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    borough_lat, 
    borough_lng, 
    radius, 
    LIMIT)

In [25]:
results = requests.get(URL).json()
results

{'meta': {'code': 200, 'requestId': '5d63cce95f9e7900262f7dd7'},
 'response': {'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.75865860540001,
    'lng': -79.32219458781913},
   'sw': {'lat': 43.7478585946, 'lng': -79.33711841218087}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',

In [26]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [27]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,PetSmart,Pet Store,43.748639,-79.333488
2,KFC,Fast Food Restaurant,43.754387,-79.333021
3,Variety Store,Food & Drink Shop,43.751974,-79.333114


Only 4 venues were returned for the North York MC3 Borough. Perhaps not a very eventful area.

### Display the top venues for each borough

In [28]:
def getNearbyVenues(names, latitudes, longitudes, radius=600):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [42]:
toronto_borough = []
toronto_lat = []
toronto_long = []
toronto_code = []

for lat,long,borough,postcode in zip(finaldf2['Latitude'], finaldf2['Longitude'], finaldf2['Borough'], finaldf2['PostalCode']):
    if 'Toronto' in borough:
        toronto_borough.append(borough)
        toronto_lat.append(lat)
        toronto_long.append(long)
        toronto_code.append(postcode)
        
toronto_data = pd.DataFrame({'Borough': toronto_borough, 
                             'Latitude': toronto_lat, 
                             'Longitude': toronto_long, 
                             'Postcode': toronto_code})       
toronto_data        
toronto_venues = getNearbyVenues(names=toronto_data.Postcode,
                                 latitudes=toronto_data.Latitude,
                                 longitudes=toronto_data.Longitude)
                                          

M5A
M5B
M5C
M4E
M5E
M5G
M6G
M5H
M6H
M5J
M6J
M4K
M5K
M6K
M4L
M5L
M4M
M4N
M5N
M4P
M5P
M6P
M4R
M5R
M6R
M4S
M5S
M6S
M4T
M5T
M4V
M5V
M4W
M5W
M4X
M5X
M4Y
M7Y


In [43]:
print(toronto_venues.shape)
toronto_venues.head()

(1380, 7)


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M5A,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,M5A,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,M5A,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [44]:
toronto_venues.groupby('Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M4E,17,17,17,17,17,17
M4K,50,50,50,50,50,50
M4L,29,29,29,29,29,29
M4M,50,50,50,50,50,50
M4N,4,4,4,4,4,4
M4P,13,13,13,13,13,13
M4R,29,29,29,29,29,29
M4S,45,45,45,45,45,45
M4T,4,4,4,4,4,4
M4V,50,50,50,50,50,50


Downtown Toronto appears to have significantly more values than other boroughs. This does infact make a lot of sense as Downtown is the commercial area of a city (What we would refer to as a 'City Centre' in the UK).

### Analyse each Borough

In [100]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Borough Postcode'] = toronto_venues['Borough'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Borough Postcode,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Animal Shelter,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [101]:
toronto_onehot.shape

(1380, 230)

In [102]:
toronto_grouped = toronto_onehot.groupby('Borough Postcode').mean().reset_index()
toronto_grouped.shape

(38, 230)

In [103]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [114]:
num_top_venues = 8

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough Postcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
borough_venues_sorted = pd.DataFrame(columns=columns)
borough_venues_sorted['Borough Postcode'] = toronto_grouped['Borough Postcode']

for ind in np.arange(toronto_grouped.shape[0]):
    borough_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

borough_venues_sorted.head()

Unnamed: 0,Borough Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
0,M4E,Pub,French Restaurant,Health Food Store,Bakery,Indian Restaurant,Ice Cream Shop,Electronics Store,Neighborhood
1,M4K,Greek Restaurant,Italian Restaurant,Café,Coffee Shop,Ice Cream Shop,Burger Joint,Pub,Yoga Studio
2,M4L,Park,Pet Store,Gym,Sandwich Place,Café,Fish & Chips Shop,Burger Joint,Italian Restaurant
3,M4M,Café,Bar,Italian Restaurant,Bakery,Coffee Shop,American Restaurant,Sandwich Place,Yoga Studio
4,M4N,Park,Swim School,Jewelry Store,Bus Line,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant


Going by the above data, it is probably safe to say the people of Toronto are sorted when it comes to café food.

## Cluster Boroughs

Here, since there are only four boroughs by name, the postcodes will be used instead

4 clusters will be used for K-means

In [115]:
toronto_postcode_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_postcode_onehot['Borough'] = toronto_venues['Borough']  # Note that Borough in the one-hot encoding dataframe now represents postcodes for each borough

fixed_columns = [toronto_postcode_onehot.columns[-1]] + list(toronto_postcode_onehot.columns[:-1])
toronto_postcode_onehot = toronto_postcode_onehot[fixed_columns]

toronto_postcode_onehot.head()

Unnamed: 0,Borough,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Animal Shelter,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [116]:
# Set the number of clusters
k = 4

toronto_grouped_cluster = toronto_grouped.drop('Borough Postcode', 1)

# Run the kmeans
kmeans = KMeans(n_clusters=k, random_state=0).fit(toronto_grouped_cluster)

kmeans.labels_[0:10]

array([3, 3, 3, 3, 0, 3, 3, 3, 1, 3], dtype=int32)

In [117]:
# add clustering labels
borough_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(borough_venues_sorted.set_index('Borough Postcode'), on='Postcode')

toronto_merged.head() # check the last columns!

Unnamed: 0,Borough,Latitude,Longitude,Postcode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
0,Downtown Toronto,43.65426,-79.360636,M5A,3,Coffee Shop,Bakery,Park,Theater,Café,Gym / Fitness Center,Mexican Restaurant,Italian Restaurant
1,Downtown Toronto,43.657162,-79.378937,M5B,3,Coffee Shop,Bookstore,Cosmetics Shop,Fast Food Restaurant,Café,Clothing Store,Ramen Restaurant,Middle Eastern Restaurant
2,Downtown Toronto,43.651494,-79.375418,M5C,3,Coffee Shop,Restaurant,Café,Gastropub,Cocktail Bar,Farmers Market,Italian Restaurant,Japanese Restaurant
3,East Toronto,43.676357,-79.293031,M4E,3,Pub,French Restaurant,Health Food Store,Bakery,Indian Restaurant,Ice Cream Shop,Electronics Store,Neighborhood
4,Downtown Toronto,43.644771,-79.373306,M5E,3,Coffee Shop,Café,Cocktail Bar,Seafood Restaurant,Hotel,Creperie,Farmers Market,Bakery


In [125]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[toronto_merged.Latitude.tolist()[0],toronto_merged.Longitude.tolist()[0]], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 0

In [126]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
17,43.72802,Park,Swim School,Jewelry Store,Bus Line,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant
20,43.696948,Mexican Restaurant,Trail,Sushi Restaurant,Jewelry Store,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant


### Cluster 1

In [127]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
28,43.689574,Park,Gym,Playground,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
32,43.679563,Park,Playground,Trail,Building,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store


### Cluster 2

In [128]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
18,43.711695,Spa,Playground,Garden,Dance Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


### Cluster 3

In [129]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue
0,43.65426,Coffee Shop,Bakery,Park,Theater,Café,Gym / Fitness Center,Mexican Restaurant,Italian Restaurant
1,43.657162,Coffee Shop,Bookstore,Cosmetics Shop,Fast Food Restaurant,Café,Clothing Store,Ramen Restaurant,Middle Eastern Restaurant
2,43.651494,Coffee Shop,Restaurant,Café,Gastropub,Cocktail Bar,Farmers Market,Italian Restaurant,Japanese Restaurant
3,43.676357,Pub,French Restaurant,Health Food Store,Bakery,Indian Restaurant,Ice Cream Shop,Electronics Store,Neighborhood
4,43.644771,Coffee Shop,Café,Cocktail Bar,Seafood Restaurant,Hotel,Creperie,Farmers Market,Bakery
5,43.657952,Coffee Shop,Café,Japanese Restaurant,Spa,Italian Restaurant,Bubble Tea Shop,Chinese Restaurant,Tea Room
6,43.669542,Grocery Store,Café,Park,Playground,Coffee Shop,Italian Restaurant,Baby Store,Convenience Store
7,43.650571,Coffee Shop,Café,Steakhouse,American Restaurant,Hotel,Gastropub,Pizza Place,Asian Restaurant
8,43.669005,Bakery,Park,Pharmacy,Supermarket,Music Venue,Gym,Gym / Fitness Center,Brewery
9,43.640816,Coffee Shop,Aquarium,Café,Brewery,Hotel,Park,Fried Chicken Joint,Salad Place


## Insight

From inspecting the different clusters created, it is immediately obvious that cluster 3 is made up of areas with food/drink venues as the most common. In addition, the amount of cafés and coffee shop venues suggests these boroughs are the more commercially active parts of Toronto, where there is more disposable income to regularly patronize food and drink vendors. 



The other clusters are a contrast to this, as there are more of parks, playgrounds, event spaces, electronics stores etc. Perhaps a more residential scenario