# This is my notebook that I will use for the capstone project in coursera

In [1]:
import pandas as pd
import numpy as np


### Getting the file information from wikipedia using pandas

In [2]:
file = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# returns a list of tables from the file
tables = pd.read_html(file)

### tables is a list of tables so how many tables does it contain?


In [3]:
print("# of tables: ", len(tables))

# of tables:  3


### combine each table into tables

In [4]:
df = pd.concat(tables,axis=1)
df.head()
df.columns

Index([  'Postal Code',       'Borough', 'Neighbourhood',               0,
                     1,               2,               3,               4,
                     5,               6,               7,               8,
                     9,              10,              11,              12,
                    13,              14,              15,              16,
                    17,               0,               1,               2,
                     3,               4,               5,               6,
                     7,               8,               9,              10,
                    11,              12,              13,              14,
                    15,              16,              17],
      dtype='object')

### df has alot of useless info in it.  Let's just specify postal cod, borough and neighourhood

In [6]:
# df has alot of useless info let's get rid of that
df_info = df[['Postal Code','Borough','Neighbourhood']]
df_info.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Let's drop boroughs that haven't been assigned

In [7]:
# Now let's drop rows that have "Not assigned" in the borough
df_info = df_info[ df['Borough'] != 'Not assigned']
df_info.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Postal codes should have their unique row with no duplicate rows
#### This cell shows that the number of unique rows is equal to the shape of the dataframe; i.e., there are no duplicate rows

In [8]:
# There should only exist one row for each Postal Code
uniqueRows = df_info['Postal Code'].nunique()

print(uniqueRows, df_info.shape)
print("Each row has unique postal code: ", uniqueRows == df_info.shape[0])

103 (103, 3)
Each row has unique postal code:  True


### If a cell has a borough but a Not assigned  neighborhood, then the neighborhood will be the same as the borough.

In [9]:
df['Neighbourhood'].value_counts()

Not assigned                                       77
Downsview                                           4
Don Mills                                           2
University of Toronto, Harbord                      1
Alderwood, Long Branch                              1
                                                   ..
India Bazaar, The Beaches West                      1
Willowdale, Newtonbrook                             1
Humberlea, Emery                                    1
Berczy Park                                         1
Cliffside, Cliffcrest, Scarborough Village West     1
Name: Neighbourhood, Length: 100, dtype: int64

### There's 77 'Not assigned' Neighbourhoods!  Let's change their neighbourhood to their respective borough!
#### I use value_counts() to check if "Not assigned" value still exists in the Neighbourhood column

In [10]:
# There's 77 Not assigned neighbourhoods!
for row in df_info.index:
    if(df_info.loc[row,'Neighbourhood'] == 'Not assigned'):
        df_info.loc[row,'Neighbourhood'] = df_info.loc[row, 'Borough']
        
df_info['Neighbourhood'].value_counts()

Downsview                                                                               4
Don Mills                                                                               2
Business reply mail Processing Centre, South Central Letter Processing Plant Toronto    1
Bathurst Manor, Wilson Heights, Downsview North                                         1
Cedarbrae                                                                               1
                                                                                       ..
Northwood Park, York University                                                         1
Del Ray, Mount Dennis, Keelsdale and Silverthorn                                        1
Parkdale, Roncesvalles                                                                  1
Humberlea, Emery                                                                        1
Golden Mile, Clairlea, Oakridge                                                         1
Name: Neig

#### How many rows x columns does the dataframe have?

In [11]:
# Shape of dataframe
df_info.shape

(103, 3)

### Importing coordinates from a csv file

In [12]:
geoDataURL = "https://cocl.us/Geospatial_data"
geoData = pd.read_csv(geoDataURL)
geoData.sort_values('Postal Code', inplace=True)
geoData.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Let's set the index to the postal code
#### This allows me to merge the correct lat & longitude to the correct row

In [13]:
geoData = geoData.set_index('Postal Code')

In [14]:
geoData.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


### Setting the index to postal code for the main dataframe

In [15]:
df_info = df_info.set_index('Postal Code')
df_info.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### This loops through the indicies (postal code) in the main df, gets the coordinates and assigns them to the corresponding column

In [16]:
for postal_code in df_info.index:
    lat = geoData.loc[postal_code, 'Latitude']
    long = geoData.loc[postal_code,'Longitude']
    df_info.loc[postal_code, 'Latitude'] = lat
    df_info.loc[postal_code, 'Longitude'] = long
    
df_info.head()

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### I might not want the indicies to be the postal code.  So the indicies are rest and the postal code is placed into its own column

In [17]:
df_info.reset_index(inplace=True)
df_info.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Using foursquare to explore what is around in toronto
### Filter the boroughs to only include Boroughs from Toronto

In [19]:
df_info = df_info[ df_info['Borough'].str.contains("Toronto")  ]
df_info.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [20]:
df_info.shape

(40, 5)

### Credentials cell & URL for FourSquare API

In [21]:
import requests, json

In [23]:
CLIENT_ID = "REDACTED"
CLIENT_SECRET = 'REDACTED'
VERSION = "20180605"
LIMIT = 100

### Defining a method to get nearby venues from a given list of names and coordinates

In [52]:
def getNearbyVenues(names,latitudes,longitudes, radius=500):

    venues_list=[]
    for name,lat,lng in zip(names,latitudes,longitudes):
        
        # Forming the URL based on the current lat,lng
        url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT
        )
        
        # Getting the JSON response
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        #print(results)
        # Appending useful data to the list venues_list
        venues_list.append([(
            name,
            lat,
            lng,
            v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])
        
    # Converting the nearby
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list
    ])
        
    nearby_venues.columns = ['Neighborhood',
                          'Neighborhood Latitude',
                         'Neighborhood Latitude',
                         'Venue',
                         'Venue Latitude',
                          'Venue Longitude',
                         'Venue Category']
    return(nearby_venues)

### Calling the method defined in the previous cell.  Checking the result with .head()

In [53]:
toronto_venues = getNearbyVenues(names = df_info['Neighbourhood'],latitudes = df_info['Latitude'], longitudes = df_info['Longitude'])
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Latitude.1,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


### Looking at the number of unique venue categories

In [57]:
toronto_venues['Venue Category'].nunique()

233

### Create dummies (1/0) of the possible categorical variables

In [60]:
torontoOneHot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="",prefix_sep="")

torontoOneHot['Neighborhood'] = toronto_venues['Neighborhood']
columns = [torontoOneHot.columns[-1]] + list(torontoOneHot.columns[:-1])
torontoOneHot = torontoOneHot[columns]
torontoOneHot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [62]:
torontoOneHot.shape

(1617, 233)

### Grouping venues by neighborhood

In [63]:
torontoGrouped = torontoOneHot.groupby('Neighborhood').mean().reset_index()
torontoGrouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.016393,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.013158,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [65]:
torontoGrouped.shape

(40, 233)

### Loop through the neighborhood column, create a temp dataframe of venue and frequency, sort the temp df, and print it.

In [66]:
topN = 5

for h in torontoGrouped['Neighborhood']:
    print("===",h,"===")
    temp = torontoGrouped[torontoGrouped['Neighborhood'] == h].T.reset_index()
    temp.columns=['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq':2})
    print(temp.sort_values('freq',ascending=False).reset_index(drop=True).head(topN))
    print('\n')

=== Berczy Park ===
                venue  freq
0         Coffee Shop  0.08
1              Bakery  0.05
2        Cocktail Bar  0.05
3      Farmers Market  0.03
4  Seafood Restaurant  0.03


=== Brockton, Parkdale Village, Exhibition Place ===
                   venue  freq
0                   Café  0.12
1         Breakfast Spot  0.08
2  Performing Arts Venue  0.08
3                 Bakery  0.08
4            Coffee Shop  0.08


=== Business reply mail Processing Centre, South Central Letter Processing Plant Toronto ===
                  venue  freq
0            Comic Shop  0.07
1            Restaurant  0.07
2  Fast Food Restaurant  0.07
3           Pizza Place  0.07
4            Skate Park  0.07


=== CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport ===
              venue  freq
0    Airport Lounge  0.13
1   Airport Service  0.13
2  Airport Terminal  0.13
3   Harbor / Marina  0.07
4          Boutique  0.07


=== Central Bay Stree

### Defining method that sorts venues in descending order

In [67]:
def return_most_common_venues(row,num_top_venues):
    rowCategories = row.iloc[1:]
    rowCategoriesSorted = rowCategories.sort_values(ascending=False)
    return rowCategoriesSorted.index.values[0:num_top_venues]

### Creates dataframe and records the top 10 venues

In [81]:
num_top_venues = 10
indicators = ['st','nd','rd']

columns = ['Neighborhood']

for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
        
neighborhoodsVenueSorted = pd.DataFrame(columns=columns)
neighborhoodsVenueSorted['Neighborhood'] = torontoGrouped['Neighborhood']

for ind in np.arange(torontoGrouped.shape[0]):
    neighborhoodsVenueSorted.iloc[ind,1:] = return_most_common_venues(torontoGrouped.iloc[ind,:],num_top_venues)
    
neighborhoodsVenueSorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Bakery,Cocktail Bar,Cheese Shop,Pharmacy,Restaurant,Farmers Market,Beer Bar,Seafood Restaurant,Bistro
1,"Brockton, Parkdale Village, Exhibition Place",Café,Bakery,Breakfast Spot,Performing Arts Venue,Coffee Shop,Pet Store,Italian Restaurant,Bar,Intersection,Restaurant
2,"Business reply mail Processing Centre, South C...",Gym / Fitness Center,Auto Workshop,Comic Shop,Pizza Place,Restaurant,Butcher,Burrito Place,Brewery,Skate Park,Light Rail Station
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Boat or Ferry,Boutique,Rental Car Location,Plane,Harbor / Marina,Sculpture Garden,Airport Gate
4,Central Bay Street,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Thai Restaurant,Japanese Restaurant,Burger Joint,Bubble Tea Shop,Salad Place,Ramen Restaurant


### Using KMeans Algorithm (with k = 5) to fit the torontoGrouped data to 5 centroids

In [82]:
from sklearn.cluster import KMeans
k = 5
torontoGroupedClustering = torontoGrouped.drop('Neighborhood',axis=1)
kmeans = KMeans(n_clusters=k, random_state=0)
kmeans.fit(torontoGroupedClustering)

kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

### Merging the sorted data with the original data (the one with lat & long data)

In [110]:
#neighborhoodsVenueSorted.insert(0,'Cluster Labels', kmeans.labels_)

torontoMerged = df_info
torontoMerged.head()
torontoMerged = torontoMerged.join(neighborhoodsVenueSorted.set_index('Neighborhood'),
                                  on = 'Neighbourhood')

torontoMerged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1,Coffee Shop,Park,Bakery,Breakfast Spot,Café,Pub,Theater,Farmers Market,French Restaurant,Greek Restaurant
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Café,Bar,Bank,Mexican Restaurant,Japanese Restaurant,Portuguese Restaurant
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1,Coffee Shop,Clothing Store,Italian Restaurant,Hotel,Bubble Tea Shop,Middle Eastern Restaurant,Café,Japanese Restaurant,Cosmetics Shop,Movie Theater
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1,Café,Coffee Shop,Gastropub,Cocktail Bar,Clothing Store,Art Gallery,Seafood Restaurant,Farmers Market,Cosmetics Shop,Restaurant
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Asian Restaurant,Health Food Store,Coffee Shop,Trail,Pub,Donut Shop,Discount Store,Distribution Center,Dog Run,Doner Restaurant


### Installing, importing folium.  

In [93]:
!pip install folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 6.2 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [94]:
import folium

### Creating colors so the clusters can be color coated.  Creating the map around Toronto.  Creating the popups for each Neighbourhood

In [102]:
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[43.65,-79.38],zoom_start=11)

x = np.arange(k)
ys = [i * x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0,1,len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat,lon,poi,cluster in zip(torontoMerged['Latitude'],torontoMerged['Longitude'],torontoMerged['Neighbourhood'],torontoMerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat,lon],
        radius = 5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
        
map_clusters

## Displaying what is in each culster
### Cluster # 0

In [114]:
torontoMerged.loc[torontoMerged['Cluster Labels']==0, torontoMerged.columns[[2] +list(range(5,torontoMerged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,"Runnymede, The Junction, Weston-Pellam Park, C...",0,Convenience Store,Breakfast Spot,Brewery,Bus Line,Women's Store,Distribution Center,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store


### Cluster # 1

In [115]:
torontoMerged.loc[torontoMerged['Cluster Labels']==1, torontoMerged.columns[[2] +list(range(5,torontoMerged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Regent Park, Harbourfront",1,Coffee Shop,Park,Bakery,Breakfast Spot,Café,Pub,Theater,Farmers Market,French Restaurant,Greek Restaurant
4,"Queen's Park, Ontario Provincial Government",1,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Café,Bar,Bank,Mexican Restaurant,Japanese Restaurant,Portuguese Restaurant
9,"Garden District, Ryerson",1,Coffee Shop,Clothing Store,Italian Restaurant,Hotel,Bubble Tea Shop,Middle Eastern Restaurant,Café,Japanese Restaurant,Cosmetics Shop,Movie Theater
15,St. James Town,1,Café,Coffee Shop,Gastropub,Cocktail Bar,Clothing Store,Art Gallery,Seafood Restaurant,Farmers Market,Cosmetics Shop,Restaurant
19,The Beaches,1,Asian Restaurant,Health Food Store,Coffee Shop,Trail,Pub,Donut Shop,Discount Store,Distribution Center,Dog Run,Doner Restaurant
20,Berczy Park,1,Coffee Shop,Bakery,Cocktail Bar,Cheese Shop,Pharmacy,Restaurant,Farmers Market,Beer Bar,Seafood Restaurant,Bistro
24,Central Bay Street,1,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Thai Restaurant,Japanese Restaurant,Burger Joint,Bubble Tea Shop,Salad Place,Ramen Restaurant
25,Christie,1,Grocery Store,Café,Park,Coffee Shop,Candy Store,Athletics & Sports,Italian Restaurant,Restaurant,Nightclub,Baby Store
30,"Richmond, Adelaide, King",1,Coffee Shop,Café,Restaurant,Hotel,Bakery,Gym,Deli / Bodega,Thai Restaurant,Clothing Store,Asian Restaurant
31,"Dufferin, Dovercourt Village",1,Bakery,Pharmacy,Pet Store,Music Venue,Middle Eastern Restaurant,Bar,Café,Pool,Bank,Supermarket


### Cluster # 2

In [116]:
torontoMerged.loc[torontoMerged['Cluster Labels']==2, torontoMerged.columns[[2] +list(range(5,torontoMerged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Roselawn,2,Home Service,Garden,Women's Store,Dessert Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


### Cluster # 3

In [117]:
torontoMerged.loc[torontoMerged['Cluster Labels']==3, torontoMerged.columns[[2] +list(range(5,torontoMerged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
83,"Moore Park, Summerhill East",3,Lawyer,Restaurant,Women's Store,Diner,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


### Cluster # 4

In [113]:
torontoMerged.loc[torontoMerged['Cluster Labels']==4, torontoMerged.columns[[2] +list(range(5,torontoMerged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,Lawrence Park,4,Park,Swim School,Bus Line,Women's Store,Discount Store,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant
91,Rosedale,4,Park,Playground,Trail,Women's Store,Dessert Shop,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


# Cluster 4 has alot of 