### Toronto Neighbourhood analysis (2 of 3)

#### This Notebook is set in 3 parts, as requested by the Coursera assignments: in this third part, all previous code is used

### Import lybraries

In [1]:
import numpy as np
import pandas as pd
import requests

#!pip install beautifulsoup4
#!pip install lxml
from bs4 import BeautifulSoup

### Fetch data and create Pandas dataframe

In [2]:
### Fetching the page

wikilink = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page = requests.get(wikilink)
page = raw_wikipedia_page.text

In [3]:
### Fetching the table

soup = BeautifulSoup(page)
match = soup.find('table', class_ = 'wikitable sortable')

In [4]:
#print(match)

### Preparing the columns for the dataframe

In [5]:
### The dataframe has 3 columns:
cols = []

tag = match.find_all('th')

for t in range(len(tag)):
    cols.append(tag[t].text)

In [6]:
cols = ['Postcode', 'Borough', 'Neighborhood']

In [7]:
### Fetching the data from the wiki page

data = [None] * len(tag)

tag = match.find_all('td')

for t in range(len(tag)):
    data.append(tag[t].text)

In [8]:
### Reshaping the data array in a matrix with 3 columns

data_res = [[None for i in range(3)] for j in range(int(len(data)/3))]

for i in range(int(len(data) / 3)):
    for j in range(3):
        data_res[i][j] = data[3 * i + j]
        
### Removing first row (None)

data_res = data_res[1:][:]

In [9]:
### Removing the \n character in every third column element

for k in range(len(data_res)):
    data_res[k][2] = data_res[k][2][:-1]

In [10]:
#data_res

### Creating dataframe and cleaning

In [11]:
### Creating dataframe

df = pd.DataFrame(data_res, columns = cols)

In [12]:
### Removing the Not assigned rows as requested

df = df[df.Borough != 'Not assigned']
df.reset_index(inplace = True)
df.drop(labels = 'index', axis = 1, inplace = True)

In [14]:
### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

df.Neighborhood[df.Neighborhood == 'Not assigned'] = df.Borough[df.Neighborhood == 'Not assigned']

In [15]:
#df

In [17]:
### More than one neighborhood can exist in one postal code area:
### These rows will be combined into one row with the neighborhoods separated with a comma

uniques = pd.unique(df.Postcode)
uni_neighs = [[None] for j in range(len(uniques))]

for i, e in enumerate(uniques):
    for j in range(df.shape[0]):
        if str(df.Postcode[j]) == str(e):
              uni_neighs[i] = str(uni_neighs[i]) + ', ' + str(df.Neighborhood[j]) 
                
### Removing the [None], characters
for i in range(len(uni_neighs)):
    uni_neighs[i] = uni_neighs[i][8:]
    
#len(uni_neighs)

In [18]:
### Copying the Borough values for the uniques values in the postal codes list

uni_bors = [[None] for j in range(len(uniques))]

for i, e in enumerate(uniques):
    for j in range(df.shape[0]):
        if str(df.Postcode[j]) == str(e):
              uni_bors[i] = df.Borough[j]
    
#len(uni_bors)

In [20]:
### Building the final dataframe

data = [[None for i in range(3)] for j in range(len(uniques))]

df = pd.DataFrame(data, columns = cols)

df.Postcode = uniques
df.Borough = uni_bors
df.Neighborhood = uni_neighs

In [21]:
#Final dataframe

df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [22]:
df.shape

(103, 3)

# End of Notebook 1 of 3

### Geocoder package does not work properly (see attempt below), working with provided .xlsx file

In [23]:
#!pip install geocoder

In [24]:
'''
import geocoder # import geocoder

coords = [[None for i in range(3)] for j in range(len(uniques))]

for i, postal_code in enumerate(uniques):
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
      g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
      lat_lng_coords = g.latlng

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    
    coords[i][0] = postal_code
    coords[i][1] = latitude
    coords[i][2] = longitude
'''

"\nimport geocoder # import geocoder\n\ncoords = [[None for i in range(3)] for j in range(len(uniques))]\n\nfor i, postal_code in enumerate(uniques):\n    # initialize your variable to None\n    lat_lng_coords = None\n\n    # loop until you get the coordinates\n    while(lat_lng_coords is None):\n      g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))\n      lat_lng_coords = g.latlng\n\n    latitude = lat_lng_coords[0]\n    longitude = lat_lng_coords[1]\n    \n    coords[i][0] = postal_code\n    coords[i][1] = latitude\n    coords[i][2] = longitude\n"

### Adding latitude and longitude data from the .csv file to the df

In [25]:
# Dataframe from the csv file

df1 = pd.read_csv('Geospatial_coordinates.csv')
df1.columns = ['PostalCode', 'Latitude', 'Longitude']

df1.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [26]:
# Adding the lat and long data to the dataframe

lats = [[None] for j in range(len(uniques))]
longs = [[None] for j in range(len(uniques))]

for i, e in enumerate(df1.PostalCode):
    for j, f in enumerate(df.Postcode):
        if str(e) == str(f):
            lats[j] = df1.Latitude[i]
            longs[j] = df1.Longitude[i]

In [27]:
### Final dataframe

df['Latitude'] = lats
df['Longitude'] = longs

df_toronto = df
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


# End of Notebook 2 of 3

# Explore and cluster the neighborhoods in Toronto

In [49]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
import folium

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [29]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of ' + str(address) + ' are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Create a map of New York with neighborhoods superimposed on top

In [31]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Analysis with Foursquare API

In [32]:
CLIENT_ID = '1JJ2UBMLUE0M150OCS51HB3NEQ0LPY5BYD5BB0TFTGVFVRNB' # your Foursquare ID
CLIENT_SECRET = '03ENNGPSGSP3UQAVAV11JCB50HG4DU4H10Q5Y1DCEHRIO5KP' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: 1JJ2UBMLUE0M150OCS51HB3NEQ0LPY5BYD5BB0TFTGVFVRNB
CLIENT_SECRET:03ENNGPSGSP3UQAVAV11JCB50HG4DU4H10Q5Y1DCEHRIO5KP


### Explore all venues in Toronto with radius = 500

In [33]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [36]:
toronto_venues = getNearbyVenues(names = df['Neighborhood'], latitudes = df['Latitude'], longitudes = df['Longitude'])

In [37]:
print(toronto_venues.shape)
toronto_venues.head()

(1323, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Parkwoods,43.753259,-79.329656,GreenWin pool,43.756232,-79.333842,Pool
4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena


### Number of venues for each neighborhood

In [38]:
df_count = toronto_venues.groupby('Neighborhood').count()

df_count.drop('Neighborhood Longitude', axis = 1, inplace = True)
df_count.drop('Venue', axis = 1, inplace = True)
df_count.drop('Venue Latitude', axis = 1, inplace = True)
df_count.drop('Venue Longitude', axis = 1, inplace = True)
df_count.drop('Venue Category', axis = 1, inplace = True)

df_count.columns = ['Counts']
df_count.head()

Unnamed: 0_level_0,Counts
Neighborhood,Unnamed: 1_level_1
"Adelaide, King, Richmond",30
Agincourt,4
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",9
"Alderwood, Long Branch",10


In [39]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 236 uniques categories.


### Analyze each neighborhood

In [40]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
i = toronto_onehot.columns.get_loc('Neighborhood')
fixed_columns = [toronto_onehot.columns[i]] + list(toronto_onehot.columns[:(i-1)]) + list(toronto_onehot.columns[(i+1):])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Group rows by neighborhood and take the mean of the frequency of occurrence of each category

In [41]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
              venue  freq
0        Steakhouse  0.10
1       Pizza Place  0.07
2             Hotel  0.07
3  Asian Restaurant  0.07
4              Café  0.07


----Agincourt----
               venue  freq
0             Lounge  0.25
1     Breakfast Spot  0.25
2       Skating Rink  0.25
3     Sandwich Place  0.25
4  Accessories Store  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
            venue  freq
0     Coffee Shop  0.33
1            Park  0.33
2      Playground  0.33
3   Movie Theater  0.00
4  Massage Studio  0.00


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                 venue  freq
0        Grocery Store  0.22
1          Pizza Place  0.11
2  Fried Chicken Joint  0.11
3           Beer Store  0.11
4       Sandwich Place  0.11


----Alderwood, Long Branch----
                venue  freq
0         Pizza Place   0.2
1                 Gym   0.1
2  Athl

               venue  freq
0               Park  0.50
1      Metro Station  0.25
2  Convenience Store  0.25
3  Accessories Store  0.00
4      Movie Theater  0.00


----Emery, Humberlea----
                        venue  freq
0  Construction & Landscaping   0.5
1              Baseball Field   0.5
2           Accessories Store   0.0
3               Movie Theater   0.0
4              Massage Studio   0.0


----Fairview, Henry Farm, Oriole----
                  venue  freq
0        Clothing Store  0.17
1           Coffee Shop  0.13
2              Pharmacy  0.03
3  Fast Food Restaurant  0.03
4         Shopping Mall  0.03


----First Canadian Place, Underground city----
           venue  freq
0           Café  0.13
1  Deli / Bodega  0.10
2    Coffee Shop  0.10
3     Steakhouse  0.07
4            Gym  0.03


----Flemingdon Park, Don Mills South----
                venue  freq
0                 Gym  0.10
1          Beer Store  0.10
2         Coffee Shop  0.10
3    Asian Restaurant  0.10
4  Ita

                 venue  freq
0                 Café  0.13
1          Coffee Shop  0.10
2  American Restaurant  0.07
3   Italian Restaurant  0.07
4               Bakery  0.07


----The Annex, North Midtown, Yorkville----
            venue  freq
0            Café  0.12
1     Coffee Shop  0.12
2  Sandwich Place  0.12
3     Pizza Place  0.08
4  Cosmetics Shop  0.04


----The Beaches----
               venue  freq
0  Health Food Store  0.25
1                Pub  0.25
2  Accessories Store  0.00
3      Moving Target  0.00
4     Massage Studio  0.00


----The Beaches West, India Bazaar----
            venue  freq
0            Park  0.09
1  Sandwich Place  0.09
2             Gym  0.05
3         Brewery  0.05
4    Burger Joint  0.05


----The Danforth West, Riverdale----
                    venue  freq
0        Greek Restaurant  0.27
1          Ice Cream Shop  0.07
2      Italian Restaurant  0.07
3             Yoga Studio  0.03
4  Furniture / Home Store  0.03


----The Junction North, Runnymede-

### Sort in pandas dataframe and display the top 10 venues for each neighborhood

In [68]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [69]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Steakhouse,Hotel,Café,Asian Restaurant,Pizza Place,Food Court,Breakfast Spot,Smoke Shop,Lounge,Speakeasy
1,Agincourt,Lounge,Skating Rink,Breakfast Spot,Sandwich Place,Coffee Shop,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Coffee Shop,Playground,Park,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Beer Store,Fried Chicken Joint,Fast Food Restaurant,Coffee Shop,Pharmacy,Sandwich Place,Pizza Place,Airport Terminal,Deli / Bodega
4,"Alderwood, Long Branch",Pizza Place,Pharmacy,Pub,Coffee Shop,Athletics & Sports,Gym,Skating Rink,Sandwich Place,Pool,Dance Studio


### Cluster the neighborhoods in 10 clusters by k-means

In [93]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(toronto_grouped_clustering)
kmeans.labels_

array([0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 4, 0, 4, 0, 4, 0, 0, 0,
       0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 0, 1, 3, 0,
       0, 0, 4, 0, 4, 0, 0, 0, 0, 2, 4, 0, 3, 4, 4, 4, 4, 4, 0, 4, 0, 0,
       0, 4, 0, 2, 0, 0, 4, 0, 1, 4, 4, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0, 0,
       4, 1, 0, 0, 0, 1, 0, 0, 0, 4, 4, 4])

In [94]:
# add clustering labels
neighborhoods_venues_sorted['ClusterLabels'] =  kmeans.labels_

toronto_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on = 'Neighborhood')

toronto_merged.head()
print(toronto_merged.shape)

(103, 16)


In [95]:
### Some rows have NaN, removing them

toronto_merged.dropna(inplace = True)
print(toronto_merged.shape)

(100, 16)


In [96]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['ClusterLabels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [98]:
group0 = toronto_merged.loc[toronto_merged['ClusterLabels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
print(group0.shape)
group0.head()

(57, 12)


Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,ClusterLabels
1,North York,Pizza Place,Hockey Arena,Portuguese Restaurant,Coffee Shop,General Entertainment,Cuban Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,0.0
2,Downtown Toronto,Coffee Shop,Bakery,Park,Café,Mexican Restaurant,Breakfast Spot,Performing Arts Venue,Chocolate Shop,Pub,Restaurant,0.0
3,North York,Furniture / Home Store,Clothing Store,Accessories Store,Arts & Crafts Store,Gift Shop,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Women's Store,0.0
4,Queen's Park,Coffee Shop,Gym,Japanese Restaurant,Diner,Burrito Place,Italian Restaurant,Smoothie Shop,Seafood Restaurant,Liquor Store,Burger Joint,0.0
7,North York,Gym / Fitness Center,Japanese Restaurant,Café,Caribbean Restaurant,Yoga Studio,Discount Store,Dessert Shop,Dim Sum Restaurant,Diner,Dog Run,0.0


In [99]:
group1 = toronto_merged.loc[toronto_merged['ClusterLabels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
print(group1.shape)
group1.head()

(7, 12)


Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,ClusterLabels
35,East York,Park,Metro Station,Convenience Store,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,1.0
40,North York,Airport,Park,Snack Place,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,1.0
45,North York,Park,Cafeteria,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,1.0
64,York,Park,Yoga Studio,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,1.0
85,Scarborough,Coffee Shop,Playground,Park,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,1.0


In [101]:
group2 = toronto_merged.loc[toronto_merged['ClusterLabels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
print(group2.shape)
group2.head()

(2, 12)


Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,ClusterLabels
12,Scarborough,Bar,Yoga Studio,Falafel Restaurant,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,2.0
94,Etobicoke,Bar,Drugstore,Rental Car Location,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run,2.0


In [102]:
group3 = toronto_merged.loc[toronto_merged['ClusterLabels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
print(group3.shape)
group3.head()

(2, 12)


Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,ClusterLabels
57,North York,Baseball Field,Construction & Landscaping,Yoga Studio,Department Store,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,3.0
101,Etobicoke,Baseball Field,Yoga Studio,Falafel Restaurant,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,3.0


In [103]:
group4 = toronto_merged.loc[toronto_merged['ClusterLabels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
print(group4.shape)
group4.head()

(32, 12)


Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,ClusterLabels
0,North York,Park,Pool,Fast Food Restaurant,Food & Drink Shop,Yoga Studio,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,4.0
6,Scarborough,Fast Food Restaurant,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,4.0
8,East York,Fast Food Restaurant,Pizza Place,Pharmacy,Intersection,Breakfast Spot,Bank,Rock Climbing Spot,Athletics & Sports,Pet Store,Gym / Fitness Center,4.0
11,Etobicoke,Golf Course,Bank,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,4.0
14,East York,Pharmacy,Cosmetics Shop,Beer Store,Skating Rink,Asian Restaurant,Curling Ice,Park,Comfort Food Restaurant,Department Store,College Arts Building,4.0


# End of Notebook 3 of 3