### Exploring and Clustering of Neighborhoods in Toronto.

# OBTAINING DATAFRAME FROM WIKIPEDIA URL

In [85]:
import numpy as np 
import pandas as pd
pd.set_option('display.max_columns', None) # allows the dataframe to be displayed in its entirty accross all columns 
pd.set_option('display.max_rows', None) # allows the dataframe to be displayed in its entirty accross all rows 

In [36]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [2]:
# Matplotlib and associated plotting modules
!pip install matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
 
# import k-means from clustering stage
!pip install sklearn
from sklearn.cluster import KMeans




You should consider upgrading via the 'c:\users\mail4\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.
You should consider upgrading via the 'c:\users\mail4\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.




In [3]:
wikipedia_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
dfs =  pd.read_html(wikipedia_url)
len(dfs)

3

In [4]:
dfs[0].shape

(180, 3)

### The table we need is the first table from the wikipedia url, hence dfs[0]

In [5]:
toronto_neighbourhood = dfs[0]
toronto_neighbourhood.tail()

Unnamed: 0,Postal Code,Borough,Neighbourhood
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."
179,M9Z,Not assigned,Not assigned


In [6]:
toronto_neighbourhood.columns

Index(['Postal Code', 'Borough', 'Neighbourhood'], dtype='object')

In [7]:
# get names of indexes for which 
# column Borough has Not assigned
index_names = toronto_neighbourhood[toronto_neighbourhood['Borough'] == 'Not assigned' ].index 

# dropiing every entry in the Borough column that has 'Not assigned' as entry
toronto_neighbourhood.drop(index_names, inplace = True)   
toronto_neighbourhood

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [8]:
# lET'S RESET THE INDEX OF OUR DATARFRAME
toronto_neighbourhood = toronto_neighbourhood.reset_index(drop=True)
toronto_neighbourhood

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [9]:
missing = toronto_neighbourhood[toronto_neighbourhood['Neighbourhood'] == 'Not assigned']

# checking for Not assigned data is still not present in the Neighborhood column
missing

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [10]:
toronto_neighbourhood.shape
# toronto_neighborhood[['Postal Code']]

(103, 3)

# UPDATING DATAFRAME WITH LATITUDE AND LONGITUDE COLUMNS

In [11]:
# import the geocoder library to find the lat and lng of the postal codes
!pip install geocoder
import geocoder



You should consider upgrading via the 'c:\users\mail4\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.


## The geocoder library wasn't working and the csv file wasn't loading either.
## So I decided to use pgeocode library instead which worked perfectly 

In [12]:
!pip install pgeocode
import pgeocode



You should consider upgrading via the 'c:\users\mail4\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.


In [13]:
nomi = pgeocode.Nominatim('ca') # intializing the country to canada
lat_lng = []
code_lat_lng = []
lat = None
lng = None

# for column in toronto_neighbourhood[['Postal Code']]:
# postal_codes = toronto_neighbourhood['Postal Code'].values
# postal_codes

# postal_codes should contain a list of the codes after this loop
for column in toronto_neighbourhood[['Postal Code']]:
    postal_codes = toronto_neighbourhood[column].values

#extracting lat and lng for all postal codes    
for code in postal_codes:
    #let's loop until we find both lat and lng incase we encounter None
    while((lat == None) or (lng == None)):
        df_lat_long = nomi.query_postal_code(code) # grabs the lat and lng for a postal code
        lat = df_lat_long['latitude']
        lng = df_lat_long['longitude']
    
#     print("Done with {}, Toronto, Ontario".format(code))
    lat_lng.append([lat, lng])
    code_lat_lng.append([code, lat, lng])
    lat = None
    lng = None

# lat_lng
code_lat_lng

[['M3A', 43.7545, -79.33],
 ['M4A', 43.7276, -79.3148],
 ['M5A', 43.6555, -79.3626],
 ['M6A', 43.7223, -79.4504],
 ['M7A', 43.6641, -79.3889],
 ['M9A', 43.6662, -79.5282],
 ['M1B', 43.8113, -79.193],
 ['M3B', 43.745, -79.359],
 ['M4B', 43.7063, -79.3094],
 ['M5B', 43.6572, -79.3783],
 ['M6B', 43.7081, -79.4479],
 ['M9B', 43.6505, -79.5517],
 ['M1C', 43.7878, -79.1564],
 ['M3C', 43.7334, -79.3329],
 ['M4C', 43.6913, -79.3116],
 ['M5C', 43.6513, -79.3756],
 ['M6C', 43.6915, -79.4307],
 ['M9C', 43.6437, -79.5767],
 ['M1E', 43.7678, -79.1866],
 ['M4E', 43.6784, -79.2941],
 ['M5E', 43.6456, -79.3754],
 ['M6E', 43.6889, -79.4507],
 ['M1G', 43.7712, -79.2144],
 ['M4G', 43.7124, -79.3644],
 ['M5G', 43.6564, -79.38600000000002],
 ['M6G', 43.6683, -79.4205],
 ['M1H', 43.7686, -79.2389],
 ['M2H', 43.8015, -79.3577],
 ['M3H', 43.7535, -79.4472],
 ['M4H', 43.7059, -79.3464],
 ['M5H', 43.6496, -79.3833],
 ['M6H', 43.6655, -79.4378],
 ['M1J', 43.7464, -79.2323],
 ['M2J', 43.7801, -79.3479],
 ['M3J', 

## ['M7R', nan, nan] 
#### Latitude and Long for M7R wasn't returned so we'll update the dataframe by 
#### dropping the row(s) corresponding to the postal code

In [14]:
len(lat_lng), len(postal_codes), toronto_neighbourhood.shape
# toronto_neighborhood

(103, 103, (103, 3))

### Let's find out the index of the entry for the code "M7R"
#### The postal code in the mississauga area is L4something and not M7R,
#### so it only makes sense to get rid of this OUTLIER from our data

In [15]:
toronto_neighbourhood[toronto_neighbourhood['Postal Code'] == 'M7R']

Unnamed: 0,Postal Code,Borough,Neighbourhood
76,M7R,Mississauga,Canada Post Gateway Processing Centre


In [16]:
toronto_neighbourhood = toronto_neighbourhood.drop([76])

In [122]:
# Now we can the drop the nan values from out lat and long before adding to 
# our dataframe
lat_lng[76]

[43.6898, -79.5582]

In [18]:
del lat_lng[76]
lat_lng

[[43.7545, -79.33],
 [43.7276, -79.3148],
 [43.6555, -79.3626],
 [43.7223, -79.4504],
 [43.6641, -79.3889],
 [43.6662, -79.5282],
 [43.8113, -79.193],
 [43.745, -79.359],
 [43.7063, -79.3094],
 [43.6572, -79.3783],
 [43.7081, -79.4479],
 [43.6505, -79.5517],
 [43.7878, -79.1564],
 [43.7334, -79.3329],
 [43.6913, -79.3116],
 [43.6513, -79.3756],
 [43.6915, -79.4307],
 [43.6437, -79.5767],
 [43.7678, -79.1866],
 [43.6784, -79.2941],
 [43.6456, -79.3754],
 [43.6889, -79.4507],
 [43.7712, -79.2144],
 [43.7124, -79.3644],
 [43.6564, -79.38600000000002],
 [43.6683, -79.4205],
 [43.7686, -79.2389],
 [43.8015, -79.3577],
 [43.7535, -79.4472],
 [43.7059, -79.3464],
 [43.6496, -79.3833],
 [43.6655, -79.4378],
 [43.7464, -79.2323],
 [43.7801, -79.3479],
 [43.7694, -79.4921],
 [43.6872, -79.3368],
 [43.62300000000001, -79.3936],
 [43.648, -79.4177],
 [43.7298, -79.2639],
 [43.7797, -79.3813],
 [43.739, -79.4692],
 [43.6803, -79.3538],
 [43.6469, -79.3823],
 [43.6383, -79.4301],
 [43.7122, -79.2843

In [19]:
# making sure we have consistency with dataframe and lat_lng list
len(lat_lng), toronto_neighbourhood.shape

(102, (102, 3))

In [20]:
toronto_neighbourhood = toronto_neighbourhood.reset_index(drop=True)
toronto_neighbourhood.shape

(102, 3)

###### lat_lng now contains a list of lists containing the lat and long for each codes
###### We need to prepare a lat_lng list so that it can be inserted into the dataframe called toronto_neighborhood

In [21]:
Latitude = []
Longitude = []
for i in range(len(lat_lng)):
    Latitude.append(lat_lng[i][0])
    Longitude.append(lat_lng[i][1])

# print(Latitude[101])
# print(Longitude[101])

len(Latitude), len(Longitude), len(lat_lng)

(102, 102, 102)

In [22]:
# Add Longitude and Latitude lits as columns into the dataframe
toronto_neighbourhood = toronto_neighbourhood.assign(**{'Latitude' : Latitude, 'Longitude' : Longitude})
toronto_neighbourhood.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7545,-79.33
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.6662,-79.5282
6,M1B,Scarborough,"Malvern, Rouge",43.8113,-79.193
7,M3B,North York,Don Mills,43.745,-79.359
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7063,-79.3094
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783


In [24]:
toronto_data = toronto_neighbourhood[toronto_neighbourhood['Borough'] == "Downtown Toronto"]
# toronto.shape #shape is (19,5)
toronto_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783
15,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
20,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754
24,M5G,Downtown Toronto,Central Bay Street,43.6564,-79.386
25,M6G,Downtown Toronto,Christie,43.6683,-79.4205
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6496,-79.3833
36,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.623,-79.3936
42,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.6469,-79.3823


## 1. Let's visualize the neighborhoods in Downtown Toronto

In [32]:
#Starting from the common neighborhoods downtown... i.e CENTRAL BAY STREET
latitude = 43.6564
longitude = -79.3860
print('The geograpical coordinate of CENTRAL BAY STREET are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of CENTRAL BAY STREET are 43.6564, -79.386.


In [26]:
# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
!pip install folium
import folium # map rendering library



You should consider upgrading via the 'c:\users\mail4\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.


In [33]:
# create map of downtown_toronto using latitude and longitude values
map_downtown_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown_toronto)  
    
map_downtown_toronto

### Using Foursquare credentials to access the API to explore neighborhoods and segment them

In [61]:
CLIENT_ID = 'TFRSDH32J2ITB3HVUJHNAN5UEICVSZ4WWFK3T15V5ULXXEL3' # your Foursquare ID
CLIENT_SECRET = '1SWDCS2KWT5G0FLC4UFJ3W0V5IEEHOMDOIQ4M3XMJ1X2IZXV' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TFRSDH32J2ITB3HVUJHNAN5UEICVSZ4WWFK3T15V5ULXXEL3
CLIENT_SECRET:1SWDCS2KWT5G0FLC4UFJ3W0V5IEEHOMDOIQ4M3XMJ1X2IZXV


### Now, let's get the top 100 venues that are in Downtown Toronto within a radius of 4 kilometers.
### Centering result from CENTRAL BAY STREET, so we can scan through all the neighnorhoods

#### We decided to view the top 100 venues downtown toronto because Foursquare only allows a LIMIT OF 100 result per call
#### for the regular account that I have

In [66]:
# First, let's create the GET request URL. Name your URL url.
RADIUS = 4000 #4 KILOMETERS
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&ll={},{}&v={}&raius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET,  
    latitude, 
    longitude, 
    VERSION,
    RADIUS, 
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=TFRSDH32J2ITB3HVUJHNAN5UEICVSZ4WWFK3T15V5ULXXEL3&client_secret=1SWDCS2KWT5G0FLC4UFJ3W0V5IEEHOMDOIQ4M3XMJ1X2IZXV&ll=43.6564,-79.386&v=20180605&raius=4000&limit=100'

In [67]:
response = requests.get(url).json()
response

{'meta': {'code': 200, 'requestId': '5f9343ba9d7cfa6c1b0268a0'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'suggestedRadius': 670,
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 153,
  'suggestedBounds': {'ne': {'lat': 43.66299775, 'lng': -79.37755672889631},
   'sw': {'lat': 43.65000125, 'lng': -79.39423825100494}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '537d4d6d498ec171ba22e7fe',
       'name': "Jimmy's Coffee",
       'location': {'address': '82 Gerrard Street W',
        'crossStreet': 'Gerrard & LaPlante',
        'lat': 43.65842123574496,
        'lng': -79.3856131

###### All the information needed about VENUES are in the items key

In [58]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [68]:
venues = response['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns] # using loc to get all the rows and the selected columns above

nearby_venues

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns by renaming with the second word from the split command 
# e.g venue.name becomes name, venue.lolcation.lat becomes lat
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Jimmy's Coffee,Coffee Shop,43.658421,-79.385613
1,Red Lobster,Seafood Restaurant,43.656328,-79.383621
2,Japango,Sushi Restaurant,43.655268,-79.385165
3,The Queen and Beaver Public House,Gastropub,43.657472,-79.383524
4,The Elm Tree Restaurant,Modern European Restaurant,43.657397,-79.383761


In [69]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


## 2. Explore Neighborhoods in Downtown Toronto

##### Let's create a function to repeat the same process to all the neighborhoods in Manhattan

In [71]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

####   New dataframe called Downtown_Toronto_Venues to run the above function on each neighborhood

In [79]:
Downtown_Toronto_Venues = getNearbyVenues(names=toronto_data['Neighbourhood'], 
                                   latitudes=toronto_data['Latitude'], 
                                   longitudes=toronto_data['Longitude'], 
                                   radius=500
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


In [80]:
print(Downtown_Toronto_Venues.shape)
Downtown_Toronto_Venues.head()

(1192, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.6555,-79.3626,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park, Harbourfront",43.6555,-79.3626,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park, Harbourfront",43.6555,-79.3626,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,"Regent Park, Harbourfront",43.6555,-79.3626,The Yoga Lounge,43.655515,-79.364955,Yoga Studio
4,"Regent Park, Harbourfront",43.6555,-79.3626,Body Blitz Spa East,43.654735,-79.359874,Spa


###### Let's check how many venues were returned for each neighborhood

In [81]:
Downtown_Toronto_Venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,90,90,90,90,90,90
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",57,57,57,57,57,57
Central Bay Street,74,74,74,74,74,74
Christie,12,12,12,12,12,12
Church and Wellesley,74,74,74,74,74,74
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Harbourfront East, Union Station, Toronto Islands",4,4,4,4,4,4
"Kensington Market, Chinatown, Grange Park",64,64,64,64,64,64


##### HOW MANY UNIQUE CATEGORIES FROM DATAFRAME

In [83]:
print('There are {} uniques categories.'.format(len(Downtown_Toronto_Venues['Venue Category'].unique())))

There are 185 uniques categories.


## 3. Analyze Each Neighborhood

In [86]:
# ONE ZERO DUMMY encoding
toronto_onezero = pd.get_dummies(Downtown_Toronto_Venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onezero['Neighbourhood'] = Downtown_Toronto_Venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onezero.columns[-1]] + list(toronto_onezero.columns[:-1])
toronto_onezero = toronto_onezero[fixed_columns]

toronto_onezero.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Butcher,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Cafeteria,College Gym,College Rec Center,College Theater,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,Hobby Shop,Hookah Bar,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kitchen Supply Store,Lake,Latin American Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Other Great Outdoors,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [87]:
toronto_onezero.shape

(1192, 186)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [88]:
toronto_grouped = toronto_onezero.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Butcher,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Cafeteria,College Gym,College Rec Center,College Theater,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,Hobby Shop,Hookah Bar,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kitchen Supply Store,Lake,Latin American Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Other Great Outdoors,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.011111,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.044444,0.0,0.0,0.011111,0.033333,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.022222,0.0,0.0,0.022222,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.011111,0.0,0.011111,0.0,0.022222,0.011111,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.022222,0.011111,0.0,0.011111,0.0,0.0,0.011111,0.011111,0.011111,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.011111,0.011111,0.011111,0.011111,0.0,0.0,0.0,0.0,0.0,0.055556,0.011111,0.0,0.0,0.0,0.0,0.0,0.022222,0.033333,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.022222,0.0,0.033333,0.0,0.011111,0.0,0.0,0.011111,0.0,0.0,0.022222,0.011111,0.011111,0.0,0.0,0.011111,0.0,0.011111,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.011111
1,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.035088,0.035088,0.052632,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.0,0.052632,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.070175,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.035088,0.0,0.017544,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.070175,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.0,0.035088,0.0,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.035088,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544
2,Central Bay Street,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.027027,0.0,0.027027,0.0,0.0,0.0,0.0,0.040541,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.189189,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.027027,0.013514,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.013514,0.013514,0.0,0.0,0.0,0.013514,0.0,0.027027,0.0,0.0,0.0,0.0,0.027027,0.0,0.013514,0.013514,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.013514,0.013514,0.013514,0.0,0.0,0.013514,0.0,0.0,0.013514,0.013514,0.0,0.013514,0.0,0.0,0.0
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.013514,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.013514,0.0,0.027027,0.0,0.013514,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.081081,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.0,0.0,0.013514,0.0,0.013514,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.054054,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.013514,0.013514,0.0,0.027027,0.0,0.0,0.013514,0.013514,0.0,0.0,0.0,0.054054,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.027027,0.027027,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.040541,0.0,0.013514,0.0,0.013514,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.013514,0.040541,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.027027
5,"Commerce Court, Victoria Hotel",0.0,0.04,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.02,0.01,0.02,0.0,0.0,0.01,0.01,0.01,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.03,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0
6,"First Canadian Place, Underground city",0.0,0.04,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.02,0.01,0.02,0.0,0.0,0.01,0.01,0.01,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.03,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0
7,"Garden District, Ryerson",0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.08,0.0,0.1,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.03,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.01,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.02,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0
8,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.015625,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0625,0.0,0.015625,0.0,0.015625,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.03125,0.015625,0.0,0.0625,0.0,0.0,0.015625,0.015625,0.0,0.0,0.015625,0.046875,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.0,0.015625,0.015625,0.03125,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.03125,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0625,0.0,0.03125,0.0,0.015625,0.0,0.0,0.0


In [89]:
# Let's confirm the new size
toronto_grouped.shape

(19, 186)

#### Let's print each neighborhood along with the top 5 most common venues

In [90]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
         venue  freq
0  Coffee Shop  0.10
1        Hotel  0.06
2         Café  0.04
3       Bakery  0.04
4     Beer Bar  0.03


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0  Italian Restaurant  0.07
1         Coffee Shop  0.07
2                Café  0.05
3                 Bar  0.05
4              Bakery  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.19
1                Café  0.04
2               Hotel  0.03
3  Italian Restaurant  0.03
4     Bubble Tea Shop  0.03


----Christie----
                venue  freq
0       Grocery Store  0.25
1                Café  0.25
2          Playground  0.08
3         Candy Store  0.08
4  Athletics & Sports  0.08


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.08
1  Japanese Restaurant  0.05
2              Gay Bar  0.05
3     Sushi Restaurant  0.04
4        

#### Let's put that into a _pandas_ dataframe

First, let's write a function to sort the venues in descending order.

In [91]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [94]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Hotel,Bakery,Café,Japanese Restaurant,Seafood Restaurant,Beer Bar,Restaurant,Italian Restaurant,Sandwich Place
1,"CN Tower, King and Spadina, Railway Lands, Har...",Italian Restaurant,Coffee Shop,Bar,Café,Restaurant,Gym / Fitness Center,Bakery,Bank,Park,French Restaurant
2,Central Bay Street,Coffee Shop,Café,Hotel,Italian Restaurant,Sandwich Place,Restaurant,Sushi Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Breakfast Spot
3,Christie,Café,Grocery Store,Playground,Athletics & Sports,Coffee Shop,Candy Store,Baby Store,Park,Gourmet Shop,Farmers Market
4,Church and Wellesley,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Yoga Studio,Bubble Tea Shop,Café,Mediterranean Restaurant,Men's Store
5,"Commerce Court, Victoria Hotel",Coffee Shop,Hotel,Café,Gym,Japanese Restaurant,Restaurant,American Restaurant,Deli / Bodega,Asian Restaurant,Salad Place
6,"First Canadian Place, Underground city",Coffee Shop,Hotel,Café,Gym,Japanese Restaurant,Restaurant,American Restaurant,Deli / Bodega,Asian Restaurant,Salad Place
7,"Garden District, Ryerson",Coffee Shop,Clothing Store,Café,Japanese Restaurant,Middle Eastern Restaurant,Cosmetics Shop,Hotel,Fast Food Restaurant,Lingerie Store,Italian Restaurant
8,"Harbourfront East, Union Station, Toronto Islands",Music Venue,Harbor / Marina,Park,Café,Yoga Studio,Farmers Market,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store
9,"Kensington Market, Chinatown, Grange Park",Mexican Restaurant,Vegetarian / Vegan Restaurant,Café,Bar,Coffee Shop,Park,Thai Restaurant,Gaming Cafe,Dumpling Restaurant,Pizza Place


## 4. Cluster Neighborhoods

Run _k_-means to cluster the neighborhood into 5 clusters.

In [112]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 4, 0, 2, 0, 0, 0, 0, 3, 4])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighbourhood.

In [113]:
# add clustering labels
# neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighbourhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.tail() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
90,M4W,Downtown Toronto,Rosedale,43.6827,-79.373,1,Playground,Park,Grocery Store,Candy Store,Doner Restaurant,Farmers Market,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store
91,M5W,Downtown Toronto,Stn A PO Boxes,43.6437,-79.3787,0,Coffee Shop,Restaurant,Hotel,Gym,Café,Sporting Goods Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,Park
95,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.6684,-79.3689,4,Coffee Shop,Restaurant,Café,Market,Pizza Place,Park,Bakery,Italian Restaurant,Chinese Restaurant,Sandwich Place
96,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.6492,-79.3823,0,Coffee Shop,Hotel,Café,Gym,Japanese Restaurant,Restaurant,American Restaurant,Deli / Bodega,Asian Restaurant,Salad Place
98,M4Y,Downtown Toronto,Church and Wellesley,43.6656,-79.383,0,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Yoga Studio,Bubble Tea Shop,Café,Mediterranean Restaurant,Men's Store


#### Finally, let's visualize the resulting clusters

In [114]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 5. Examine Clusters

###### Cluster 1

In [116]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + [2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,"Regent Park, Harbourfront",0,Coffee Shop,Breakfast Spot,Yoga Studio,Beer Store,Italian Restaurant,Food Truck,Event Space,Electronics Store,Distribution Center,Dance Studio
9,Downtown Toronto,"Garden District, Ryerson",0,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Middle Eastern Restaurant,Cosmetics Shop,Hotel,Fast Food Restaurant,Lingerie Store,Italian Restaurant
15,Downtown Toronto,St. James Town,0,Coffee Shop,Café,Seafood Restaurant,Restaurant,Cocktail Bar,American Restaurant,Gastropub,Beer Bar,Hotel,Bakery
20,Downtown Toronto,Berczy Park,0,Coffee Shop,Hotel,Bakery,Café,Japanese Restaurant,Seafood Restaurant,Beer Bar,Restaurant,Italian Restaurant,Sandwich Place
24,Downtown Toronto,Central Bay Street,0,Coffee Shop,Café,Hotel,Italian Restaurant,Sandwich Place,Restaurant,Sushi Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Breakfast Spot
30,Downtown Toronto,"Richmond, Adelaide, King",0,Café,Coffee Shop,Hotel,Gym,Restaurant,Salad Place,Breakfast Spot,Sushi Restaurant,Japanese Restaurant,Steakhouse
42,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",0,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Salad Place,Seafood Restaurant,Japanese Restaurant,Italian Restaurant,Beer Bar
48,Downtown Toronto,"Commerce Court, Victoria Hotel",0,Coffee Shop,Hotel,Café,Gym,Japanese Restaurant,Restaurant,American Restaurant,Deli / Bodega,Asian Restaurant,Salad Place
91,Downtown Toronto,Stn A PO Boxes,0,Coffee Shop,Restaurant,Hotel,Gym,Café,Sporting Goods Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,Park
96,Downtown Toronto,"First Canadian Place, Underground city",0,Coffee Shop,Hotel,Café,Gym,Japanese Restaurant,Restaurant,American Restaurant,Deli / Bodega,Asian Restaurant,Salad Place


###### Cluster 2

In [118]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + [2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
90,Downtown Toronto,Rosedale,1,Playground,Park,Grocery Store,Candy Store,Doner Restaurant,Farmers Market,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store


###### Cluster 3

In [119]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + [2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,Downtown Toronto,Christie,2,Café,Grocery Store,Playground,Athletics & Sports,Coffee Shop,Candy Store,Baby Store,Park,Gourmet Shop,Farmers Market


###### Cluster 4

In [120]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + [2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
36,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",3,Music Venue,Harbor / Marina,Park,Café,Yoga Studio,Farmers Market,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store


###### Cluster 5

In [121]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + [2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",4,Gym,Coffee Shop,Italian Restaurant,Escape Room,Café,Chinese Restaurant,Mexican Restaurant,Restaurant,Bubble Tea Shop,Ramen Restaurant
79,Downtown Toronto,"University of Toronto, Harbord",4,Café,Bookstore,Japanese Restaurant,Bakery,Yoga Studio,Pub,Sandwich Place,Restaurant,Coffee Shop,College Arts Building
83,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",4,Mexican Restaurant,Vegetarian / Vegan Restaurant,Café,Bar,Coffee Shop,Park,Thai Restaurant,Gaming Cafe,Dumpling Restaurant,Pizza Place
86,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",4,Italian Restaurant,Coffee Shop,Bar,Café,Restaurant,Gym / Fitness Center,Bakery,Bank,Park,French Restaurant
95,Downtown Toronto,"St. James Town, Cabbagetown",4,Coffee Shop,Restaurant,Café,Market,Pizza Place,Park,Bakery,Italian Restaurant,Chinese Restaurant,Sandwich Place
