In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

print('Libraries imported.')

Libraries imported.


In [2]:
!pip install folium
import folium # map rendering library
import lxml # I've had to do this previously
from bs4 import BeautifulSoup as bs
import csv
print("Packages imported")

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 16.6MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1
Packages imported


## Pulling postal codes of Canada

In [3]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = bs(source, 'lxml')

#print(soup.prettify()) #Cool this works

table = soup.find("table")
# print(table) #Checking that I can find the table
rows = table.tbody.find_all("tr")

data = []
for tr in rows:
    td = tr.find_all("td")
    row = [tr.text for tr in td]
    
    if row != [] and row[1] != "Not assigned":
        if "Not assigned" in row[2]:
            row[2] = row[1]
        data.append(row)
        
print('Success')

Success


In [4]:
df = pd.DataFrame(data, columns = ['Postal Code', "Borough", "Neighborhood"])
df = df[~df['Postal Code'].isnull()]
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,Regent Park / Harbourfront\n


## Cleaning the data

In [5]:
#remove erraneous text
df["Postal Code"] = df["Postal Code"].str.replace("\n","")
df["Borough"] = df["Borough"].str.replace("\n","")
df["Neighborhood"] = df["Neighborhood"].str.replace("\n","")

#remove row if it does not have an assigned borough
df.drop(df[df['Borough']=="Not assigned"].index, axis=0, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [6]:
df.info()
df.shape

<class 'pandas.core.frame.DataFrame'>
Int64Index: 103 entries, 2 to 178
Data columns (total 3 columns):
Postal Code     103 non-null object
Borough         103 non-null object
Neighborhood    103 non-null object
dtypes: object(3)
memory usage: 3.2+ KB


(103, 3)

In [7]:
#reset the index
df1 = df.reset_index(drop = True)
df1.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,Malvern / Rouge
7,M3B,North York,Don Mills
8,M4B,East York,Parkview Hill / Woodbine Gardens
9,M5B,Downtown Toronto,"Garden District, Ryerson"


## Get Latitude and Longitude using Geocoder

In [8]:
coord_df = pd.read_csv("http://cocl.us/Geospatial_data")
coord_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
#Set index of both data frame sto postal codes in order to combine them
df2 = df1.set_index('Postal Code')
coord_df2 = coord_df.set_index('Postal Code')
df2.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,Regent Park / Harbourfront
M6A,North York,Lawrence Manor / Lawrence Heights
M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [10]:
coord_df2.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [11]:
#Join the two dataframes together
df3 = df2.join(coord_df2)
df3.head()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


In [12]:
#Reset the dataframe index
df4=df3.reset_index(drop=False)
df4.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [16]:
#filter data frame so only Boroughs with "Toronto" are included
df5 = df4[df4['Borough'].str.contains('Toronto')]
df6 = df5.reset_index(drop=True) #Reset the index again
df6.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [17]:
#Get coordinates of Toronto Canada using geolocator
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude 
print('The geographical coordinates of Toronto Canada are {}, {}.'.format(latitude, longitude))

The geographical coordinates of Toronto Canada are 43.6534817, -79.3839347.


## Get venue data using Foursquare

In [18]:
#get Foursquare credentials 
CLIENT_ID = 'V32ENKOHMIUCLQUGEOUGUNETWMYUOMY42ZC32CFXSJB2E4V3' # your Foursquare ID
CLIENT_SECRET = 'PSAPTXND0ZJRE1KTI40OLH0CM4GQBBN2PIDYDKJGRG0HCVXE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Creditials applied successfully.')

Creditials applied successfully.


In [22]:
LIMIT = 100 #limit the number of venues returned
radius = 500 

In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
toronto_venues = getNearbyVenues(names=df6['Neighborhood'], 
                                latitudes=df6['Latitude'], 
                                longitudes=df6['Longitude'])

Regent Park / Harbourfront
Queen's Park / Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond / Adelaide / King
Dufferin / Dovercourt Village
Harbourfront East / Union Station / Toronto Islands
Little Portugal / Trinity
The Danforth West / Riverdale
Toronto Dominion Centre / Design Exchange
Brockton / Parkdale Village / Exhibition Place
India Bazaar / The Beaches West
Commerce Court / Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park / The Junction South
North Toronto West
The Annex / North Midtown / Yorkville
Parkdale / Roncesvalles
Davisville
University of Toronto / Harbord
Runnymede / Swansea
Moore Park / Summerhill East
Kensington Market / Chinatown / Grange Park
Summerhill West / Rathnelly / South Hill / Forest Hill SE / Deer Park
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport
Roseda

In [25]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,55,55,55,55,55,55
Brockton / Parkdale Village / Exhibition Place,24,24,24,24,24,24
Business reply mail Processing CentrE,19,19,19,19,19,19
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport,17,17,17,17,17,17
Central Bay Street,65,65,65,65,65,65
Christie,17,17,17,17,17,17
Church and Wellesley,74,74,74,74,74,74
Commerce Court / Victoria Hotel,100,100,100,100,100,100
Davisville,34,34,34,34,34,34
Davisville North,11,11,11,11,11,11


## Find out the unique venues in Toronto

In [43]:
venues = toronto_venues['Venue Category'].unique()
venues

array(['Bakery', 'Coffee Shop', 'Distribution Center', 'Spa',
       'Breakfast Spot', 'Restaurant', 'Park', 'Historic Site', 'Pub',
       'Farmers Market', 'Chocolate Shop', 'Dessert Shop', 'Theater',
       'Performing Arts Venue', 'Gym / Fitness Center',
       'French Restaurant', 'Café', 'Mexican Restaurant', 'Event Space',
       'Yoga Studio', 'Ice Cream Shop', 'Shoe Store', 'Art Gallery',
       'Cosmetics Shop', 'Electronics Store', 'Bank', 'Beer Store',
       'Health Food Store', 'Antique Shop', 'Italian Restaurant',
       'Beer Bar', 'Creperie', 'Arts & Crafts Store', 'Burrito Place',
       'Diner', 'Hobby Shop', 'Discount Store', 'Fried Chicken Joint',
       'Burger Joint', 'Juice Bar', 'Sandwich Place', 'Gym',
       'College Auditorium', 'Bar', 'Clothing Store', 'Comic Shop',
       'Plaza', 'Tea Room', 'Music Venue', 'Ramen Restaurant',
       'Thai Restaurant', 'Movie Theater', 'Steakhouse', 'Shopping Mall',
       'American Restaurant', 'Japanese Restaurant', 'Col

In [44]:
print('There are {} unique categories'.format(len(venues)))

There are 226 unique categories


In [53]:
q1="Brewery" in venues
q2="Pub" in venues
q3="Bar" in venues

print("q1={}, q2={}, q3={}".format(q1, q2, q3))
#These results tell us there are locations with the category of Pub, Brewery and Bar in Toronto. We will need to factor this in when choosing a location for our little startup brewery. 

q1=True, q2=True, q3=True


In [54]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Business Service,Butcher,Café,Cajun / Creole Restaurant,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Auditorium,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hospital,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Regent Park / Harbourfront,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [56]:
#Grouping rows by neighborhood and taking the mean of the frequency of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Business Service,Butcher,Café,Cajun / Creole Restaurant,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Auditorium,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hospital,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.018182,0.0,0.018182,0.036364,0.0,0.0,0.0,0.018182,0.018182,0.0,0.036364,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.036364,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.0,0.0,0.036364,0.054545,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.036364,0.018182,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.036364,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0
1,Brockton / Parkdale Village / Exhibition Place,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CN Tower / King and Spadina / Railway Lands / ...,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030769,0.0,0.030769,0.0,0.0,0.015385,0.0,0.046154,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.184615,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.015385,0.015385,0.015385,0.015385,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.030769,0.015385,0.0,0.0,0.0,0.061538,0.030769,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030769,0.015385,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030769,0.0,0.046154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030769,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.030769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176471,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.235294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.013514,0.0,0.027027,0.013514,0.0,0.0,0.0,0.027027,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.067568,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.013514,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.027027,0.054054,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.027027,0.0,0.0,0.013514,0.013514,0.0,0.0,0.0,0.013514,0.054054,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.027027,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.0,0.027027,0.013514,0.0,0.0,0.0,0.040541,0.0,0.013514,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.0,0.0,0.040541,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.013514,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Commerce Court / Victoria Hotel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.088235,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.029412,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.058824,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.088235,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
numBrewery = len(toronto_grouped["Brewery"]>0)
numPub = len(toronto_grouped["Pub"]>0)
numBar = len(toronto_grouped["Bar"]>0)

print("There are {} breweries, {} Pubs and {} Bars in Toronto".format(numBrewery,numPub,numBar))

There are 39 breweries, 39 Pubs and 39 Bars in Toronto


In [74]:
#Create dataframes for pubs, breweries and bars within neighborhoods of Toronto
toronto_brew = toronto_grouped[['Neighborhood','Brewery']]
toronto_pub = toronto_grouped[['Neighborhood','Pub']]
toronto_bar = toronto_grouped[['Neighborhood','Bar']]
toronto_all = toronto_grouped[['Neighborhood','Bar','Pub','Brewery']]
#toronto_brew.shape

In [70]:
toronto_brew.head(12)

Unnamed: 0,Neighborhood,Brewery
0,Berczy Park,0.0
1,Brockton / Parkdale Village / Exhibition Place,0.0
2,Business reply mail Processing CentrE,0.052632
3,CN Tower / King and Spadina / Railway Lands / ...,0.0
4,Central Bay Street,0.0
5,Christie,0.0
6,Church and Wellesley,0.0
7,Commerce Court / Victoria Hotel,0.0
8,Davisville,0.029412
9,Davisville North,0.0


In [71]:
toronto_pub.head(12)

Unnamed: 0,Neighborhood,Pub
0,Berczy Park,0.0
1,Brockton / Parkdale Village / Exhibition Place,0.0
2,Business reply mail Processing CentrE,0.0
3,CN Tower / King and Spadina / Railway Lands / ...,0.0
4,Central Bay Street,0.0
5,Christie,0.0
6,Church and Wellesley,0.027027
7,Commerce Court / Victoria Hotel,0.01
8,Davisville,0.0
9,Davisville North,0.0


In [72]:
toronto_bar.head(12)

Unnamed: 0,Neighborhood,Bar
0,Berczy Park,0.0
1,Brockton / Parkdale Village / Exhibition Place,0.041667
2,Business reply mail Processing CentrE,0.0
3,CN Tower / King and Spadina / Railway Lands / ...,0.058824
4,Central Bay Street,0.015385
5,Christie,0.0
6,Church and Wellesley,0.0
7,Commerce Court / Victoria Hotel,0.02
8,Davisville,0.0
9,Davisville North,0.0


In [92]:
toronto_all.head(12)

Unnamed: 0,Neighborhood,Bar,Pub,Brewery
0,Berczy Park,0.0,0.0,0.0
1,Brockton / Parkdale Village / Exhibition Place,0.041667,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.052632
3,CN Tower / King and Spadina / Railway Lands / ...,0.058824,0.0,0.0
4,Central Bay Street,0.015385,0.0,0.0
5,Christie,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.027027,0.0
7,Commerce Court / Victoria Hotel,0.02,0.01,0.0
8,Davisville,0.0,0.0,0.029412
9,Davisville North,0.0,0.0,0.0


## Brewery Clustering

In [76]:
from sklearn.cluster import KMeans

In [107]:
brew_clusters = 3

brew_clust = toronto_brew.drop(["Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=brew_clusters, random_state=1)
kmeans.fit_transform(brew_clust)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 2],
      dtype=int32)

In [108]:
brew_merg = toronto_brew.copy()
brew_merg["Cluster Labels"] = kmeans.labels_
brew_merg.head(5)

Unnamed: 0,Neighborhood,Brewery,Cluster Labels
0,Berczy Park,0.0,0
1,Brockton / Parkdale Village / Exhibition Place,0.0,0
2,Business reply mail Processing CentrE,0.052632,1
3,CN Tower / King and Spadina / Railway Lands / ...,0.0,0
4,Central Bay Street,0.0,0


In [109]:
brew_merg_f = brew_merg.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")
print(brew_merg_f.shape)
brew_merg_f.head()

(1635, 9)


Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.0,0,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.0,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
0,Berczy Park,0.0,0,43.644771,-79.373306,Starbucks,43.644285,-79.369771,Coffee Shop


In [110]:
brew_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(brew_clusters)
ys = [i + x + (i*x)**2 for i in range(brew_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(brew_merg_f['Neighborhood Latitude'], brew_merg_f['Neighborhood Longitude'], brew_merg_f['Neighborhood'], brew_merg_f['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(brew_map_clusters)
       
brew_map_clusters

In [111]:
brew_merg_f.loc[(brew_merg_f['Cluster Labels'] ==0) & (brew_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category


In [112]:
brew_merg_f.loc[(brew_merg_f['Cluster Labels'] ==1) & (brew_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
2,Business reply mail Processing CentrE,0.052632,1,43.662744,-79.321558,Rorschach Brewing Co.,43.663483,-79.319824,Brewery
10,Dufferin / Dovercourt Village,0.066667,1,43.669005,-79.442259,Blood Brothers Brewing,43.669944,-79.436533,Brewery
16,India Bazaar / The Beaches West,0.05,1,43.668999,-79.315572,Godspeed Brewery,43.67262,-79.319228,Brewery
32,Studio District,0.04878,1,43.659526,-79.340923,Avling Kitchen & Brewery,43.661515,-79.338117,Brewery
32,Studio District,0.04878,1,43.659526,-79.340923,Saulter Street Brewery,43.658412,-79.346392,Brewery


In [113]:
brew_merg_f.loc[(brew_merg_f['Cluster Labels'] ==2) & (brew_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
8,Davisville,0.029412,2,43.704324,-79.38879,Granite Brewery,43.707991,-79.389943,Brewery
14,Harbourfront East / Union Station / Toronto Is...,0.03,2,43.640816,-79.381752,Steam Whistle Brewing,43.641752,-79.387089,Brewery
14,Harbourfront East / Union Station / Toronto Is...,0.03,2,43.640816,-79.381752,Labatt Breweries of Canada,43.638282,-79.380378,Brewery
14,Harbourfront East / Union Station / Toronto Is...,0.03,2,43.640816,-79.381752,Amsterdam Brewhouse,43.638122,-79.384803,Brewery
19,Little Portugal / Trinity,0.023256,2,43.647927,-79.41975,Bellwoods Brewery,43.647097,-79.419955,Brewery
36,The Danforth West / Riverdale,0.023256,2,43.679557,-79.352188,Louis Cifer Brew Works,43.677663,-79.351313,Brewery


## Pub Clustering

In [114]:
pub_clusters = 3

pub_clust = toronto_pub.drop(["Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=pub_clusters, random_state=1)
kmeans.fit_transform(pub_clust)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
      dtype=int32)

In [119]:
pub_merg = toronto_pub.copy()
pub_merg["Cluster Labels"] = kmeans.labels_
pub_merg.head(12)

Unnamed: 0,Neighborhood,Pub,Cluster Labels
0,Berczy Park,0.0,0
1,Brockton / Parkdale Village / Exhibition Place,0.0,0
2,Business reply mail Processing CentrE,0.0,0
3,CN Tower / King and Spadina / Railway Lands / ...,0.0,0
4,Central Bay Street,0.0,0
5,Christie,0.0,0
6,Church and Wellesley,0.027027,2
7,Commerce Court / Victoria Hotel,0.01,0
8,Davisville,0.0,0
9,Davisville North,0.0,0


In [124]:
pub_merg_f = pub_merg.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")
print(pub_merg_f.shape)
pub_merg_f.head()

(1635, 9)


Unnamed: 0,Neighborhood,Pub,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.0,0,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.0,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
0,Berczy Park,0.0,0,43.644771,-79.373306,Starbucks,43.644285,-79.369771,Coffee Shop


In [125]:
pub_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(pub_clusters)
ys = [i + x + (i*x)**2 for i in range(brew_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(pub_merg_f['Neighborhood Latitude'], pub_merg_f['Neighborhood Longitude'], pub_merg_f['Neighborhood'], pub_merg_f['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(pub_map_clusters)
       
pub_map_clusters

In [126]:
pub_merg_f.loc[(pub_merg_f['Cluster Labels'] ==0) & (pub_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Pub,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
7,Commerce Court / Victoria Hotel,0.01,0,43.648198,-79.379817,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub
11,First Canadian Place / Underground city,0.01,0,43.648429,-79.38228,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub
13,"Garden District, Ryerson",0.01,0,43.657162,-79.378937,Imperial Pub,43.656254,-79.378955,Pub
31,Stn A PO Boxes,0.010526,0,43.646435,-79.374846,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub
37,Toronto Dominion Centre / Design Exchange,0.01,0,43.647177,-79.381576,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub


In [127]:
pub_merg_f.loc[(pub_merg_f['Cluster Labels'] ==1) & (pub_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Pub,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
33,Summerhill West / Rathnelly / South Hill / For...,0.125,1,43.686412,-79.400049,Fionn MacCool's,43.687921,-79.394783,Pub
33,Summerhill West / Rathnelly / South Hill / For...,0.125,1,43.686412,-79.400049,Scallywags,43.687982,-79.394676,Pub
35,The Beaches,0.2,1,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub


In [128]:
pub_merg_f.loc[(pub_merg_f['Cluster Labels'] ==2) & (pub_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Pub,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
6,Church and Wellesley,0.027027,2,43.66586,-79.38316,Churchmouse & Firkin,43.664632,-79.380406,Pub
6,Church and Wellesley,0.027027,2,43.66586,-79.38316,Bishop and Belcher,43.670096,-79.382354,Pub
16,India Bazaar / The Beaches West,0.05,2,43.668999,-79.315572,Murphy's Law,43.667319,-79.312656,Pub
24,Regent Park / Harbourfront,0.06383,2,43.65426,-79.360636,Dominion Pub and Kitchen,43.656919,-79.358967,Pub
24,Regent Park / Harbourfront,0.06383,2,43.65426,-79.360636,Mill St. Brew Pub,43.650353,-79.358489,Pub
24,Regent Park / Harbourfront,0.06383,2,43.65426,-79.360636,The Aviary,43.653634,-79.354662,Pub
28,Runnymede / Swansea,0.04878,2,43.651571,-79.48445,Bryden's Pub,43.649259,-79.484651,Pub
28,Runnymede / Swansea,0.04878,2,43.651571,-79.48445,My Place - a Canadian Pub,43.648458,-79.485187,Pub
30,St. James Town / Cabbagetown,0.045455,2,43.667967,-79.367675,Stout Irish Pub,43.663891,-79.36903,Pub
30,St. James Town / Cabbagetown,0.045455,2,43.667967,-79.367675,The Flying Beaver Pubaret,43.664829,-79.368292,Pub


## Bar Clustering

In [129]:
bar_clusters = 3

bar_clust = toronto_bar.drop(["Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=bar_clusters, random_state=1)
kmeans.fit_transform(bar_clust)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([2, 0, 2, 1, 0, 2, 2, 0, 2, 2, 1, 0, 2, 2, 0, 1, 2, 0, 2, 1],
      dtype=int32)

In [130]:
bar_merg = toronto_bar.copy()
bar_merg["Cluster Labels"] = kmeans.labels_
bar_merg.head(12)

Unnamed: 0,Neighborhood,Bar,Cluster Labels
0,Berczy Park,0.0,2
1,Brockton / Parkdale Village / Exhibition Place,0.041667,0
2,Business reply mail Processing CentrE,0.0,2
3,CN Tower / King and Spadina / Railway Lands / ...,0.058824,1
4,Central Bay Street,0.015385,0
5,Christie,0.0,2
6,Church and Wellesley,0.0,2
7,Commerce Court / Victoria Hotel,0.02,0
8,Davisville,0.0,2
9,Davisville North,0.0,2


In [131]:
bar_merg_f = bar_merg.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")
print(bar_merg_f.shape)
bar_merg_f.head()

(1635, 9)


Unnamed: 0,Neighborhood,Bar,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.0,2,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.0,2,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.0,2,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.0,2,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
0,Berczy Park,0.0,2,43.644771,-79.373306,Starbucks,43.644285,-79.369771,Coffee Shop


In [138]:
bar_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(bar_clusters)
ys = [i + x + (i*x)**2 for i in range(bar_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bar_merg_f['Neighborhood Latitude'], bar_merg_f['Neighborhood Longitude'], bar_merg_f['Neighborhood'], bar_merg_f['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(bar_map_clusters)
       
bar_map_clusters

In [133]:
bar_merg_f.loc[(bar_merg_f['Cluster Labels'] ==0) & (bar_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1,Brockton / Parkdale Village / Exhibition Place,0.041667,0,43.636847,-79.428191,Pharmacy,43.63809,-79.43181,Bar
4,Central Bay Street,0.015385,0,43.657952,-79.387383,Duke's Refresher + Bar,43.65898,-79.382949,Bar
7,Commerce Court / Victoria Hotel,0.02,0,43.648198,-79.379817,Boxcar Social Temperance,43.650557,-79.381956,Bar
7,Commerce Court / Victoria Hotel,0.02,0,43.648198,-79.379817,Earls Kitchen & Bar,43.647946,-79.383706,Bar
11,First Canadian Place / Underground city,0.03,0,43.648429,-79.38228,Earls Kitchen & Bar,43.647946,-79.383706,Bar
11,First Canadian Place / Underground city,0.03,0,43.648429,-79.38228,Boxcar Social Temperance,43.650557,-79.381956,Bar
11,First Canadian Place / Underground city,0.03,0,43.648429,-79.38228,Loose Moose,43.645281,-79.383966,Bar
14,Harbourfront East / Union Station / Toronto Is...,0.02,0,43.640816,-79.381752,Corks Beer & Wine Bar,43.642493,-79.38154,Bar
14,Harbourfront East / Union Station / Toronto Is...,0.02,0,43.640816,-79.381752,The Rec Room,43.64111,-79.386763,Bar
17,Kensington Market / Chinatown / Grange Park,0.048387,0,43.653206,-79.400049,Cold Tea,43.654193,-79.401075,Bar


In [134]:
bar_merg_f.loc[(bar_merg_f['Cluster Labels'] ==1) & (bar_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
3,CN Tower / King and Spadina / Railway Lands / ...,0.058824,1,43.628947,-79.39442,Market@416,43.631653,-79.39451,Bar
10,Dufferin / Dovercourt Village,0.066667,1,43.669005,-79.442259,The Greater Good Bar,43.669409,-79.439267,Bar
15,High Park / The Junction South,0.08,1,43.661608,-79.464763,Hole in the Wall,43.665296,-79.465118,Bar
15,High Park / The Junction South,0.08,1,43.661608,-79.464763,Shoxs,43.665353,-79.463563,Bar
19,Little Portugal / Trinity,0.116279,1,43.647927,-79.41975,Reposado,43.647321,-79.420032,Bar
19,Little Portugal / Trinity,0.116279,1,43.647927,-79.41975,The Communist's Daughter,43.649362,-79.420963,Bar
19,Little Portugal / Trinity,0.116279,1,43.647927,-79.41975,Dakota Tavern,43.64968,-79.420838,Bar
19,Little Portugal / Trinity,0.116279,1,43.647927,-79.41975,apt 200,43.644026,-79.420063,Bar
19,Little Portugal / Trinity,0.116279,1,43.647927,-79.41975,Bar Fancy,43.643734,-79.421326,Bar
22,Parkdale / Roncesvalles,0.071429,1,43.64896,-79.456325,The Local Pub and Restaurant,43.651017,-79.450911,Bar


In [135]:
bar_merg_f.loc[(bar_merg_f['Cluster Labels'] ==2) & (bar_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
13,"Garden District, Ryerson",0.01,2,43.657162,-79.378937,Duke's Refresher + Bar,43.65898,-79.382949,Bar
37,Toronto Dominion Centre / Design Exchange,0.01,2,43.647177,-79.381576,Boxcar Social Temperance,43.650557,-79.381956,Bar


## Cluster with Bar, Brewery and Pubs

In [136]:
all_clusters = 5

all_clust = toronto_all.drop(["Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=all_clusters, random_state=1)
kmeans.fit_transform(all_clust)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20]

array([0, 2, 3, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 2, 4, 2, 0, 2],
      dtype=int32)

In [137]:
all_merg = toronto_all.copy()
all_merg["Cluster Labels"] = kmeans.labels_
all_merg_f = all_merg.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")
print(all_merg_f.shape)
all_merg_f.head()

(1635, 11)


Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.0,0.0,0.0,0,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.0,0.0,0.0,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.0,0.0,0.0,0,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.0,0.0,0.0,0,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
0,Berczy Park,0.0,0.0,0.0,0,43.644771,-79.373306,Starbucks,43.644285,-79.369771,Coffee Shop


In [139]:
all_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(all_clusters)
ys = [i + x + (i*x)**2 for i in range(all_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(all_merg_f['Neighborhood Latitude'], all_merg_f['Neighborhood Longitude'], all_merg_f['Neighborhood'], all_merg_f['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(all_map_clusters)
       
all_map_clusters

### Cluster 0

### Bars = 11

### Pubs = 8

### Breweries = 2

In [144]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==0) & (all_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
4,Central Bay Street,0.015385,0.0,0.0,0,43.657952,-79.387383,Duke's Refresher + Bar,43.65898,-79.382949,Bar
7,Commerce Court / Victoria Hotel,0.02,0.01,0.0,0,43.648198,-79.379817,Boxcar Social Temperance,43.650557,-79.381956,Bar
7,Commerce Court / Victoria Hotel,0.02,0.01,0.0,0,43.648198,-79.379817,Earls Kitchen & Bar,43.647946,-79.383706,Bar
11,First Canadian Place / Underground city,0.03,0.01,0.0,0,43.648429,-79.38228,Earls Kitchen & Bar,43.647946,-79.383706,Bar
11,First Canadian Place / Underground city,0.03,0.01,0.0,0,43.648429,-79.38228,Boxcar Social Temperance,43.650557,-79.381956,Bar
11,First Canadian Place / Underground city,0.03,0.01,0.0,0,43.648429,-79.38228,Loose Moose,43.645281,-79.383966,Bar
13,"Garden District, Ryerson",0.01,0.01,0.0,0,43.657162,-79.378937,Duke's Refresher + Bar,43.65898,-79.382949,Bar
23,Queen's Park / Ontario Provincial Government,0.032258,0.0,0.0,0,43.662301,-79.389494,SUDS,43.65988,-79.394712,Bar
25,Richmond / Adelaide / King,0.020619,0.0,0.0,0,43.650571,-79.384568,Boxcar Social Temperance,43.650557,-79.381956,Bar
25,Richmond / Adelaide / King,0.020619,0.0,0.0,0,43.650571,-79.384568,Earls Kitchen & Bar,43.647946,-79.383706,Bar


In [145]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==0) & (all_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
6,Church and Wellesley,0.0,0.027027,0.0,0,43.66586,-79.38316,Churchmouse & Firkin,43.664632,-79.380406,Pub
6,Church and Wellesley,0.0,0.027027,0.0,0,43.66586,-79.38316,Bishop and Belcher,43.670096,-79.382354,Pub
7,Commerce Court / Victoria Hotel,0.02,0.01,0.0,0,43.648198,-79.379817,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub
11,First Canadian Place / Underground city,0.03,0.01,0.0,0,43.648429,-79.38228,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub
13,"Garden District, Ryerson",0.01,0.01,0.0,0,43.657162,-79.378937,Imperial Pub,43.656254,-79.378955,Pub
31,Stn A PO Boxes,0.0,0.010526,0.0,0,43.646435,-79.374846,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub
36,The Danforth West / Riverdale,0.0,0.023256,0.023256,0,43.679557,-79.352188,The Auld Spot Pub,43.677335,-79.35313,Pub
37,Toronto Dominion Centre / Design Exchange,0.01,0.01,0.0,0,43.647177,-79.381576,Walrus Pub & Beer Hall,43.647375,-79.379515,Pub


In [146]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==0) & (all_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
8,Davisville,0.0,0.0,0.029412,0,43.704324,-79.38879,Granite Brewery,43.707991,-79.389943,Brewery
36,The Danforth West / Riverdale,0.0,0.023256,0.023256,0,43.679557,-79.352188,Louis Cifer Brew Works,43.677663,-79.351313,Brewery


### Cluster 1

### Bars = 0

### Pubs = 3

### Breweries = 0

In [147]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==1) & (all_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category


In [148]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==1) & (all_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
33,Summerhill West / Rathnelly / South Hill / For...,0.0,0.125,0.0,1,43.686412,-79.400049,Fionn MacCool's,43.687921,-79.394783,Pub
33,Summerhill West / Rathnelly / South Hill / For...,0.0,0.125,0.0,1,43.686412,-79.400049,Scallywags,43.687982,-79.394676,Pub
35,The Beaches,0.0,0.2,0.0,1,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub


In [149]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==1) & (all_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category


### Cluster 2

### Bars = 15

### Pubs = 1

### Breweries = 1

In [150]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==2) & (all_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1,Brockton / Parkdale Village / Exhibition Place,0.041667,0.0,0.0,2,43.636847,-79.428191,Pharmacy,43.63809,-79.43181,Bar
3,CN Tower / King and Spadina / Railway Lands / ...,0.058824,0.0,0.0,2,43.628947,-79.39442,Market@416,43.631653,-79.39451,Bar
15,High Park / The Junction South,0.08,0.0,0.0,2,43.661608,-79.464763,Hole in the Wall,43.665296,-79.465118,Bar
15,High Park / The Junction South,0.08,0.0,0.0,2,43.661608,-79.464763,Shoxs,43.665353,-79.463563,Bar
17,Kensington Market / Chinatown / Grange Park,0.048387,0.0,0.0,2,43.653206,-79.400049,Cold Tea,43.654193,-79.401075,Bar
17,Kensington Market / Chinatown / Grange Park,0.048387,0.0,0.0,2,43.653206,-79.400049,Trinity Common,43.65659,-79.402761,Bar
17,Kensington Market / Chinatown / Grange Park,0.048387,0.0,0.0,2,43.653206,-79.400049,The Supermarket,43.65668,-79.402954,Bar
19,Little Portugal / Trinity,0.116279,0.0,0.023256,2,43.647927,-79.41975,Reposado,43.647321,-79.420032,Bar
19,Little Portugal / Trinity,0.116279,0.0,0.023256,2,43.647927,-79.41975,The Communist's Daughter,43.649362,-79.420963,Bar
19,Little Portugal / Trinity,0.116279,0.0,0.023256,2,43.647927,-79.41975,Dakota Tavern,43.64968,-79.420838,Bar


In [151]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==2) & (all_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
38,University of Toronto / Harbord,0.057143,0.028571,0.0,2,43.662696,-79.400049,East of Brunswick,43.665609,-79.403324,Pub


In [152]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==2) & (all_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
19,Little Portugal / Trinity,0.116279,0.0,0.023256,2,43.647927,-79.41975,Bellwoods Brewery,43.647097,-79.419955,Brewery


### Cluster 3

### Bars = 4

### Pubs = 0

### Breweries = 7

In [153]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==3) & (all_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
10,Dufferin / Dovercourt Village,0.066667,0.0,0.066667,3,43.669005,-79.442259,The Greater Good Bar,43.669409,-79.439267,Bar
14,Harbourfront East / Union Station / Toronto Is...,0.02,0.0,0.03,3,43.640816,-79.381752,Corks Beer & Wine Bar,43.642493,-79.38154,Bar
14,Harbourfront East / Union Station / Toronto Is...,0.02,0.0,0.03,3,43.640816,-79.381752,The Rec Room,43.64111,-79.386763,Bar
32,Studio District,0.02439,0.0,0.04878,3,43.659526,-79.340923,The Roy Public House,43.660452,-79.342994,Bar


In [154]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==3) & (all_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category


In [155]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==3) & (all_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
2,Business reply mail Processing CentrE,0.0,0.0,0.052632,3,43.662744,-79.321558,Rorschach Brewing Co.,43.663483,-79.319824,Brewery
10,Dufferin / Dovercourt Village,0.066667,0.0,0.066667,3,43.669005,-79.442259,Blood Brothers Brewing,43.669944,-79.436533,Brewery
14,Harbourfront East / Union Station / Toronto Is...,0.02,0.0,0.03,3,43.640816,-79.381752,Steam Whistle Brewing,43.641752,-79.387089,Brewery
14,Harbourfront East / Union Station / Toronto Is...,0.02,0.0,0.03,3,43.640816,-79.381752,Labatt Breweries of Canada,43.638282,-79.380378,Brewery
14,Harbourfront East / Union Station / Toronto Is...,0.02,0.0,0.03,3,43.640816,-79.381752,Amsterdam Brewhouse,43.638122,-79.384803,Brewery
32,Studio District,0.02439,0.0,0.04878,3,43.659526,-79.340923,Avling Kitchen & Brewery,43.661515,-79.338117,Brewery
32,Studio District,0.02439,0.0,0.04878,3,43.659526,-79.340923,Saulter Street Brewery,43.658412,-79.346392,Brewery


### Cluster 4

### Bars = 1

### Pubs = 9

### Breweries = 1

In [156]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==4) & (all_merg_f['Venue Category'] == 'Bar') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
28,Runnymede / Swansea,0.02439,0.04878,0.0,4,43.651571,-79.48445,A Dark Horse,43.649533,-79.483056,Bar


In [157]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==4) & (all_merg_f['Venue Category'] == 'Pub') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
16,India Bazaar / The Beaches West,0.0,0.05,0.05,4,43.668999,-79.315572,Murphy's Law,43.667319,-79.312656,Pub
24,Regent Park / Harbourfront,0.0,0.06383,0.0,4,43.65426,-79.360636,Dominion Pub and Kitchen,43.656919,-79.358967,Pub
24,Regent Park / Harbourfront,0.0,0.06383,0.0,4,43.65426,-79.360636,Mill St. Brew Pub,43.650353,-79.358489,Pub
24,Regent Park / Harbourfront,0.0,0.06383,0.0,4,43.65426,-79.360636,The Aviary,43.653634,-79.354662,Pub
28,Runnymede / Swansea,0.02439,0.04878,0.0,4,43.651571,-79.48445,Bryden's Pub,43.649259,-79.484651,Pub
28,Runnymede / Swansea,0.02439,0.04878,0.0,4,43.651571,-79.48445,My Place - a Canadian Pub,43.648458,-79.485187,Pub
30,St. James Town / Cabbagetown,0.0,0.045455,0.0,4,43.667967,-79.367675,Stout Irish Pub,43.663891,-79.36903,Pub
30,St. James Town / Cabbagetown,0.0,0.045455,0.0,4,43.667967,-79.367675,The Flying Beaver Pubaret,43.664829,-79.368292,Pub
34,The Annex / North Midtown / Yorkville,0.0,0.045455,0.0,4,43.67271,-79.405678,Pour House,43.675641,-79.403821,Pub


In [158]:
all_merg_f.loc[(all_merg_f['Cluster Labels'] ==4) & (all_merg_f['Venue Category'] == 'Brewery') ]

Unnamed: 0,Neighborhood,Bar,Pub,Brewery,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
16,India Bazaar / The Beaches West,0.0,0.05,0.05,4,43.668999,-79.315572,Godspeed Brewery,43.67262,-79.319228,Brewery
