In [6]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files
import os # access to environment variables (API credentials)

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


In [43]:
# download the Wikipedia page that holds the Toronto postal-code table;
# a timeout keeps the cell from hanging forever and raise_for_status()
# stops an HTTP error page from being parsed as if it were the table
wiki_response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
wiki_response.raise_for_status()
data = wiki_response.text

In [44]:
# parse the downloaded HTML with Python's built-in parser
soup = BeautifulSoup(markup=data, features='html.parser')

In [45]:
# one accumulator list per table column (postal code, borough, neighborhood)
postalCodeList, boroughList, neighborhoodList = [], [], []

In [46]:
# Fill the three lists from the first <table> on the page.
# The lists are emptied first so that re-running this cell does not
# append the same rows a second time.
postalCodeList.clear()
boroughList.clear()
neighborhoodList.clear()

postal_table = soup.find('table')
if postal_table is None:
    raise ValueError("No <table> element found in the downloaded page")

for row in postal_table.find_all('tr'):
    cells = row.find_all('td')
    # header rows have no <td>; rows with fewer than three cells cannot be
    # mapped onto the three columns and are skipped instead of raising IndexError
    if len(cells) >= 3:
        # NOTE: the raw cell text still carries its trailing newline here;
        # the newlines are removed in later cells
        postalCodeList.append(cells[0].text)
        boroughList.append(cells[1].text)
        neighborhoodList.append(cells[2].text)

In [57]:
# assemble the scraped columns into a single table
scraped_columns = dict(
    PostalCode=postalCodeList,
    Borough=boroughList,
    Neighborhood=neighborhoodList,
)
toronto_df = pd.DataFrame(data=scraped_columns)

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


In [63]:
# Drop rows whose borough is "Not assigned".
# The comparison is done on the stripped text so the filter works whether or
# not the scraped cells still carry their trailing newline.
has_borough = toronto_df["Borough"].str.strip() != "Not assigned"
toronto_df_dropna = toronto_df[has_borough].reset_index(drop=True)
toronto_df_dropna.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A\n,North York\n,Parkwoods\n
1,M4A\n,North York\n,Victoria Village\n
2,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
3,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights\n"
4,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n"


In [68]:
# remove the newline characters from the PostalCode column
postal_code_clean = toronto_df_dropna["PostalCode"].str.replace("\n","")
toronto_df_dropna = toronto_df_dropna.assign(PostalCode=postal_code_clean)
toronto_df_dropna.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,North York\n,Parkwoods\n
1,M2A,North York\n,Victoria Village\n
2,M3A,Downtown Toronto\n,"Regent Park, Harbourfront\n"
3,M4A,North York\n,"Lawrence Manor, Lawrence Heights\n"
4,M5A,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n"


In [69]:
# remove the newline characters from the Borough column
borough_clean = toronto_df_dropna["Borough"].str.replace("\n","")
toronto_df_dropna = toronto_df_dropna.assign(Borough=borough_clean)
toronto_df_dropna.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,North York,Parkwoods\n
1,M2A,North York,Victoria Village\n
2,M3A,Downtown Toronto,"Regent Park, Harbourfront\n"
3,M4A,North York,"Lawrence Manor, Lawrence Heights\n"
4,M5A,Downtown Toronto,"Queen's Park, Ontario Provincial Government\n"


In [70]:
# remove the newline characters from the Neighborhood column
neighborhood_clean = toronto_df_dropna["Neighborhood"].str.replace("\n","")
toronto_df_dropna = toronto_df_dropna.assign(Neighborhood=neighborhood_clean)
toronto_df_dropna.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,North York,Parkwoods
1,M2A,North York,Victoria Village
2,M3A,Downtown Toronto,"Regent Park, Harbourfront"
3,M4A,North York,"Lawrence Manor, Lawrence Heights"
4,M5A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [71]:
# one row per (PostalCode, Borough) pair; the neighborhoods that share the
# pair are combined into a single comma-separated string
postal_group_keys = ["PostalCode", "Borough"]
toronto_df_grouped = (
    toronto_df_dropna
    .groupby(postal_group_keys, as_index=False)
    .agg(lambda names: ", ".join(names))
)
toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,North York,Parkwoods
1,M1B,Downtown Toronto,"Garden District, Ryerson"
2,M1C,Scarborough,"Guildwood, Morningside, West Hill"
3,M1E,North York,Hillcrest Village
4,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands"


In [75]:
# For Neighborhood == "Not assigned", use the Borough name instead.
# The update is written with .loc on the DataFrame itself: the row yielded by
# iterrows() is a copy, so assigning to it never changed toronto_df_grouped.
neighborhood_missing = toronto_df_grouped["Neighborhood"] == "Not assigned"
toronto_df_grouped.loc[neighborhood_missing, "Neighborhood"] = toronto_df_grouped.loc[neighborhood_missing, "Borough"]

toronto_df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,North York,Parkwoods
1,M1B,Downtown Toronto,"Garden District, Ryerson"
2,M1C,Scarborough,"Guildwood, Morningside, West Hill"
3,M1E,North York,Hillcrest Village
4,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands"


In [74]:
# show the (rows, columns) size of the grouped table
toronto_df_grouped.shape

(103, 3)

## Part 2

In [76]:
# read the latitude/longitude lookup table from the local CSV file
coordinates_csv = "Geospatial_Coordinates.csv"
coordinates = pd.read_csv(coordinates_csv)
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [77]:
# give the key column the same name as in the scraped table ("PostalCode")
coordinates = coordinates.rename(columns={"Postal Code": "PostalCode"})
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [85]:
# attach Latitude/Longitude to every grouped row; a left join keeps rows
# that have no match in the coordinates table (their coordinates are NaN)
toronto_df1 = pd.merge(toronto_df_grouped, coordinates, how="left", on="PostalCode")
toronto_df1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1A,North York,Parkwoods,,
1,M1B,Downtown Toronto,"Garden District, Ryerson",43.806686,-79.194353
2,M1C,Scarborough,"Guildwood, Morningside, West Hill",43.784535,-79.160497
3,M1E,North York,Hillcrest Village,43.763573,-79.188711
4,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.770992,-79.216917


In [86]:

# build a spot-check table for a handful of postal codes
column_names = ["PostalCode", "Borough", "Neighborhood", "Latitude", "Longitude"]

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

# Collect the matching rows for each code (in test_list order) and concatenate
# once: DataFrame.append was removed in pandas 2.0 and re-copied the whole
# frame on every iteration. Codes that are absent from toronto_df1 simply
# contribute no rows.
matching_rows = [toronto_df1[toronto_df1["PostalCode"] == postcode] for postcode in test_list]
test_df = pd.concat(matching_rows, ignore_index=True)[column_names]

test_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5G,North York,Downsview,43.657952,-79.387383
1,M2H,North York,Downsview,43.803762,-79.363452
2,M4B,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.706397,-79.309937
3,M1J,East Toronto,Studio District,43.744734,-79.239476
4,M4G,North York,Bayview Village,43.70906,-79.363452
5,M4M,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.659526,-79.340923
6,M9L,Downtown Toronto,"University of Toronto, Harbord",43.756303,-79.565963
7,M1B,Downtown Toronto,"Garden District, Ryerson",43.806686,-79.194353
8,M5A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.65426,-79.360636


In [87]:
# keep only the rows in which every column has a value
# (this removes rows whose coordinates were not found by the merge)
toronto_df_new = toronto_df1.dropna(axis="index", how="any")
toronto_df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
1,M1B,Downtown Toronto,"Garden District, Ryerson",43.806686,-79.194353
2,M1C,Scarborough,"Guildwood, Morningside, West Hill",43.784535,-79.160497
3,M1E,North York,Hillcrest Village,43.763573,-79.188711
4,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.770992,-79.216917
5,M1H,North York,"York Mills, Silver Hills",43.773136,-79.239476


## Part 3

In [88]:
# look up the coordinates used to centre the maps
address = 'Toronto'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; fail with a clear
# message instead of an AttributeError on the next line
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [89]:

# base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# one circle marker per row of toronto_df_new; the popup reads "<neighborhood>, <borough>"
marker_rows = toronto_df_new[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, borough, neighborhood in marker_rows.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    marker.add_to(map_toronto)

map_toronto

In [90]:
# keep the distinct borough names that mention "toronto" (case-insensitive)
borough_names = toronto_df_new["Borough"].unique().tolist()

borough_with_toronto = [name for name in borough_names if "toronto" in name.lower()]

borough_with_toronto

['Downtown Toronto', 'East Toronto', 'West Toronto', 'Central Toronto']

In [91]:
# restrict the table to the boroughs whose name contains "Toronto"
in_toronto_borough = toronto_df_new['Borough'].isin(borough_with_toronto)
toronto_df_new = toronto_df_new.loc[in_toronto_borough].reset_index(drop=True)
print(toronto_df_new.shape)
toronto_df_new.head()

(25, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Downtown Toronto,"Garden District, Ryerson",43.806686,-79.194353
1,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.770992,-79.216917
2,M1J,East Toronto,Studio District,43.744734,-79.239476
3,M1M,West Toronto,"Runnymede, Swansea",43.716316,-79.239476
4,M1P,Downtown Toronto,Church and Wellesley,43.75741,-79.273304


In [92]:
# start again from an empty map centred on Toronto
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# draw a circle marker for each remaining row, labelled "<neighborhood>, <borough>"
marker_columns = [toronto_df_new[name] for name in ('Latitude', 'Longitude', 'Borough', 'Neighborhood')]
for lat, lng, borough, neighborhood in zip(*marker_columns):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    circle = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    circle.add_to(map_toronto)

map_toronto

In [95]:

# Foursquare credentials and API version.
# The ID and secret are read from the environment so that they are never
# stored in the notebook or echoed into its output. Set
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# report only whether the credentials are present, never their values
if CLIENT_ID and CLIENT_SECRET:
    print('Foursquare credentials loaded from the environment.')
else:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [96]:
# Query the Foursquare "explore" endpoint once per row of toronto_df_new and
# collect one tuple per returned venue.
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(toronto_df_new['Latitude'], toronto_df_new['Longitude'], toronto_df_new['PostalCode'], toronto_df_new['Borough'], toronto_df_new['Neighborhood']):
    # let requests build and escape the query string; the timeout keeps one
    # stalled call from blocking the whole loop
    api_response = requests.get(
        "https://api.foursquare.com/v2/venues/explore",
        params={
            "client_id": CLIENT_ID,
            "client_secret": CLIENT_SECRET,
            "v": VERSION,
            "ll": "{},{}".format(lat, long),
            "radius": radius,
            "limit": LIMIT,
        },
        timeout=30)
    # surface HTTP errors (bad credentials, quota, ...) here rather than as a
    # KeyError while digging through an error payload
    api_response.raise_for_status()

    groups = api_response.json().get("response", {}).get("groups", [])
    results = groups[0].get("items", []) if groups else []

    for venue in results:
        details = venue['venue']
        # a venue may come back without any category; record it as missing
        # instead of failing on categories[0]
        categories = details.get('categories') or []
        category_name = categories[0]['name'] if categories else None
        venues.append((
            post,
            borough,
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            category_name))

In [97]:
# Convert the list of venue tuples into a DataFrame.
# The column names are passed to the constructor so that an empty `venues`
# list yields an empty table with the right columns instead of a
# length-mismatch error when the names are assigned afterwards.
# The two "Borough*" coordinate columns hold the latitude/longitude of the
# toronto_df_new row that was queried.
venue_columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df = pd.DataFrame(venues, columns=venue_columns)

print(venues_df.shape)
venues_df.head()

(452, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M1B,Downtown Toronto,"Garden District, Ryerson",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop
2,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.770992,-79.216917,Tim Hortons,43.770827,-79.223078,Coffee Shop
3,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.770992,-79.216917,Korean Grill House,43.770812,-79.214502,Korean Restaurant
4,M1J,East Toronto,Studio District,43.744734,-79.239476,McCowan Park,43.745089,-79.239336,Playground


In [98]:
# number of venues returned for each (PostalCode, Borough, Neighborhood) group
venue_group_keys = ["PostalCode", "Borough", "Neighborhood"]
venues_df.groupby(venue_group_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M1B,Downtown Toronto,"Garden District, Ryerson",1,1,1,1,1,1
M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",3,3,3,3,3,3
M1J,East Toronto,Studio District,1,1,1,1,1,1
M1M,West Toronto,"Runnymede, Swansea",3,3,3,3,3,3
M1P,Downtown Toronto,Church and Wellesley,5,5,5,5,5,5
M2L,Central Toronto,"North Toronto West, Lawrence Park",1,1,1,1,1,1
M2N,Downtown Toronto,Rosedale,34,34,34,34,34,34
M2P,East Toronto,"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",4,4,4,4,4,4
M3A,Downtown Toronto,"Regent Park, Harbourfront",3,3,3,3,3,3
M3C,Downtown Toronto,Berczy Park,23,23,23,23,23,23


In [99]:
# count the distinct venue categories across all returned venues
distinct_categories = venues_df['VenueCategory'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 145 uniques categories.


In [100]:
# preview the first 50 distinct venue categories
venues_df['VenueCategory'].unique()[:50]

array(['Fast Food Restaurant', 'Coffee Shop', 'Korean Restaurant',
       'Playground', 'Motel', 'American Restaurant', 'Indian Restaurant',
       'Chinese Restaurant', 'Pet Store', 'Vietnamese Restaurant',
       'Martial Arts Dojo', 'Grocery Store', 'Café', 'Ramen Restaurant',
       'Movie Theater', 'Steakhouse', 'Indonesian Restaurant',
       'Ice Cream Shop', 'Arts & Crafts Store', 'Plaza',
       'Japanese Restaurant', 'Shopping Mall', 'Juice Bar', 'Lounge',
       'Sushi Restaurant', 'Sandwich Place', 'Discount Store',
       'Electronics Store', 'Middle Eastern Restaurant', 'Restaurant',
       'Pizza Place', 'Bubble Tea Shop', 'Hotel', 'Convenience Store',
       'Park', 'Bar', 'Moving Target', 'Food & Drink Shop',
       'Construction & Landscaping', 'Italian Restaurant', 'Gym',
       'Clothing Store', 'Bike Shop', 'Sporting Goods Shop',
       'Art Gallery', 'Beer Store', 'Supermarket', 'Asian Restaurant',
       'Dim Sum Restaurant', 'Bus Line'], dtype=object)

In [101]:
# one indicator column per venue category, named after the category itself
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# append the identifying columns; the neighborhood is stored as
# 'Neighborhoods' (plural) in this table
for target_name, source_name in (('PostalCode', 'PostalCode'),
                                 ('Borough', 'Borough'),
                                 ('Neighborhoods', 'Neighborhood')):
    toronto_onehot[target_name] = venues_df[source_name]

# rotate the three identifying columns from the end to the front
onehot_columns = toronto_onehot.columns.tolist()
fixed_columns = onehot_columns[-3:] + onehot_columns[:-3]
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(452, 148)


Unnamed: 0,PostalCode,Borough,Neighborhoods,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beach,Beer Bar,Beer Store,Bike Shop,Bistro,Bookstore,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Business Service,Butcher,Café,Candy Store,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Eastern European Restaurant,Electronics Store,Event Space,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,Historic Site,Home Service,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Lounge,Martial Arts Dojo,Middle Eastern Restaurant,Mobile Phone Shop,Motel,Movie Theater,Moving Target,Museum,Neighborhood,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pub,Ramen Restaurant,Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Spa,Speakeasy,Sporting Goods Shop,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Tailor Shop,Tea Room,Thai Restaurant,Theater,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese 
Restaurant,Warehouse Store,Wine Bar,Wine Shop,Yoga Studio
0,M1B,Downtown Toronto,"Garden District, Ryerson",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M1J,East Toronto,Studio District,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [102]:
# average the indicator columns within each (PostalCode, Borough, Neighborhoods)
# group, giving the share of each venue category per group
area_group_keys = ["PostalCode", "Borough", "Neighborhoods"]
category_means = toronto_onehot.groupby(area_group_keys).mean()
toronto_grouped = category_means.reset_index()

print(toronto_grouped.shape)
toronto_grouped

(25, 148)


Unnamed: 0,PostalCode,Borough,Neighborhoods,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beach,Beer Bar,Beer Store,Bike Shop,Bistro,Bookstore,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Business Service,Butcher,Café,Candy Store,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Eastern European Restaurant,Electronics Store,Event Space,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,Historic Site,Home Service,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Lounge,Martial Arts Dojo,Middle Eastern Restaurant,Mobile Phone Shop,Motel,Movie Theater,Moving Target,Museum,Neighborhood,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pub,Ramen Restaurant,Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Spa,Speakeasy,Sporting Goods Shop,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Tailor Shop,Tea Room,Thai Restaurant,Theater,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese 
Restaurant,Warehouse Store,Wine Bar,Wine Shop,Yoga Studio
0,M1B,Downtown Toronto,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1J,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1M,West Toronto,"Runnymede, Swansea",0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1P,Downtown Toronto,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
5,M2L,Central Toronto,"North Toronto West, Lawrence Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M2N,Downtown Toronto,Rosedale,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.029412,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.058824,0.0,0.029412,0.0,0.088235,0.058824,0.0,0.0,0.0,0.058824,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0
7,M2P,East Toronto,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M3A,Downtown Toronto,"Regent Park, Harbourfront",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M3C,Downtown Toronto,Berczy Park,0.0,0.0,0.043478,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.043478,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [103]:
# Build a table with the top `num_top_venues` venue categories per PostalCode.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# column headers: the three identifying columns followed by
# "1st Most Common Venue", "2nd ...", "3rd ...", "4th ...", ...
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in range(num_top_venues):
    # the first three ranks have their own suffix, every later rank uses 'th';
    # an explicit bounds check replaces the former bare `except:`, which would
    # also have hidden unrelated errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    freqColumns.append('{}{} Most Common Venue'.format(ind+1, suffix))
columns = areaColumns+freqColumns

# start from an empty frame with the final column layout and copy over the
# identifying columns
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

# for each row, rank the category columns (everything after the first three)
# by their mean frequency and keep the names of the highest ones
for ind in range(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted


(25, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Downtown Toronto,"Garden District, Ryerson",Fast Food Restaurant,Yoga Studio,Eastern European Restaurant,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Creperie
1,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Korean Restaurant,Cocktail Bar,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega
2,M1J,East Toronto,Studio District,Playground,Yoga Studio,Creperie,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Coworking Space
3,M1M,West Toronto,"Runnymede, Swansea",Motel,American Restaurant,Deli / Bodega,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Creperie
4,M1P,Downtown Toronto,Church and Wellesley,Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Yoga Studio,Deli / Bodega,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
5,M2L,Central Toronto,"North Toronto West, Lawrence Park",Martial Arts Dojo,Yoga Studio,Cocktail Bar,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega
6,M2N,Downtown Toronto,Rosedale,Ramen Restaurant,Coffee Shop,Sandwich Place,Café,Restaurant,Sushi Restaurant,Pizza Place,Hotel,Bubble Tea Shop,Ice Cream Shop
7,M2P,East Toronto,"Business reply mail Processing Centre, South C...",Park,Moving Target,Convenience Store,Bar,Deli / Bodega,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
8,M3A,Downtown Toronto,"Regent Park, Harbourfront",Park,Construction & Landscaping,Food & Drink Shop,Deli / Bodega,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Creperie
9,M3C,Downtown Toronto,Berczy Park,Coffee Shop,Asian Restaurant,Restaurant,Beer Store,Gym,Discount Store,Italian Restaurant,Dim Sum Restaurant,Bike Shop,Bus Line


In [104]:
# Run k-means to cluster the Toronto areas into 5 clusters
kclusters = 5

# Drop the identifier columns so that only the remaining feature columns are passed to k-means.
# `columns=` is used instead of a positional axis argument, which newer pandas versions reject.
toronto_grouped_clustering = toronto_grouped.drop(columns=["PostalCode", "Borough", "Neighborhoods"])

# run k-means clustering
# random_state fixes the centroid initialisation; n_init is pinned explicitly so the result
# does not depend on the scikit-learn version's default number of restarts.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 3, 0, 0, 4, 0, 0, 0, 0], dtype=int32)

In [105]:
# Create a new dataframe that includes the cluster label as well as the top 10 venues
# for each postal code.

# kmeans was fitted on rows derived from toronto_grouped, so labels_ is aligned with
# toronto_grouped -- not necessarily with the row order (or length) of toronto_df_new.
# Key the labels by PostalCode instead of assigning them positionally.
# NOTE(review): assumes PostalCode is unique in toronto_grouped -- confirm.
cluster_labels = pd.Series(
    kmeans.labels_,
    index=toronto_grouped["PostalCode"].values,
    name="Cluster Labels",
)

# top-10 venue columns, keyed by PostalCode (Borough/Neighborhoods already exist in toronto_df_new)
top_venues = (
    neighborhoods_venues_sorted
    .drop(columns=["Borough", "Neighborhoods"])
    .set_index("PostalCode")
)

# add the clustering labels, then the most common venues, matching rows on PostalCode
toronto_merged = (
    toronto_df_new
    .join(cluster_labels, on="PostalCode")
    .join(top_venues, on="PostalCode")
)

# postal codes that were not part of the clustering input have no label; drop them and
# restore the integer dtype that the join's NaN handling would otherwise turn into float
toronto_merged = toronto_merged.dropna(subset=["Cluster Labels"])
toronto_merged["Cluster Labels"] = toronto_merged["Cluster Labels"].astype(int)

print(toronto_merged.shape)
toronto_merged.head()

(25, 16)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Downtown Toronto,"Garden District, Ryerson",43.806686,-79.194353,2,Fast Food Restaurant,Yoga Studio,Eastern European Restaurant,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Creperie
1,M1G,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Cocktail Bar,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega
2,M1J,East Toronto,Studio District,43.744734,-79.239476,3,Playground,Yoga Studio,Creperie,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Coworking Space
3,M1M,West Toronto,"Runnymede, Swansea",43.716316,-79.239476,0,Motel,American Restaurant,Deli / Bodega,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Creperie
4,M1P,Downtown Toronto,Church and Wellesley,43.75741,-79.273304,0,Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Yoga Studio,Deli / Bodega,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop


In [106]:
# sort the results by Cluster Labels
print(toronto_merged.shape)
# rebind to the sorted frame instead of mutating with inplace=True, so the cell is a plain
# "input -> output" step and chains cleanly with other pandas calls
toronto_merged = toronto_merged.sort_values(["Cluster Labels"])
toronto_merged

(25, 16)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,M3M,Central Toronto,"Moore Park, Summerhill East",43.728496,-79.495697,0,Business Service,Food Truck,Baseball Field,Home Service,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
18,M5A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.65426,-79.360636,0,Coffee Shop,Pub,Park,Bakery,Restaurant,Breakfast Spot,Café,Theater,Event Space,Hotel
17,M4M,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.659526,-79.340923,0,Café,Bakery,Coffee Shop,American Restaurant,Brewery,Gastropub,Fish Market,Neighborhood,Middle Eastern Restaurant,Latin American Restaurant
16,M4L,West Toronto,"Parkdale, Roncesvalles",43.668999,-79.315572,0,Pizza Place,Fast Food Restaurant,Movie Theater,Italian Restaurant,Sandwich Place,Brewery,Restaurant,Burrito Place,Pub,Ice Cream Shop
15,M4H,Downtown Toronto,"Commerce Court, Victoria Hotel",43.705369,-79.349372,0,Indian Restaurant,Yoga Studio,Sandwich Place,Grocery Store,Gym,Fast Food Restaurant,Discount Store,Intersection,Liquor Store,Coffee Shop
14,M4E,Downtown Toronto,"Richmond, Adelaide, King",43.676357,-79.293031,0,Neighborhood,Asian Restaurant,Health Food Store,Trail,Pizza Place,Pub,Concert Hall,Construction & Landscaping,Comfort Food Restaurant,Convenience Store
13,M3N,Downtown Toronto,Stn A PO Boxes,43.761631,-79.520999,0,Discount Store,Liquor Store,Grocery Store,Gym / Fitness Center,Yoga Studio,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
23,M6M,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.691116,-79.476013,0,Skating Rink,Discount Store,Sandwich Place,Turkish Restaurant,Convenience Store,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
11,M3L,Central Toronto,"The Annex, North Midtown, Yorkville",43.739015,-79.506944,0,Shopping Mall,Bank,Park,Grocery Store,Hotel,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
19,M5E,West Toronto,"Dufferin, Dovercourt Village",43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Restaurant,Seafood Restaurant,Beer Bar,Bakery,Cheese Shop,Café,Liquor Store,Park


In [107]:
# visualize the resulting clusters

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map: one circle per row, coloured by its cluster label
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Borough'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    # labels are 0-based (0 .. kclusters-1), so index the palette directly; the previous
    # `cluster-1` only worked for label 0 by wrapping around to the last colour
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [108]:
# Cluster 1 (rows labelled 0): show the Borough column (position 1) together with
# everything from position 5 onward, i.e. the cluster label and the most-common-venue columns.
(
    toronto_merged[toronto_merged["Cluster Labels"].eq(0)]
    .iloc[:, [1, *range(5, toronto_merged.shape[1])]]
)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Central Toronto,0,Business Service,Food Truck,Baseball Field,Home Service,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
18,Downtown Toronto,0,Coffee Shop,Pub,Park,Bakery,Restaurant,Breakfast Spot,Café,Theater,Event Space,Hotel
17,Downtown Toronto,0,Café,Bakery,Coffee Shop,American Restaurant,Brewery,Gastropub,Fish Market,Neighborhood,Middle Eastern Restaurant,Latin American Restaurant
16,West Toronto,0,Pizza Place,Fast Food Restaurant,Movie Theater,Italian Restaurant,Sandwich Place,Brewery,Restaurant,Burrito Place,Pub,Ice Cream Shop
15,Downtown Toronto,0,Indian Restaurant,Yoga Studio,Sandwich Place,Grocery Store,Gym,Fast Food Restaurant,Discount Store,Intersection,Liquor Store,Coffee Shop
14,Downtown Toronto,0,Neighborhood,Asian Restaurant,Health Food Store,Trail,Pizza Place,Pub,Concert Hall,Construction & Landscaping,Comfort Food Restaurant,Convenience Store
13,Downtown Toronto,0,Discount Store,Liquor Store,Grocery Store,Gym / Fitness Center,Yoga Studio,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
23,Central Toronto,0,Skating Rink,Discount Store,Sandwich Place,Turkish Restaurant,Convenience Store,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
11,Central Toronto,0,Shopping Mall,Bank,Park,Grocery Store,Hotel,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
19,West Toronto,0,Coffee Shop,Cocktail Bar,Restaurant,Seafood Restaurant,Beer Bar,Bakery,Cheese Shop,Café,Liquor Store,Park


In [109]:
# Cluster 2 (rows labelled 1): show the Borough column (position 1) together with
# everything from position 5 onward, i.e. the cluster label and the most-common-venue columns.
(
    toronto_merged[toronto_merged["Cluster Labels"].eq(1)]
    .iloc[:, [1, *range(5, toronto_merged.shape[1])]]
)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,Downtown Toronto,1,Pizza Place,Yoga Studio,Creperie,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Coworking Space


In [110]:
# Cluster 3 (rows labelled 2): show the Borough column (position 1) together with
# everything from position 5 onward, i.e. the cluster label and the most-common-venue columns.
(
    toronto_merged[toronto_merged["Cluster Labels"].eq(2)]
    .iloc[:, [1, *range(5, toronto_merged.shape[1])]]
)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,2,Fast Food Restaurant,Yoga Studio,Eastern European Restaurant,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Creperie


In [111]:
# Cluster 4 (rows labelled 3): show the Borough column (position 1) together with
# everything from position 5 onward, i.e. the cluster label and the most-common-venue columns.
(
    toronto_merged[toronto_merged["Cluster Labels"].eq(3)]
    .iloc[:, [1, *range(5, toronto_merged.shape[1])]]
)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,East Toronto,3,Playground,Yoga Studio,Creperie,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Coworking Space


In [112]:
# Cluster 5 (rows labelled 4): show the Borough column (position 1) together with
# everything from position 5 onward, i.e. the cluster label and the most-common-venue columns.
(
    toronto_merged[toronto_merged["Cluster Labels"].eq(4)]
    .iloc[:, [1, *range(5, toronto_merged.shape[1])]]
)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Central Toronto,4,Martial Arts Dojo,Yoga Studio,Cocktail Bar,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega


### Most of the neighborhoods fall into Cluster 1. These places are mostly business areas with cafés, supermarkets, etc. Clusters 2 and 3 are dominated by restaurants, Cluster 4 by a playground, and Cluster 5 by a martial arts venue.