# Part one: Toronto Neighbourghood Data

In [56]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests
import geopy
import matplotlib.cm as cm
import matplotlib.colors as colors

### Scraping Data
Using beautifulsouls package to scrape data from the Toronto neighbourhood Wikipedia page. The data is taken from the table on the webpage.

In [2]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = bs(source,'lxml')
table = soup.find('table',{'class':'wikitable sortable'})

A new csv file is created containing the data from the scaraped webpage.

In [3]:
f = open('Toronto_Data.csv', 'w')
    
area = ""
district = ""
town = ""
    
for row in table.findAll("tr"):
    cells = row.findAll("td")
    if len(cells) == 3:
        area = cells[0].find(text=True)
        district = cells[1].findAll(text=True)
        town = cells[2].find(text=True)
        
    for x in range(len(district)):
        postcode_list = district[x].split(",")
        for i in range(len(postcode_list)):
            write_to_file = area.lstrip('\n').strip() + "," + postcode_list[i].lstrip('\n').strip() + "," + town.lstrip('\n').strip() +  "\n"
            f.write(write_to_file)
 
f.close()

### Loading in CSV file
Creating Dataframe containing three columns (Postcode, Borough and Neighbourhood).

In [4]:
Toronto = pd.read_csv("Toronto_Data.csv") 
df = pd.DataFrame(Toronto)
df.columns = ['PostCode', 'Borough','Neighbourhood']
df.head()

Unnamed: 0,PostCode,Borough,Neighbourhood
0,M2A,Not assigned,Not assigned
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M5A,Downtown Toronto,Regent Park


Dropping the Not assigned Boroughs.

In [5]:
df1 = df[df.Borough != 'Not assigned'].reset_index(drop=True)
df1.head()

Unnamed: 0,PostCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


Grouping the Borughs with multiple neighbourhoods.

In [6]:
df2 = df1.groupby(['PostCode','Borough'])['Neighbourhood'].apply(','.join).reset_index()
df2.head()

Unnamed: 0,PostCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Replacing values of Not assigned neighbourhood with borough.

In [7]:
NA_rows = df2.Neighbourhood == 'Not assigned'
df2.loc[NA_rows, 'Neighbourhood'] = df2.loc[NA_rows, 'Borough']
df2[NA_rows]

Unnamed: 0,PostCode,Borough,Neighbourhood
85,M7A,Queen's Park,Queen's Park


Cleaned Toronto data has 3 columns and 103 rows.

In [8]:
df_cleaned = df2 
df_cleaned.shape

(103, 3)

## Part 2: Geo Data for the Postcodes

!pip install wget
conda install -c conda-forge geocoder

Downloading File

In [9]:
import wget as w
import geocoder

In [15]:
#url = 'http://cocl.us/Geospatial_data/toronto_coordinates.csv'
#file = w.download(url)
#file

Reading in doownloaded coords from downloaded file.

In [16]:
Geo_Data = pd.read_csv('Geospatial_Coordinates.csv') 
Geo_df = pd.DataFrame(Geo_Data)
print(Geo_df.shape)
Geo_Data.head()

(103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merging the Postcodes from the Toronto Dataframe with the latitude and longitude data from the Geo data.

In [121]:
toronto_df_temp = df_cleaned.set_index('PostCode')
Geo_temp = Geo_Data.set_index('Postal Code')
toronto_df_coors = pd.concat([toronto_df_temp, Geo_temp], axis=1, join='inner')

toronto_df_coors.index.name = 'PostalCode'
toronto_df_coors.reset_index(inplace=True)

print(toronto_df_coors.shape)
toronto_df_coors.head()

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Part 3: Clustering the Neighbourhoods

from geopy.geocoders import Nominatim

conda install -c conda-forge folium

In [122]:
import folium
tor_lat = 43.6532
tor_lng = -79.3832

# Creates map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[tor_lat, tor_lng], zoom_start=10)


for lat, lng, borough, neighbourhood in zip(toronto_df_coors['Latitude'], toronto_df_coors['Longitude'], toronto_df_coors['Borough'], toronto_df_coors['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Conecting to Foursquare

In [123]:
CLIENT_ID = 'EWVS52T0BKWVO45KSXP1HLBSA1KHYI42FMKKNHIQPFJAIAZS' # your Foursquare ID
CLIENT_SECRET = 'HAYSFJVSI2MSXA4MILHUWZI3XO5BE30RL0LLTSPXV5NPMDPJ' # your Foursquare Secret
VERSION = '20191104'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EWVS52T0BKWVO45KSXP1HLBSA1KHYI42FMKKNHIQPFJAIAZS
CLIENT_SECRET:HAYSFJVSI2MSXA4MILHUWZI3XO5BE30RL0LLTSPXV5NPMDPJ


address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

In [124]:
from pandas.io.json import json_normalize


Making API Call

In [260]:
radius = 700
LIMIT = 200

venues = []

for lat, long, post, borough, neighborhood in zip(toronto_df_coors['Latitude'], toronto_df_coors['Longitude'], toronto_df_coors['PostalCode'], toronto_df_coors['Borough'], toronto_df_coors['Neighbourhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

Inserting venue name, latitude, longitude, and category into dataframe.

In [261]:
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Postcode', 'Borough', 'Neighbourhood', 'BoroughLat', 'BoroughLong', 'Venue', 'VenueLat', 'VenueLong', 'Category']

print(venues_df.shape)
venues_df.head()

(3466, 9)


Unnamed: 0,Postcode,Borough,Neighbourhood,BoroughLat,BoroughLong,Venue,VenueLat,VenueLong,Category
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
3,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,Tim Hortons,43.802,-79.198169,Coffee Shop
4,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,Lee Valley,43.803161,-79.199681,Hobby Shop


Using one hot encoding for the venues to see the occurence in each neighbourhood.

In [262]:
venues_df.groupby(["Postcode", "Borough", "Neighbourhood"]).count()
df_onehot = pd.get_dummies(venues_df[['Category']], prefix="", prefix_sep="")

df_onehot['Postcode'] = venues_df['Postcode']
df_onehot['Borough'] = venues_df['Borough'] 
df_onehot['Neighbourhood'] = venues_df['Neighbourhood'] 

#df_onehot.insert(0, df_onehot.pop(df_onehot.index('Postcode')))
cols = list(df_onehot)
# move the column to head of list using index, pop and insert
cols.insert(0, cols.pop(cols.index('Postcode')))
#cols.insert(1, cols.pop(cols.index('Borough')))
#cols.insert(2, cols.pop(cols.index('Neighbourhood')))
df_oh_order = pd.DataFrame(cols)
Toronto_Fdata = df_onehot.ix[:, cols]
Toronto_Fdata


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  from ipykernel import kernelapp as app


Unnamed: 0,Postcode,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Borough,Neighbourhood
0,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Rouge,Malvern"
1,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Rouge,Malvern"
2,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Rouge,Malvern"
3,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Rouge,Malvern"
4,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Rouge,Malvern"
5,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Rouge,Malvern"
6,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Rouge,Malvern"
7,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Highland Creek,Rouge Hill,Port Union"
8,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Highland Creek,Rouge Hill,Port Union"
9,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Scarborough,"Highland Creek,Rouge Hill,Port Union"


Grouping by neighbourhood.

In [263]:
Toronto_grouped = df_onehot.groupby('Neighbourhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.010000,0.00,0.000000,0.000000,0.000000,0.010000,0.0,0.000000,0.000000,0.000000
1,Agincourt,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
4,"Alderwood,Long Branch",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
5,"Bathurst Manor,Downsview North,Wilson Heights",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.050000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
6,Bayview Village,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
7,"Bedford Park,Lawrence Manor East",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
8,Berczy Park,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.010000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
9,"Birch Cliff,Cliffside West",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,...,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000


In [264]:
Toronto_grouped.shape

(102, 322)

Printing each neighborhood along with the top 5 most common venues

In [265]:
num_top_venues = 10

for hood in Toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
                 venue  freq
0          Coffee Shop  0.07
1                 Café  0.06
2     Sushi Restaurant  0.04
3                  Bar  0.04
4     Asian Restaurant  0.03
5           Steakhouse  0.03
6      Thai Restaurant  0.03
7  American Restaurant  0.03
8              Theater  0.03
9           Restaurant  0.03


----Agincourt----
                       venue  freq
0            Badminton Court  0.11
1                  Pool Hall  0.11
2  Latin American Restaurant  0.11
3             Breakfast Spot  0.11
4            Motorcycle Shop  0.11
5               Skating Rink  0.11
6                Coffee Shop  0.11
7             Sandwich Place  0.11
8                     Lounge  0.11
9    New American Restaurant  0.00


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                  venue  freq
0    Chinese Restaurant  0.14
1           Pizza Place  0.14
2             BBQ Joint  0.07
3              Pharmacy  0.07
4  Caribbean Restaurant  0.07
5

                     venue  freq
0              Pizza Place  0.50
1                  Theater  0.25
2        Convenience Store  0.25
3                      ATM  0.00
4             Music School  0.00
5             Noodle House  0.00
6                Nightclub  0.00
7  New American Restaurant  0.00
8             Neighborhood  0.00
9               Nail Salon  0.00


----Commerce Court,Victoria Hotel----
                 venue  freq
0          Coffee Shop  0.11
1                Hotel  0.07
2                 Café  0.07
3           Restaurant  0.04
4           Steakhouse  0.03
5         Concert Hall  0.03
6  American Restaurant  0.03
7   Seafood Restaurant  0.03
8        Deli / Bodega  0.03
9   Italian Restaurant  0.03


----Davisville----
                  venue  freq
0           Pizza Place  0.07
1                   Gym  0.05
2  Fast Food Restaurant  0.05
3    Italian Restaurant  0.05
4        Sandwich Place  0.05
5           Coffee Shop  0.05
6                  Café  0.05
7          Desser

                   venue  freq
0            Coffee Shop  0.15
1                   Café  0.05
2             Restaurant  0.05
3                   Park  0.05
4                    Pub  0.04
5                Theater  0.04
6                 Bakery  0.04
7        Thai Restaurant  0.03
8     Italian Restaurant  0.03
9  Performing Arts Venue  0.03


----High Park,The Junction South----
                venue  freq
0                 Bar  0.09
1  Italian Restaurant  0.07
2                Café  0.07
3     Thai Restaurant  0.05
4                Park  0.05
5         Flea Market  0.03
6              Bakery  0.03
7      Discount Store  0.03
8  Mexican Restaurant  0.03
9       Grocery Store  0.03


----Highland Creek,Rouge Hill,Port Union----
                     venue  freq
0           Breakfast Spot  0.50
1                      Bar  0.25
2             Burger Joint  0.25
3                      ATM  0.00
4              Music Venue  0.00
5                   Office  0.00
6             Noodle House  0.00
7

                 venue  freq
0          Coffee Shop  0.08
1           Restaurant  0.04
2  American Restaurant  0.04
3         Gourmet Shop  0.04
4            Gift Shop  0.04
5            Bookstore  0.04
6       Breakfast Spot  0.04
7                 Café  0.04
8      Thai Restaurant  0.04
9                  Bar  0.04


----Parkwoods----
                     venue  freq
0        Food & Drink Shop  0.33
1                     Park  0.33
2                Pet Store  0.33
3                      ATM  0.00
4             Music School  0.00
5             Noodle House  0.00
6                Nightclub  0.00
7  New American Restaurant  0.00
8             Neighborhood  0.00
9               Nail Salon  0.00


----Queen's Park----
                venue  freq
0         Coffee Shop  0.19
1      Sandwich Place  0.06
2  Italian Restaurant  0.05
3                 Gym  0.04
4                Café  0.04
5  Falafel Restaurant  0.03
6               Diner  0.03
7                Park  0.03
8           Gastropub  

            venue  freq
0    Home Service  0.12
1     Coffee Shop  0.12
2         Butcher  0.12
3            Park  0.12
4   Grocery Store  0.12
5        Pharmacy  0.12
6     Pizza Place  0.12
7  Discount Store  0.12
8      Playground  0.00
9          Office  0.00


----Woburn----
                     venue  freq
0                     Park   0.4
1              Coffee Shop   0.4
2         Business Service   0.2
3                      ATM   0.0
4               Nail Salon   0.0
5              Opera House   0.0
6                   Office   0.0
7             Noodle House   0.0
8                Nightclub   0.0
9  New American Restaurant   0.0


----Woodbine Gardens,Parkview Hill----
                  venue  freq
0  Fast Food Restaurant  0.14
1           Pizza Place  0.14
2        Breakfast Spot  0.07
3    Rock Climbing Spot  0.07
4    Athletics & Sports  0.07
5              Pharmacy  0.07
6  Gym / Fitness Center  0.07
7             Gastropub  0.07
8              Bus Line  0.07
9          Inte

We define a function that sorts into descending order.

In [266]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

New Dataframe  with the most common venues for each neighbourhood from one to ten.

In [305]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

#create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = Toronto_grouped['Neighbourhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)
  
neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Sushi Restaurant,Bar,Thai Restaurant,Steakhouse,Hotel,Restaurant,Theater,Asian Restaurant
1,Agincourt,Motorcycle Shop,Pool Hall,Badminton Court,Breakfast Spot,Sandwich Place,Coffee Shop,Lounge,Skating Rink,Latin American Restaurant,Drugstore
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Chinese Restaurant,Pizza Place,Gym,Caribbean Restaurant,Udon Restaurant,Park,Noodle House,Pharmacy,Shop & Service,Fast Food Restaurant
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Pizza Place,Grocery Store,Fast Food Restaurant,Fried Chicken Joint,Hardware Store,Sandwich Place,Beer Store,Pharmacy,Drugstore,Discount Store
4,"Alderwood,Long Branch",Pizza Place,Convenience Store,Pool,Sandwich Place,Coffee Shop,Gym,Pub,Skating Rink,Pharmacy,Gas Station


In [None]:
Using K-meqns clustering to fin

In [306]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 6

Toronto_grouped_clustering = Toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 2, 2, 2, 0, 2, 0, 0, 0])

In [307]:
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Toronto_merged = toronto_df_coors

#merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

Toronto_merged # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,4.0,Fast Food Restaurant,Coffee Shop,Spa,Bus Station,Hobby Shop,Hardware Store,Drugstore,Diner,Discount Store,Dive Bar
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,0.0,Breakfast Spot,Burger Joint,Bar,Yoga Studio,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,2.0,Pizza Place,Fast Food Restaurant,Intersection,Rental Car Location,Breakfast Spot,Sports Bar,Medical Center,Beer Store,Electronics Store,Moving Target
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Park,Business Service,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Bakery,Coffee Shop,Indian Restaurant,Thai Restaurant,Bank,Caribbean Restaurant,Hakka Restaurant,Gym / Fitness Center,Flower Shop,Fried Chicken Joint
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,4.0,Fast Food Restaurant,Coffee Shop,Pizza Place,Convenience Store,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029,0.0,Coffee Shop,Convenience Store,Hobby Shop,Intersection,Department Store,Sandwich Place,Discount Store,Bus Station,Light Rail Station,Metro Station
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577,0.0,Intersection,Diner,Bus Line,Bakery,Coffee Shop,Park,Soccer Field,Bus Station,Fast Food Restaurant,Metro Station
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476,2.0,Wings Joint,Hardware Store,Chinese Restaurant,Burger Joint,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848,0.0,College Stadium,Photography Studio,Thai Restaurant,Diner,General Entertainment,Skating Rink,Park,Café,Eastern European Restaurant,Dumpling Restaurant


In [308]:
Toronto_merged = Toronto_merged.dropna()
#f = df.fillna(0)
Toronto_merged

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,4.0,Fast Food Restaurant,Coffee Shop,Spa,Bus Station,Hobby Shop,Hardware Store,Drugstore,Diner,Discount Store,Dive Bar
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,0.0,Breakfast Spot,Burger Joint,Bar,Yoga Studio,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,2.0,Pizza Place,Fast Food Restaurant,Intersection,Rental Car Location,Breakfast Spot,Sports Bar,Medical Center,Beer Store,Electronics Store,Moving Target
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Park,Business Service,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Bakery,Coffee Shop,Indian Restaurant,Thai Restaurant,Bank,Caribbean Restaurant,Hakka Restaurant,Gym / Fitness Center,Flower Shop,Fried Chicken Joint
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,4.0,Fast Food Restaurant,Coffee Shop,Pizza Place,Convenience Store,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029,0.0,Coffee Shop,Convenience Store,Hobby Shop,Intersection,Department Store,Sandwich Place,Discount Store,Bus Station,Light Rail Station,Metro Station
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577,0.0,Intersection,Diner,Bus Line,Bakery,Coffee Shop,Park,Soccer Field,Bus Station,Fast Food Restaurant,Metro Station
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476,2.0,Wings Joint,Hardware Store,Chinese Restaurant,Burger Joint,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848,0.0,College Stadium,Photography Studio,Thai Restaurant,Diner,General Entertainment,Skating Rink,Park,Café,Eastern European Restaurant,Dumpling Restaurant


## Examining Clusters
Map of the neighbourhood clusters in Toronto by colour.

In [309]:
# create map
map_clusters = folium.Map(location=[tor_lat, tor_lng], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
#c_len = len(Toronto_merged['Latitude'])
#cluster1 = int(cluster)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Cluster 1

In [310]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0.0,Breakfast Spot,Burger Joint,Bar,Yoga Studio,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
3,Scarborough,0.0,Coffee Shop,Park,Business Service,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,Scarborough,0.0,Bakery,Coffee Shop,Indian Restaurant,Thai Restaurant,Bank,Caribbean Restaurant,Hakka Restaurant,Gym / Fitness Center,Flower Shop,Fried Chicken Joint
6,Scarborough,0.0,Coffee Shop,Convenience Store,Hobby Shop,Intersection,Department Store,Sandwich Place,Discount Store,Bus Station,Light Rail Station,Metro Station
7,Scarborough,0.0,Intersection,Diner,Bus Line,Bakery,Coffee Shop,Park,Soccer Field,Bus Station,Fast Food Restaurant,Metro Station
9,Scarborough,0.0,College Stadium,Photography Studio,Thai Restaurant,Diner,General Entertainment,Skating Rink,Park,Café,Eastern European Restaurant,Dumpling Restaurant
10,Scarborough,0.0,Fast Food Restaurant,Indian Restaurant,Electronics Store,Gym,Bakery,Light Rail Station,Chinese Restaurant,Pet Store,Vietnamese Restaurant,Coffee Shop
11,Scarborough,0.0,Middle Eastern Restaurant,Pizza Place,Print Shop,Burger Joint,Smoke Shop,Fish Market,Moving Target,Seafood Restaurant,Korean Restaurant,Bakery
12,Scarborough,0.0,Motorcycle Shop,Pool Hall,Badminton Court,Breakfast Spot,Sandwich Place,Coffee Shop,Lounge,Skating Rink,Latin American Restaurant,Drugstore
18,North York,0.0,Clothing Store,Fast Food Restaurant,Coffee Shop,Japanese Restaurant,Baseball Field,Electronics Store,Sporting Goods Shop,Restaurant,Chinese Restaurant,Department Store


## Cluster 2

In [312]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,North York,1.0,Cafeteria,College Theater,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant


## Cluster 3

In [313]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,2.0,Pizza Place,Fast Food Restaurant,Intersection,Rental Car Location,Breakfast Spot,Sports Bar,Medical Center,Beer Store,Electronics Store,Moving Target
8,Scarborough,2.0,Wings Joint,Hardware Store,Chinese Restaurant,Burger Joint,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
13,Scarborough,2.0,Pharmacy,Pizza Place,Intersection,Bank,Fried Chicken Joint,Thai Restaurant,Italian Restaurant,Seafood Restaurant,Noodle House,Shopping Mall
14,Scarborough,2.0,Chinese Restaurant,Pizza Place,Gym,Caribbean Restaurant,Udon Restaurant,Park,Noodle House,Pharmacy,Shop & Service,Fast Food Restaurant
15,Scarborough,2.0,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Other Great Outdoors,Auto Garage,Bubble Tea Shop,Sandwich Place,Noodle House,Grocery Store,Pharmacy
17,North York,2.0,Diner,Fast Food Restaurant,Bakery,Pharmacy,Housing Development,Chinese Restaurant,Dumpling Restaurant,Dive Bar,Dog Run,Doner Restaurant
19,North York,2.0,Bank,Japanese Restaurant,Skating Rink,Chinese Restaurant,Grocery Store,Café,Yoga Studio,Drugstore,Dive Bar,Dog Run
23,North York,2.0,Park,Tennis Court,Intersection,Bank,Convenience Store,Gym,Pet Store,Yoga Studio,Dumpling Restaurant,Dive Bar
24,North York,2.0,Pharmacy,Butcher,Discount Store,Home Service,Pizza Place,Coffee Shop,Grocery Store,Park,Dumpling Restaurant,Dive Bar
25,North York,2.0,Pet Store,Food & Drink Shop,Park,Yoga Studio,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore


## Cluster 4

In [314]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,North York,3.0,Baseball Field,Middle Eastern Restaurant,Home Service,Vietnamese Restaurant,Business Service,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore
79,North York,3.0,Home Service,Business Service,Construction & Landscaping,Bakery,Park,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop
91,Etobicoke,3.0,Home Service,Baseball Field,Yoga Studio,Electronics Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant


## Cluster 5

In [315]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,4.0,Fast Food Restaurant,Coffee Shop,Spa,Bus Station,Hobby Shop,Hardware Store,Drugstore,Diner,Discount Store,Dive Bar
5,Scarborough,4.0,Fast Food Restaurant,Coffee Shop,Pizza Place,Convenience Store,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop


## Cluster 6

In [316]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 5, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,Downtown Toronto,5.0,Park,Playground,Trail,Yoga Studio,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore
64,Central Toronto,5.0,Park,Jewelry Store,Sushi Restaurant,Trail,Yoga Studio,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
