# Toronto Neighborhoods

## Open the csv file postalcodes and coordinates

In [174]:
import pandas as pd
# Use the first CSV column as the row index, then keep only the first three
# remaining columns (shown below as PostalCode, Borough, Neighborhood).
neigh = pd.read_csv('postal_codes_latlon.csv',index_col =0).iloc[:,0:3]


In [175]:
neigh.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


In [176]:
# Load the geospatial CSV, align its key column name with `neigh`,
# and attach the coordinates to every postal code.
latlon = pd.read_csv("http://cocl.us/Geospatial_data").rename(columns={'Postal Code': 'PostalCode'})
neighborhoods = neigh.merge(latlon, on='PostalCode')
neighborhoods.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


In [177]:
neighborhoods.shape

(103, 5)

We keep only the rows whose Borough contains the word "Toronto":

In [178]:
# Boolean mask of rows whose borough name mentions Toronto.
is_toronto = neighborhoods['Borough'].str.contains('Toronto')
# reset_index() keeps the old row labels in an 'index' column (dropped later before merging).
toronto = neighborhoods.loc[is_toronto].reset_index()
toronto.shape

(39, 6)

We got 39 neighborhoods (postal-code areas) that belong to Toronto boroughs.

### Creating a map with the neighborhoods

Red circles for Toronto Boroughs, blue for the rest

In [179]:
import folium

In [180]:
def add_neighborhood_markers(frame, target_map, color, fill_color):
    """Add one popup-labelled circle marker per row of `frame` to `target_map`.

    Parameters
    ----------
    frame : DataFrame with 'Latitude', 'Longitude', 'Borough' and 'Neighborhood' columns.
    target_map : folium map the markers are added to.
    color : outline color of the circles.
    fill_color : fill color of the circles.
    """
    for lat, lng, borough, neighborhood in zip(frame['Latitude'], frame['Longitude'],
                                               frame['Borough'], frame['Neighborhood']):
        # parse_html is an option of Popup (escapes the label text); it is not passed
        # to CircleMarker, where it is not a documented parameter.
        label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=fill_color,
            fill_opacity=0.7).add_to(target_map)


# create map of Toronto using latitude and longitude mean values
map_toronto = folium.Map(location=[neighborhoods.Latitude.mean(), neighborhoods.Longitude.mean()], zoom_start=11)

# blue markers: every neighborhood whose borough name does not contain 'Toronto'
blue = neighborhoods[~neighborhoods.Borough.str.contains(pat='Toronto')]
add_neighborhood_markers(blue, map_toronto, color='blue', fill_color='#3186cc')

# red markers: the Toronto boroughs selected earlier
add_neighborhood_markers(toronto, map_toronto, color='red', fill_color='red')

map_toronto

The Toronto boroughs are mostly the ones in the center of the map.

## Clustering

### Define Foursquare credentials and version

This is the user info for Foursquare queries

In [237]:
import os

# Credentials are read from the environment so they are never stored in the notebook.
# Set FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
# Only report whether the secret is present; printing it would save it in the cell output.
print('CLIENT_SECRET: ' + ('<set>' if CLIENT_SECRET else '<not set>'))

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


In [182]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

### Define a function to get all the venues for a list of neighborhoods

In [183]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 500):
    """Query the Foursquare `venues/explore` endpoint once per neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : equal-length iterables describing each
        neighborhood (label and the coordinates used as the search centre).
    radius : search radius sent to the API as `radius`.
    LIMIT : maximum number of venues requested per neighborhood (`limit`).

    Returns
    -------
    pandas.DataFrame with one row per returned venue and the columns
    'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
    'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'.
    The frame is empty (but keeps these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError if the API answers with an error status.

    Uses the notebook-level CLIENT_ID, CLIENT_SECRET and VERSION values.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        response = requests.get(
            endpoint,
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30)  # do not hang the notebook on a stalled connection
        # fail with a clear HTTP error instead of a KeyError further down
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # passing columns= keeps the schema even when no venue was found
    return pd.DataFrame(rows, columns=columns)

Now we create a list with all the venues in Toronto

In [184]:
toronto_venues = getNearbyVenues(toronto['Neighborhood'], 
                                 toronto['Latitude'], 
                                 toronto['Longitude'], 
                                 radius=500)

Harbourfront
Queen's Park
Ryerson, Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
The Danforth West, Riverdale
Design Exchange, Toronto Dominion Centre
Brockton, Exhibition Place, Parkdale Village
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North, Forest Hill West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
Harbord, University of Toronto
Runnymede, Swansea
Moore Park, Summerhill East
Chinatown, Grange Park, Kensington Market
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
Fir

In [185]:
print(toronto_venues.shape)
toronto_venues

(1699, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.654260,-79.360636,Cooper Koo Family YMCA,43.653191,-79.357947,Gym / Fitness Center
3,Harbourfront,43.654260,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.654260,-79.360636,Impact Kitchen,43.656369,-79.356980,Restaurant
5,Harbourfront,43.654260,-79.360636,Dominion Pub and Kitchen,43.656919,-79.358967,Pub
6,Harbourfront,43.654260,-79.360636,Corktown Common,43.655618,-79.356211,Park
7,Harbourfront,43.654260,-79.360636,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
8,Harbourfront,43.654260,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site
9,Harbourfront,43.654260,-79.360636,Distillery Sunday Market,43.650075,-79.361832,Farmers Market


In [186]:
# It is important to save this, because we do not want to query Foursquare again to get the data
toronto_venues.to_csv('toronto_venues.csv')

Count the venues per Neighborhood:

In [187]:
# Count rows per neighborhood, order by the 'Venue' count (largest first), then keep
# just the neighborhood name and a single count column relabelled 'n'.
venue_counts = toronto_venues.groupby('Neighborhood').count()
venue_counts = venue_counts.sort_values(by='Venue', ascending=False).reset_index()
venue_counts.iloc[:, 0:2].rename(columns={'Neighborhood Latitude': 'n'})

Unnamed: 0,Neighborhood,n
0,"Adelaide, King, Richmond",100
1,"Design Exchange, Toronto Dominion Centre",100
2,St. James Town,100
3,"Ryerson, Garden District",100
4,"Harbourfront East, Toronto Islands, Union Station",100
5,"Commerce Court, Victoria Hotel",100
6,"First Canadian Place, Underground city",100
7,Stn A PO Boxes 25 The Esplanade,94
8,"Chinatown, Grange Park, Kensington Market",86
9,Central Bay Street,83


#### Unique venue categories:

In [188]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 231 uniques categories.


## Analyze each Neighborhood

We will prepare a dataset with all categories in columns and the row will mark one or zero in each case.

In [189]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
cols = list(toronto_onehot.columns)
toronto_onehot.shape

(1699, 231)

In [190]:
# add neighborhood column back to dataframe
# NOTE(review): the column is named 'neigh' rather than 'Neighborhood', presumably because
# 'Neighborhood' also appears as a venue category in the outputs below and would clash
# with that dummy column -- confirm.
toronto_onehot['neigh'] = toronto_venues['Neighborhood']
toronto_onehot.shape

(1699, 232)

In [191]:
toronto_onehot = toronto_onehot[['neigh'] + cols]
toronto_onehot.head()

Unnamed: 0,neigh,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [192]:
toronto_onehot.shape

(1699, 232)

### Group by neighborhood, taking the mean frequency of occurrence of each venue category

In [193]:
toronto_grouped = toronto_onehot.groupby('neigh').mean().reset_index()

In [194]:
toronto_grouped

Unnamed: 0,neigh,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,0.0,...,0.0,0.0,0.0,0.012048,0.0,0.0,0.012048,0.0,0.0,0.012048
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.046512,0.0,0.069767,0.011628,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.012195,0.0,0.012195


In [195]:
toronto_grouped.shape

(39, 232)

### Let's print each neighborhood along with the top 10 most common venues

In [196]:
num_top_venues = 10

def print_most_common_list(data_grouped, hood, num_top):
    """Print the `num_top` most frequent venue categories of one neighborhood.

    `data_grouped` holds one row per neighborhood with the name in its first
    column ('neigh') and one frequency column per venue category. The row
    matching `hood` is transposed into a two-column venue/freq table,
    frequencies are rounded to two decimals, sorted in descending order and
    the leading `num_top` rows are printed, followed by a blank separator.
    """
    print("----"+hood+"----")

    hood_table = data_grouped[data_grouped['neigh'] == hood].T.reset_index()
    hood_table.columns = ['venue','freq']

    ranked = (
        hood_table.iloc[1:]  # first transposed row is the 'neigh' name itself
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
        .head(num_top)
    )
    print(ranked)
    print('\n')
    
for hood in toronto_grouped['neigh']:
    print_most_common_list(toronto_grouped, hood, num_top_venues)

----Adelaide, King, Richmond----
              venue  freq
0       Coffee Shop  0.07
1              Café  0.04
2        Steakhouse  0.04
3               Bar  0.04
4   Thai Restaurant  0.03
5    Cosmetics Shop  0.03
6        Restaurant  0.03
7  Asian Restaurant  0.03
8            Bakery  0.03
9    Breakfast Spot  0.02


----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2            Beer Bar  0.04
3  Seafood Restaurant  0.04
4         Cheese Shop  0.04
5          Steakhouse  0.04
6                Café  0.04
7      Farmers Market  0.04
8              Bakery  0.04
9          Restaurant  0.02


----Brockton, Exhibition Place, Parkdale Village----
                    venue  freq
0          Breakfast Spot  0.09
1                    Café  0.09
2             Coffee Shop  0.09
3           Grocery Store  0.05
4            Intersection  0.05
5                 Stadium  0.05
6               Nightclub  0.05
7                  Office  0.05
8  Furni

                venue  freq
0           Gift Shop  0.15
1             Dog Run  0.08
2        Dessert Shop  0.08
3      Breakfast Spot  0.08
4  Italian Restaurant  0.08
5          Restaurant  0.08
6       Movie Theater  0.08
7                 Bar  0.08
8         Coffee Shop  0.08
9           Bookstore  0.08


----Queen's Park----
                 venue  freq
0          Coffee Shop  0.26
1                  Gym  0.05
2                 Park  0.05
3          Yoga Studio  0.03
4  Fried Chicken Joint  0.03
5     Sushi Restaurant  0.03
6            Nightclub  0.03
7             Beer Bar  0.03
8          Salad Place  0.03
9       Sandwich Place  0.03


----Rosedale----
                       venue  freq
0                       Park  0.50
1                 Playground  0.25
2                      Trail  0.25
3               Neighborhood  0.00
4                     Market  0.00
5   Mediterranean Restaurant  0.00
6                Men's Store  0.00
7         Mexican Restaurant  0.00
8  Middle Easter

#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.


In [197]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` (the neighborhood name) is skipped; the
    remaining values are sorted in descending order and the index labels of
    the leading entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]


Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [198]:
import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # the first three ranks get 'st'/'nd'/'rd'; every later rank gets 'th'
    # (explicit bounds check instead of a bare except that would hide other errors)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['neigh']

# fill the ranking columns row by row with the most frequent categories
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Café,Bar,Bakery,Restaurant,Asian Restaurant,Cosmetics Shop,Thai Restaurant,Seafood Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Steakhouse,Cheese Shop,Café,Farmers Market,Beer Bar,Bakery,Seafood Restaurant,Gourmet Shop
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Café,Breakfast Spot,Grocery Store,Bakery,Office,Performing Arts Venue,Pet Store,Nightclub,Climbing Gym
3,Business Reply Mail Processing Centre 969 Eastern,Pizza Place,Auto Workshop,Brewery,Light Rail Station,Farmers Market,Spa,Fast Food Restaurant,Burrito Place,Restaurant,Recording Studio
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Lounge,Airport Service,Airport Terminal,Plane,Harbor / Marina,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry,Bar


## Clustering Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters.

In [199]:
# set number of clusters
kclusters = 5

# keep only the category-frequency columns; `columns=` replaces the positional
# axis argument, which newer pandas versions no longer accept
toronto_grouped_clustering = toronto_grouped.drop(columns='neigh')

# run k-means clustering; n_init is pinned to the previous default of 10 so that
# newer scikit-learn versions (default 'auto') perform the same number of restarts
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [200]:
toronto_grouped_clustering.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.117647,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Dataframe that includes the cluster as well as the top 10 venues for each neighborhood:

In [201]:
# add clustering labels as the first column; overwrite instead of insert when the
# column already exists so that re-running this cell does not raise ValueError
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# drop the 'index' column left by reset_index(); `columns=` replaces the positional
# axis argument, which newer pandas versions no longer accept
toronto_merged = toronto.drop(columns='index')

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,Coffee Shop,Café,Park,Bakery,Pub,Mexican Restaurant,Restaurant,Yoga Studio,Beer Store,Hotel
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,0,Coffee Shop,Park,Gym,Yoga Studio,Burrito Place,Fast Food Restaurant,Italian Restaurant,Juice Bar,Seafood Restaurant,Sandwich Place
2,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Japanese Restaurant,Bakery,Tea Room,Italian Restaurant,Pizza Place,Bubble Tea Shop
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Restaurant,Café,Hotel,Breakfast Spot,Cocktail Bar,Beer Bar,Cosmetics Shop,Bakery,Italian Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Neighborhood,Health Food Store,Trail,Pub,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


### Visualizing the clusters

In [202]:
# create map of Toronto using latitude and longitude mean values
map_clusters = folium.Map(location=[toronto_merged.Latitude.mean(), toronto_merged.Longitude.mean()],
                          zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
# (the previous x/ys arrays were only used for their length, which equals kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'],
                                  toronto_merged['Longitude'],
                                  toronto_merged['Neighborhood'],
                                  toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # labels are 0-based, so index the palette directly instead of relying on
    # rainbow[-1] wrapping around for cluster 0
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Examine Clusters

### Cluster 1

In [204]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, 
                   toronto_merged.columns[[1,2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,Harbourfront,Coffee Shop,Café,Park,Bakery,Pub,Mexican Restaurant,Restaurant,Yoga Studio,Beer Store,Hotel
1,Downtown Toronto,Queen's Park,Coffee Shop,Park,Gym,Yoga Studio,Burrito Place,Fast Food Restaurant,Italian Restaurant,Juice Bar,Seafood Restaurant,Sandwich Place
2,Downtown Toronto,"Ryerson, Garden District",Coffee Shop,Clothing Store,Cosmetics Shop,Café,Japanese Restaurant,Bakery,Tea Room,Italian Restaurant,Pizza Place,Bubble Tea Shop
3,Downtown Toronto,St. James Town,Coffee Shop,Restaurant,Café,Hotel,Breakfast Spot,Cocktail Bar,Beer Bar,Cosmetics Shop,Bakery,Italian Restaurant
5,Downtown Toronto,Berczy Park,Coffee Shop,Cocktail Bar,Steakhouse,Cheese Shop,Café,Farmers Market,Beer Bar,Bakery,Seafood Restaurant,Gourmet Shop
6,Downtown Toronto,Central Bay Street,Coffee Shop,Italian Restaurant,Sandwich Place,Burger Joint,Juice Bar,Japanese Restaurant,Ice Cream Shop,Café,Gym / Fitness Center,Spa
7,Downtown Toronto,Christie,Grocery Store,Café,Park,Candy Store,Athletics & Sports,Italian Restaurant,Gas Station,Baby Store,Diner,Coffee Shop
8,Downtown Toronto,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Café,Bar,Bakery,Restaurant,Asian Restaurant,Cosmetics Shop,Thai Restaurant,Seafood Restaurant
9,West Toronto,"Dovercourt Village, Dufferin",Pharmacy,Bakery,Grocery Store,Art Gallery,Park,Café,Middle Eastern Restaurant,Bar,Bank,Supermarket
10,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",Coffee Shop,Aquarium,Hotel,Café,Italian Restaurant,Restaurant,Brewery,Scenic Lookout,Fried Chicken Joint,Pizza Place


Neighborhoods with coffee shops, banks and restaurants. Downtown.

### Cluster 2

In [205]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, 
                   toronto_merged.columns[[1,2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Central Toronto,Roselawn,Garden,Yoga Studio,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Neighborhood with outdoor activities

### Cluster 3

In [206]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, 
                   toronto_merged.columns[[1,2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Central Toronto,"Moore Park, Summerhill East",Playground,Summer Camp,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Playground

### Cluster 4

In [207]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, 
                   toronto_merged.columns[[1,2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,Lawrence Park,Dim Sum Restaurant,Park,Bus Line,Swim School,Yoga Studio,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
21,Central Toronto,"Forest Hill North, Forest Hill West",Park,Jewelry Store,Trail,Sushi Restaurant,Yoga Studio,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
33,Downtown Toronto,Rosedale,Park,Playground,Trail,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


Park and Yoga

### Cluster 5

In [208]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, 
                   toronto_merged.columns[[1,2] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,The Beaches,Neighborhood,Health Food Store,Trail,Pub,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Pub

## Do the same with all neighborhoods

List all the venues

In [209]:
neighborhoods_venues = getNearbyVenues(neighborhoods['Neighborhood'], 
                                 neighborhoods['Latitude'], 
                                 neighborhoods['Longitude'], 
                                 radius=500)

Parkwoods
Victoria Village
Harbourfront
Lawrence Heights, Lawrence Manor
Queen's Park
Queen's Park
Rouge, Malvern
Don Mills North
Woodbine Gardens, Parkview Hill
Ryerson, Garden District
Glencairn
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Highland Creek, Rouge Hill, Port Union
Flemingdon Park, Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Downsview North, Wilson Heights
Thorncliffe Park
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
East Birchmount Park, Ionview, Kennedy Park
Bayview Village
CFB Toronto, Downsview East
The Danforth West, Riv

In [210]:
# Write the venue table to a CSV file in the working directory.
venues_csv_path = 'neighborhoods_venues.csv'
neighborhoods_venues.to_csv(venues_csv_path)

#### Unique venues

In [211]:
# Count the distinct values in the 'Venue Category' column and report them.
category_count = len(neighborhoods_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 270 uniques categories.


#### Creating the dummies table

In [212]:
# One indicator column per venue category, named by the bare category
# (empty prefix and separator).
category_dummies = pd.get_dummies(
    neighborhoods_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)
cols = list(category_dummies.columns)

# Attach the neighborhood name under the key 'neigh' and move it to the front.
neighborhoods_onehot = category_dummies.assign(
    neigh=neighborhoods_venues['Neighborhood']
)[['neigh'] + cols]

print(neighborhoods_onehot.shape)
neighborhoods_onehot.head()

(2213, 271)


Unnamed: 0,neigh,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Grouped by neighborhood

In [213]:
# Average the indicator columns per neighborhood, then turn the 'neigh'
# group key back into an ordinary column.
category_means = neighborhoods_onehot.groupby('neigh').mean()
neighborhoods_grouped = category_means.reset_index()

### DataFrame with top categories

In [214]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank falls back to 'th'.
indicators = ['st', 'nd', 'rd']

# Column headers: 'Neighborhood' followed by one ranked column per top venue.
# The suffix is chosen with an explicit bounds check instead of a bare
# `except:`, which would also have swallowed unrelated errors.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Start from an empty frame with the ranked headers and copy in the
# neighborhood names (stored under 'neigh' in the grouped table).
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = neighborhoods_grouped['neigh']

# Fill the ranked columns row by row from the grouped table.
for ind in np.arange(neighborhoods_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(neighborhoods_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Bar,Café,Bakery,Cosmetics Shop,Restaurant,Asian Restaurant,Thai Restaurant,Sushi Restaurant
1,Agincourt,Lounge,Breakfast Spot,Skating Rink,Latin American Restaurant,Yoga Studio,Eastern European Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Park,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Video Store,Sandwich Place,Beer Store,Coffee Shop,Fried Chicken Joint,Liquor Store,Pharmacy,Fast Food Restaurant,Pizza Place
4,"Alderwood, Long Branch",Pizza Place,Skating Rink,Gym,Coffee Shop,Pool,Pub,Sandwich Place,Pharmacy,General Travel,General Entertainment


## Clustering

In [215]:
# Number of clusters requested from k-means.
kclusters = 5

# Feature matrix: every column of the grouped table except the 'neigh' key.
# `columns=` is used because passing the axis positionally (`drop('neigh', 1)`)
# is deprecated and is rejected by pandas 2.x.
neighborhoods_grouped_clustering = neighborhoods_grouped.drop(columns='neigh')

# Fit k-means; random_state=0 fixes the seed.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(neighborhoods_grouped_clustering)

# Peek at the labels assigned to the first ten rows.
kmeans.labels_[0:10]

array([0, 0, 2, 0, 0, 0, 0, 0, 0, 0])

Merging clusters

In [216]:
# Put the k-means label in the first column of the ranked-venue table.
# DataFrame.insert raises if the column already exists, so drop any label
# column left over from a previous run; this keeps the cell re-runnable.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach the label and ranked venues to the postal-code table (which carries
# latitude/longitude), matching rows on the neighborhood name. The join keeps
# every row of `neighborhoods`; unmatched neighborhoods get NaN values.
neighborhoods_merged = neighborhoods.join(
    neighborhoods_venues_sorted.set_index('Neighborhood'),
    on='Neighborhood',
)

neighborhoods_merged.head()  # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Food & Drink Shop,Park,BBQ Joint,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Pizza Place,Portuguese Restaurant,French Restaurant,Coffee Shop,Hockey Arena,Intersection,Electronics Store,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0.0,Coffee Shop,Bakery,Park,Café,Pub,Restaurant,Mexican Restaurant,Beer Store,Bank,French Restaurant
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,0.0,Furniture / Home Store,Clothing Store,Accessories Store,Arts & Crafts Store,Gift Shop,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Women's Store
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,0.0,Coffee Shop,Park,Gym,Beer Bar,Seafood Restaurant,Burger Joint,Burrito Place,Sandwich Place,Salad Place,Café


### Map

In [222]:
# Discard every row that still contains a missing value after the merge.
neighborhoods_merged = neighborhoods_merged.dropna(axis=0, how='any')

In [224]:
# Centre the map on the mean latitude/longitude of the remaining neighborhoods.
map_clusters = folium.Map(
    location=[neighborhoods_merged.Latitude.mean(), neighborhoods_merged.Longitude.mean()],
    zoom_start=11,
)

# One hex colour per cluster, sampled evenly from matplotlib's rainbow colormap.
# (The former `x`, `ys` and `markers_colors` variables were never used beyond
# len(ys) == kclusters and have been removed.)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# One circle marker per neighborhood, coloured by its cluster label.
for lat, lon, poi, cluster in zip(neighborhoods_merged['Latitude'],
                                  neighborhoods_merged['Longitude'],
                                  neighborhoods_merged['Neighborhood'],
                                  neighborhoods_merged['Cluster Labels']):
    # Labels are floats in this frame (see the 0.0/1.0 values displayed after
    # the merge), hence int(). The "- 1" offset is kept from the original so
    # the colours do not change: label 0 wraps around to the last colour.
    cluster_color = rainbow[int(cluster) - 1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Examine Clusters

### Cluster 1

In [230]:
# Label 0 (shown as "Cluster 1"): keep positional columns 1-2 plus the columns
# from position 6 onward, ordered by those trailing columns.
venue_columns = list(neighborhoods_merged.columns[6:])
display_columns = list(neighborhoods_merged.columns[[1, 2]]) + venue_columns
(
    neighborhoods_merged
    .loc[neighborhoods_merged['Cluster Labels'] == 0, display_columns]
    .sort_values(by=venue_columns)
)

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
40,North York,"CFB Toronto, Downsview East",Airport,Snack Place,Park,Other Repair Shop,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop
87,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Lounge,Airport Service,Airport Terminal,Harbor / Marina,Bar,Coffee Shop,Rental Car Location,Sculpture Garden,Boutique,Boat or Ferry
51,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",American Restaurant,Motel,Yoga Studio,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Diner
44,Scarborough,"Clairlea, Golden Mile, Oakridge",Bakery,Bus Line,Soccer Field,Bus Station,Ice Cream Shop,Park,Intersection,Metro Station,Donut Shop,Drugstore
49,North York,"Downsview, North Park, Upwood Park",Bakery,Construction & Landscaping,Park,Basketball Court,Yoga Studio,Electronics Store,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
69,West Toronto,"High Park, The Junction South",Bar,Mexican Restaurant,Café,Thai Restaurant,Grocery Store,Gastropub,Fried Chicken Joint,Bakery,Cajun / Creole Restaurant,Diner
37,West Toronto,"Little Portugal, Trinity",Bar,Restaurant,Coffee Shop,Asian Restaurant,Vietnamese Restaurant,Café,Men's Store,Pizza Place,Yoga Studio,Park
12,Scarborough,"Highland Creek, Rouge Hill, Port Union",Bar,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Field
80,Downtown Toronto,"Harbord, University of Toronto",Café,Bookstore,Bar,Japanese Restaurant,Sandwich Place,Restaurant,Bakery,Flower Shop,Italian Restaurant,Beer Bar
54,East Toronto,Studio District,Café,Coffee Shop,Bakery,Italian Restaurant,Gastropub,American Restaurant,Brewery,Yoga Studio,Stationery Store,Fish Market


Coffee shops and airports

###  Cluster 2

In [231]:
# Label 1 (shown as "Cluster 2"): keep positional columns 1-2 plus the columns
# from position 6 onward, ordered by those trailing columns.
venue_columns = list(neighborhoods_merged.columns[6:])
display_columns = list(neighborhoods_merged.columns[[1, 2]]) + venue_columns
(
    neighborhoods_merged
    .loc[neighborhoods_merged['Cluster Labels'] == 1, display_columns]
    .sort_values(by=venue_columns)
)

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
66,North York,York Mills West,Bank,Convenience Store,Park,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
35,East York,East Toronto,Convenience Store,Coffee Shop,Park,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
0,North York,Parkwoods,Food & Drink Shop,Park,BBQ Joint,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
91,Downtown Toronto,Rosedale,Park,Trail,Playground,Yoga Studio,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
21,York,Caledonia-Fairbanks,Park,Women's Store,Fast Food Restaurant,Market,Greek Restaurant,Gourmet Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant
64,York,Weston,Park,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
52,North York,"Newtonbrook, Willowdale",Piano Bar,Park,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
98,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",River,Park,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant


Parks and donut shops are the main features for this cluster

### Cluster 3

In [232]:
# Label 2 (shown as "Cluster 3"): keep positional columns 1-2 plus the columns
# from position 6 onward, ordered by those trailing columns.
venue_columns = list(neighborhoods_merged.columns[6:])
display_columns = list(neighborhoods_merged.columns[[1, 2]]) + venue_columns
(
    neighborhoods_merged
    .loc[neighborhoods_merged['Cluster Labels'] == 2, display_columns]
    .sort_values(by=venue_columns)
)

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Scarborough,Scarborough Village,Playground,Dim Sum Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
85,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Park,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
83,Central Toronto,"Moore Park, Summerhill East",Summer Camp,Playground,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Yoga Studio


The playground neighborhoods

### Cluster 4

In [234]:
# Label 3 (shown as "Cluster 4"): keep positional columns 1-2 plus the columns
# from position 6 onward, ordered by those trailing columns.
venue_columns = list(neighborhoods_merged.columns[6:])
display_columns = list(neighborhoods_merged.columns[[1, 2]]) + venue_columns
(
    neighborhoods_merged
    .loc[neighborhoods_merged['Cluster Labels'] == 3, display_columns]
    .sort_values(by=venue_columns)
)

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,North York,"Silver Hills, York Mills",Cafeteria,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Colombian Restaurant


### Cluster 5

In [236]:
# Label 4 (shown as "Cluster 5"): keep positional columns 1-2 plus the columns
# from position 6 onward, ordered by those trailing columns.
venue_columns = list(neighborhoods_merged.columns[6:])
display_columns = list(neighborhoods_merged.columns[[1, 2]]) + venue_columns
(
    neighborhoods_merged
    .loc[neighborhoods_merged['Cluster Labels'] == 4, display_columns]
    .sort_values(by=venue_columns)
)

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Central Toronto,Roselawn,Garden,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Field


The last two clusters are undetermined: each contains only a single neighborhood.