# Clustering and Segmentation of Neighborhoods: Part 3 #

This section involves creating a cluster map of a selected Borough (i.e., West Toronto) and the subsequent neighborhoods within the Borough. A new cluster map will then be created to give insight into the venues within each neighborhood for the selected Borough using Foursquare location data.

In [12]:
#Import the required libraries
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
from IPython.display import Image 
from IPython.core.display import HTML
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 
import requests 
from pandas.io.json import json_normalize 

print('Libraries Imported!')

Libraries Imported!


#### Attain the Latitude and Longitude of Toronto ####

In [13]:
#Attain Latitude and Longitude of Toronto
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The latitude and longitide of Toronto is {}, {}, respectively.'.format(latitude, longitude))

The latitude and longitide of Toronto is 43.653963, -79.387207, respectively.


#### Create a map of Toronto ####

In [14]:
# Create a map 
map_toronto = folium.Map(location = [latitude, longitude], zoom_start = 12)

#the addition of neighborhood markers to the Toronto map
for lat, long, bor, neigh in zip(geo_df['Latitude'], geo_df['Longitude'], 
                                 geo_df['Borough'], geo_df['Neighborhood']):
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        popup = label,
        color = 'green',
        fill = True,
        fill_color = 'white',
        fill_opacity = 0.7,
        parse_html = False).add_to(map_toronto)
        
map_toronto

#### Select a borough for further analysis ####

In [15]:
geo_df['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Queen's Park         1
Name: Borough, dtype: int64

West Toronto attains the median value for geo_df dataframe and hence will be studied further for analysis. 

#### Neighborhoods in West Toronto ####

In [17]:
#Select for Neighborhoods in "West Toronto" only
geo_df = geo_df[geo_df['Borough'] =="West Toronto"].reset_index(drop=True)
geo_df.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975
2,M6K,West Toronto,"Exhibition Place, Parkdale Village, Brockton",43.636847,-79.428191
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763
4,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325


#### Create a map of West Toronto and subsequent Neighborhoods ####

In [18]:
# Coordinates for West Toronto
address_WT = 'West Toronto, Toronto'
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
WT_lat = location.latitude
WT_long = location.longitude
print('The geograpical coordinates of West Toronto are {}, {}.'.format(WT_lat, WT_long))

The geograpical coordinates of West Toronto are 43.653963, -79.387207.


In [20]:
map_WT = folium.Map(location=[WT_lat, WT_long], zoom_start=11)

# add markers to map
for lat, lng, label in zip(geo_df['Latitude'], geo_df['Longitude'], geo_df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_WT)  
    
map_WT

#### Connect to Foursqaure and Explore ####

In [1]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # API version

In [24]:
#Explore the first neighborhoods
geo_df.loc[0, 'Neighborhood']

'Dovercourt Village, Dufferin'

In [25]:
DCV_lat = geo_df.loc[0, 'Latitude'] 
DCV_long = geo_df.loc[0, 'Longitude'] 

neighborhood_name = geo_df.loc[0, 'Neighborhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               DCV_lat, 
                                                               DCV_long))

Latitude and longitude values of Dovercourt Village, Dufferin are 43.66900510000001, -79.4422593.


#### List top 100 venues within a 500 metre radius of Dovercourt Village and Dufferin ####

In [26]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    DCV_lat, 
    DCV_long, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=QPENQ4U03JYRKTBKH3FSCP5W5EK3DWSAP0ZBV3NK4HRZQACO&client_secret=EKZUCEE3YG5FM5WWSYMTXCMEOMVTQHJMMSUMEMFPL3DY4LMK&v=20180605&ll=43.66900510000001,-79.4422593&radius=500&limit=100'

In [28]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c874df44c1f6763521fe030'},
 'response': {'headerLocation': 'Davenport',
  'headerFullLocation': 'Davenport, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 20,
  'suggestedBounds': {'ne': {'lat': 43.67350510450001,
    'lng': -79.43604977526607},
   'sw': {'lat': 43.664505095500004, 'lng': -79.44846882473394}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5753753b498eeb535c53aed5',
       'name': 'The Greater Good Bar',
       'location': {'address': '229 Geary St',
        'crossStreet': 'at Dufferin St',
        'lat': 43.669409,
        'lng': -79.439267,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.669409,
          'lng': -79.439267}],
        'distance': 245,
        'postalC

In [29]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [32]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Greater Good Bar,Bar,43.669409,-79.439267
1,Parallel,Middle Eastern Restaurant,43.669516,-79.438728
2,Happy Bakery & Pastries,Bakery,43.66705,-79.441791
3,FreshCo,Supermarket,43.667918,-79.440754
4,Blood Brothers Brewing,Brewery,43.669944,-79.436533


In [33]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

20 venues were returned by Foursquare.


In [34]:
print('There are {} uniques categories.'.format(len(nearby_venues['categories'].unique())))

There are 16 uniques categories.


In [35]:
print(nearby_venues['categories'].unique())

['Bar' 'Middle Eastern Restaurant' 'Bakery' 'Supermarket' 'Brewery'
 'Gym / Fitness Center' 'Music Venue' 'Café' 'Bank' 'Discount Store'
 'Pharmacy' 'Liquor Store' 'Park' 'Athletics & Sports' 'Pool'
 'Portuguese Restaurant']


In [37]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
                CLIENT_ID, 
                CLIENT_SECRET, 
                lat, 
                lng, 
                VERSION, 
                radius, 
                LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)
    

In [41]:
WT_venues = getNearbyVenues(names=geo_df['Neighborhood'],
                                   latitudes=geo_df['Latitude'],
                                   longitudes=geo_df['Longitude']
                                  )

Dovercourt Village, Dufferin
Little Portugal, Trinity
Exhibition Place, Parkdale Village, Brockton
High Park, The Junction South
Parkdale, Roncesvalles
Swansea, Runnymede


#### Shape of dataframe ####

In [42]:
print(WT_venues.shape)
WT_venues.head()

(180, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Dovercourt Village, Dufferin",43.669005,-79.442259,The Greater Good Bar,43.669409,-79.439267,Bar
1,"Dovercourt Village, Dufferin",43.669005,-79.442259,Parallel,43.669516,-79.438728,Middle Eastern Restaurant
2,"Dovercourt Village, Dufferin",43.669005,-79.442259,Happy Bakery & Pastries,43.66705,-79.441791,Bakery
3,"Dovercourt Village, Dufferin",43.669005,-79.442259,FreshCo,43.667918,-79.440754,Supermarket
4,"Dovercourt Village, Dufferin",43.669005,-79.442259,Blood Brothers Brewing,43.669944,-79.436533,Brewery


#### Create a count to determine the neighborhood with the most venues ####

In [43]:
WT_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Dovercourt Village, Dufferin",20,20,20,20,20,20
"Exhibition Place, Parkdale Village, Brockton",23,23,23,23,23,23
"High Park, The Junction South",24,24,24,24,24,24
"Little Portugal, Trinity",60,60,60,60,60,60
"Parkdale, Roncesvalles",16,16,16,16,16,16
"Swansea, Runnymede",37,37,37,37,37,37


Little Portugal and Trinity have the most venues within west Toronto. 

In [45]:
#How many unique categories are there within West Toronto
print('There are {} uniques categories.'.format(len(WT_venues['Venue Category'].unique())))

There are 90 uniques categories.


#### Analyze category data about each neighborhood ####

In [46]:
# one hot encoding
WT_onehot = pd.get_dummies(WT_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
WT_onehot['Neighborhood'] = WT_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [WT_onehot.columns[-1]] + list(WT_onehot.columns[:-1])
WT_onehot = WT_onehot[fixed_columns]

WT_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Bistro,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Ice Cream Shop,Indie Movie Theater,Italian Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pool,Portuguese Restaurant,Post Office,Pub,Record Shop,Restaurant,Salon / Barbershop,Sandwich Place,Smoothie Shop,Southern / Soul Food Restaurant,Speakeasy,Stadium,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Dovercourt Village, Dufferin",0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
4,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [47]:
WT_onehot.shape

(180, 91)

In [49]:
# Group category frequency via neighborhood
WT_grouped = WT_onehot.groupby('Neighborhood').mean().reset_index()
WT_grouped.head(6)

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Bistro,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Ice Cream Shop,Indie Movie Theater,Italian Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pool,Portuguese Restaurant,Post Office,Pub,Record Shop,Restaurant,Salon / Barbershop,Sandwich Place,Smoothie Shop,Southern / Soul Food Restaurant,Speakeasy,Stadium,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Dovercourt Village, Dufferin",0.0,0.0,0.0,0.0,0.0,0.05,0.1,0.05,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,0.1,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Exhibition Place, Parkdale Village, Brockton",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.086957,0.0,0.0,0.043478,0.086957,0.0,0.043478,0.043478,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.043478,0.043478,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"High Park, The Junction South",0.0,0.041667,0.0,0.041667,0.0,0.0,0.041667,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.083333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.041667,0.041667,0.041667,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0
3,"Little Portugal, Trinity",0.016667,0.0,0.016667,0.0,0.05,0.0,0.033333,0.0,0.133333,0.016667,0.0,0.033333,0.0,0.016667,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.05,0.0,0.016667,0.016667,0.0,0.016667,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.016667,0.0,0.016667,0.0,0.0,0.016667,0.016667,0.0,0.016667,0.016667,0.016667,0.0,0.0,0.016667,0.016667,0.033333,0.0,0.0,0.016667,0.0,0.016667,0.016667,0.0,0.016667,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.016667,0.016667,0.033333,0.016667,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.016667,0.0,0.033333,0.016667,0.016667
4,"Parkdale, Roncesvalles",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0625,0.0,0.125,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Swansea, Runnymede",0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.027027,0.0,0.027027,0.0,0.0,0.0,0.0,0.027027,0.081081,0.0,0.0,0.0,0.0,0.081081,0.0,0.0,0.0,0.027027,0.027027,0.0,0.0,0.0,0.027027,0.0,0.027027,0.027027,0.0,0.027027,0.027027,0.0,0.0,0.027027,0.0,0.027027,0.0,0.0,0.027027,0.0,0.0,0.027027,0.054054,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.081081,0.0,0.0,0.027027,0.027027,0.0,0.027027,0.0,0.027027,0.027027,0.0,0.0,0.0,0.0,0.054054,0.0,0.027027,0.0,0.0,0.027027,0.0,0.0,0.0


#### Display top venues in each Neighborhood ####

In [50]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [51]:
#Pick number of venues
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# construct a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = WT_grouped['Neighborhood']

for ind in np.arange(WT_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(WT_grouped.iloc[ind, :], num_top_venues)
neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Dovercourt Village, Dufferin",Discount Store,Pharmacy,Bakery,Supermarket,Brewery,Café,Middle Eastern Restaurant,Park,Pool,Portuguese Restaurant
1,"Exhibition Place, Parkdale Village, Brockton",Coffee Shop,Performing Arts Venue,Café,Breakfast Spot,Gym,Restaurant,Furniture / Home Store,Grocery Store,Gym / Fitness Center,Italian Restaurant
2,"High Park, The Junction South",Mexican Restaurant,Grocery Store,Café,Gastropub,Bookstore,Diner,Park,Cajun / Creole Restaurant,Fast Food Restaurant,Italian Restaurant
3,"Little Portugal, Trinity",Bar,Asian Restaurant,Coffee Shop,Men's Store,Boutique,Pizza Place,Restaurant,Cocktail Bar,Café,Bakery
4,"Parkdale, Roncesvalles",Breakfast Spot,Gift Shop,Bookstore,Movie Theater,Dog Run,Eastern European Restaurant,Restaurant,Burger Joint,Dessert Shop,Coffee Shop
5,"Swansea, Runnymede",Pizza Place,Café,Coffee Shop,Sushi Restaurant,Italian Restaurant,Fish & Chips Shop,Falafel Restaurant,Indie Movie Theater,Fish Market,Post Office


# k-means to cluster the neighborhoods into 5 clusters #

In [64]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# Set the number of clusters to 4
n = 4

# Drop the categorical data from the WT_grouped dataframe
WT_grouped_clustering = WT_grouped.drop("Neighborhood", axis = 1)

# Run the k-means clustering algorithm on the clustering dataframe
kmeans = KMeans(n_clusters = n, random_state = 0).fit(WT_grouped_clustering)

# Display the cluster labels
kmeans.labels_

array([3, 1, 2, 1, 0, 1], dtype=int32)

In [65]:
WT_merge = geo_df

# Create a column with the cluster label from the k-means fitting
WT_merge['Cluster Labels'] = kmeans.labels_

# Add the common venue data from Foursquare
WT_merge = WT_merge.join(neighborhoods_venues_sorted.set_index("Neighborhood"), on = "Neighborhood")

WT_merge.head(6)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,3,Discount Store,Pharmacy,Bakery,Supermarket,Brewery,Café,Middle Eastern Restaurant,Park,Pool,Portuguese Restaurant
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,1,Bar,Asian Restaurant,Coffee Shop,Men's Store,Boutique,Pizza Place,Restaurant,Cocktail Bar,Café,Bakery
2,M6K,West Toronto,"Exhibition Place, Parkdale Village, Brockton",43.636847,-79.428191,2,Coffee Shop,Performing Arts Venue,Café,Breakfast Spot,Gym,Restaurant,Furniture / Home Store,Grocery Store,Gym / Fitness Center,Italian Restaurant
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763,1,Mexican Restaurant,Grocery Store,Café,Gastropub,Bookstore,Diner,Park,Cajun / Creole Restaurant,Fast Food Restaurant,Italian Restaurant
4,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,0,Breakfast Spot,Gift Shop,Bookstore,Movie Theater,Dog Run,Eastern European Restaurant,Restaurant,Burger Joint,Dessert Shop,Coffee Shop
5,M6S,West Toronto,"Swansea, Runnymede",43.651571,-79.48445,1,Pizza Place,Café,Coffee Shop,Sushi Restaurant,Italian Restaurant,Fish & Chips Shop,Falafel Restaurant,Indie Movie Theater,Fish Market,Post Office


#### Map clusters #####

In [66]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
WT_clusters = folium.Map(location = [WT_lat, WT_long], zoom_start=10)

# Set the cluster colors
x = np.arange(n)
ys = [i+x+(i*x)**2 for i in range(n)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(WT_merge['Latitude'], WT_merge['Longitude'], WT_merge['Neighborhood'], WT_merge['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(WT_clusters)
       
WT_clusters

# Examine Clusters #

Cluster 1

In [67]:
WT_merge.loc[WT_merge['Cluster Labels'] == 0, WT_merge.columns[[2] + list(range(5, WT_merge.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,"Parkdale, Roncesvalles",0,Breakfast Spot,Gift Shop,Bookstore,Movie Theater,Dog Run,Eastern European Restaurant,Restaurant,Burger Joint,Dessert Shop,Coffee Shop


Cluster 2

In [68]:
WT_merge.loc[WT_merge['Cluster Labels'] == 1, WT_merge.columns[[2] + list(range(5, WT_merge.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Little Portugal, Trinity",1,Bar,Asian Restaurant,Coffee Shop,Men's Store,Boutique,Pizza Place,Restaurant,Cocktail Bar,Café,Bakery
3,"High Park, The Junction South",1,Mexican Restaurant,Grocery Store,Café,Gastropub,Bookstore,Diner,Park,Cajun / Creole Restaurant,Fast Food Restaurant,Italian Restaurant
5,"Swansea, Runnymede",1,Pizza Place,Café,Coffee Shop,Sushi Restaurant,Italian Restaurant,Fish & Chips Shop,Falafel Restaurant,Indie Movie Theater,Fish Market,Post Office


Cluster 3

In [69]:
WT_merge.loc[WT_merge['Cluster Labels'] == 2, WT_merge.columns[[2] + list(range(5, WT_merge.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Exhibition Place, Parkdale Village, Brockton",2,Coffee Shop,Performing Arts Venue,Café,Breakfast Spot,Gym,Restaurant,Furniture / Home Store,Grocery Store,Gym / Fitness Center,Italian Restaurant


Cluster 4

In [70]:
WT_merge.loc[WT_merge['Cluster Labels'] == 3, WT_merge.columns[[2] + list(range(5, WT_merge.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Dovercourt Village, Dufferin",3,Discount Store,Pharmacy,Bakery,Supermarket,Brewery,Café,Middle Eastern Restaurant,Park,Pool,Portuguese Restaurant
