# Segmenting and Clustering Neighborhoods in Toronto
## Week 3

##### Importing all libraries

In [71]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np 
from bs4 import BeautifulSoup
import urllib.request
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

print('all libraries imported')


all libraries imported


##### Scraping the data from Wikipedia

In [72]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = urllib.request.urlopen(url)
soup = BeautifulSoup(page, "lxml")
table=soup.find('table', class_='wikitable sortable')
Postcode=[]
Borough=[]
Neighborhood=[]
for row in table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==3:
        Postcode.append(cells[0].find(text=True))
        Borough.append(cells[1].find(text=True))
        Neighborhood.append(cells[2].find(text=True))

      
df = pd.DataFrame(Postcode,columns=['Postcode'])
df['Borough']=Borough
df['Neighborhood']=Neighborhood

##### Cleaning up & arranging the data

In [73]:
df = df[df['Borough']!='Not assigned'] 
df.index = np.arange(0, len(df))

In [74]:
df['Neighborhood'] = df.groupby('Postcode')['Neighborhood'].transform(lambda x: "%s" % ','.join(x)).values
df = df.drop_duplicates().reset_index(drop=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned\n


In [75]:
df['Neighborhood'].replace("Not assigned\n", df['Borough'], inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [76]:
df.shape

(103, 3)

##### Adding latitude and longitude information to the dataframe

In [77]:
df1= pd.read_csv('https://cocl.us/Geospatial_data')
df1 = df1.rename(index=str, columns={"Postal Code": "Postcode"})
df=df.merge(df1, how='left', on='Postcode')
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


##### Clustering and visualization

In [78]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes

import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.





Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Folium installed
Libraries imported.




In [79]:
address = 'Toronto, Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [80]:
CLIENT_ID = '1T1BO50DCYM4O1WD0OUENVUYYRI2YAFL1HN5GO5I2CIIQ4KN' # your Foursquare ID
CLIENT_SECRET = 'PORDSZOO0E3DTAMF14SF14NOVCLIB1IGJBVL40LFLTYQIRB0' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1T1BO50DCYM4O1WD0OUENVUYYRI2YAFL1HN5GO5I2CIIQ4KN
CLIENT_SECRET:PORDSZOO0E3DTAMF14SF14NOVCLIB1IGJBVL40LFLTYQIRB0


In [81]:
import requests
latitude = 43.653963
longitude = -79.387207
radius = 500
limit = 15
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    latitude, 
    longitude, 
    VERSION, 
    radius, 
    LIMIT)
results = requests.get(url).json()

In [82]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10.7) # generate map centred around Toronto

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=500,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)
    
map_toronto

In [83]:
def getNearbyVenues(postcode, names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for pc, names, lat, lng in zip(postcode, names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            pc,
            names, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])


    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode',
                  'Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [84]:
toronto_venues = getNearbyVenues(postcode=df['Postcode'], names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )
toronto_venues.head()

Unnamed: 0,Postcode,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M3A,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,M3A,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,M3A,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,M4A,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,M4A,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [85]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Stadium,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Drugstore,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Other Great Outdoors,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,River,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Soccer Field,Social Club,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [86]:
#examine dataframe size
toronto_onehot.shape

(1338, 238)

In [87]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

In [88]:
toronto_grouped.shape

(99, 238)

In [89]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide
,King
,Richmond
----
              venue  freq
0        Steakhouse  0.10
1       Pizza Place  0.07
2             Hotel  0.07
3       Coffee Shop  0.07
4  Asian Restaurant  0.07


----Agincourt----
            venue  freq
0  Clothing Store  0.25
1          Lounge  0.25
2    Skating Rink  0.25
3  Breakfast Spot  0.25
4     Yoga Studio  0.00


----Agincourt North,L'Amoreaux East
,Milliken,Steeles East
----
            venue  freq
0            Park   0.5
1      Playground   0.5
2           Motel   0.0
3          Market   0.0
4  Massage Studio   0.0


----Albion Gardens
,Beaumond Heights,Humbergate
,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
            venue  freq
0     Pizza Place  0.18
1   Grocery Store  0.18
2     Coffee Shop  0.09
3      Beer Store  0.09
4  Sandwich Place  0.09


----Alderwood,Long Branch----
            venue  freq
0     Pizza Place  0.25
1  Sandwich Place  0.12
2             Gym  0.12
3     Coffee Shop  0.12
4        Pharmacy  0.12



              venue  freq
0    Clothing Store  0.13
1       Coffee Shop  0.10
2  Department Store  0.07
3    Cosmetics Shop  0.03
4         Juice Bar  0.03


----First Canadian Place,Underground city----
           venue  freq
0           Café  0.13
1    Coffee Shop  0.10
2     Restaurant  0.07
3     Steakhouse  0.07
4  Deli / Bodega  0.07


----Flemingdon Park,Don Mills South
----
                venue  freq
0          Beer Store  0.10
1         Coffee Shop  0.10
2                 Gym  0.10
3    Asian Restaurant  0.10
4  Chinese Restaurant  0.05


----Forest Hill North,Forest Hill West
----
              venue  freq
0              Park   0.2
1      Home Service   0.2
2  Sushi Restaurant   0.2
3     Jewelry Store   0.2
4             Trail   0.2


----Glencairn
----
                 venue  freq
0   Italian Restaurant   0.2
1                  Pub   0.2
2  Japanese Restaurant   0.2
3               Bakery   0.2
4     Asian Restaurant   0.2


----Guildwood
,Morningside,West Hill----
       

               venue  freq
0     Sandwich Place  0.12
1        Coffee Shop  0.12
2               Café  0.12
3        Pizza Place  0.08
4  Indian Restaurant  0.04


----The Beaches----
                  venue  freq
0                 Trail   0.2
1  Other Great Outdoors   0.2
2     Health Food Store   0.2
3                   Pub   0.2
4           Yoga Studio   0.0


----The Beaches West
,India Bazaar----
               venue  freq
0  Fish & Chips Shop  0.05
1   Sushi Restaurant  0.05
2          Pet Store  0.05
3        Pizza Place  0.05
4        Coffee Shop  0.05


----The Danforth West
,Riverdale----
                     venue  freq
0         Greek Restaurant  0.27
1       Italian Restaurant  0.07
2           Ice Cream Shop  0.07
3  Fruit & Vegetable Store  0.03
4                Juice Bar  0.03


----The Junction North
,Runnymede
----
           venue  freq
0    Pizza Place  0.25
1  Grocery Store  0.25
2        Brewery  0.25
3       Bus Line  0.25
4    Yoga Studio  0.00


----The Kingswa

In [90]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [101]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide\n,King\n,Richmond\n",Steakhouse,Coffee Shop,Café,Asian Restaurant,Pizza Place,Hotel,Concert Hall,Monument / Landmark,Sushi Restaurant,Lounge
1,Agincourt,Clothing Store,Lounge,Skating Rink,Breakfast Spot,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
2,"Agincourt North,L'Amoreaux East\n,Milliken,Ste...",Playground,Park,Women's Store,Cuban Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
3,"Albion Gardens\n,Beaumond Heights,Humbergate\n...",Grocery Store,Pizza Place,Coffee Shop,Video Store,Beer Store,Pharmacy,Fried Chicken Joint,Fast Food Restaurant,Sandwich Place,Dim Sum Restaurant
4,"Alderwood,Long Branch",Pizza Place,Gym,Skating Rink,Coffee Shop,Pub,Pharmacy,Sandwich Place,Women's Store,Dessert Shop,Dance Studio
5,"Bathurst Manor,Downsview North\n,Wilson Heights",Coffee Shop,Gift Shop,Pharmacy,Shopping Mall,Diner,Restaurant,Deli / Bodega,Bridal Shop,Fast Food Restaurant,Bank
6,Bayview Village,Chinese Restaurant,Bank,Japanese Restaurant,Café,Women's Store,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
7,"Bedford Park,Lawrence Manor East\n",Italian Restaurant,Coffee Shop,Liquor Store,Pharmacy,Pub,Fast Food Restaurant,Sushi Restaurant,Sandwich Place,Butcher,Café
8,Berczy Park,Café,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Farmers Market,Tea Room,Liquor Store,Bistro,Jazz Club
9,"Birch Cliff,Cliffside West\n",Skating Rink,College Stadium,General Entertainment,Café,Women's Store,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run


In [152]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 3, 4, 4, 2, 0, 2, 2, 0])

In [153]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters_merged.head() # check the last columns!

ValueError: cannot insert Cluster Labels, already exists

In [154]:

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)



# add markers to the map
markers_colors = []
for lat, lon, neigh, pc, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Postcode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(neigh) + '(' + str(pc) + '): Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='red',
        fill_opacity=0.7).add_to(map_clusters)
map_clusters

In [155]:
dfa=toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(0, toronto_merged.shape[1]))]]
dfa

Unnamed: 0,Borough,Postcode,Borough.1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,North York,M3B,North York,Don Mills North\n,43.745906,-79.352188,0.0,Gym / Fitness Center,Basketball Court,Japanese Restaurant,Café,Baseball Field,Caribbean Restaurant,Drugstore,Eastern European Restaurant,Department Store,Electronics Store
8,East York,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,0.0,Pizza Place,Fast Food Restaurant,Gym / Fitness Center,Bank,Café,Athletics & Sports,Gastropub,Intersection,Pharmacy,Breakfast Spot
10,North York,M6B,North York,Glencairn\n,43.709577,-79.445073,0.0,Japanese Restaurant,Italian Restaurant,Asian Restaurant,Pub,Bakery,Dessert Shop,Dim Sum Restaurant,Diner,Women's Store,Deli / Bodega
11,Etobicoke,M9B,Etobicoke,"Cloverdale\n,Islington,Martin Grove\n,Princess...",43.650943,-79.554724,0.0,Golf Course,Bank,Women's Store,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
12,Scarborough,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,0.0,Golf Course,Bar,Women's Store,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
16,York,M6C,York,Humewood-Cedarvale,43.693781,-79.428191,0.0,Field,Dog Run,Trail,Hockey Arena,Women's Store,Curling Ice,Electronics Store,Eastern European Restaurant,Drugstore,Discount Store
18,Scarborough,M1E,Scarborough,"Guildwood\n,Morningside,West Hill",43.763573,-79.188711,0.0,Electronics Store,Rental Car Location,Pizza Place,Breakfast Spot,Medical Center,Intersection,Mexican Restaurant,Women's Store,Drugstore,Dog Run
19,East Toronto,M4E,East Toronto,The Beaches,43.676357,-79.293031,0.0,Other Great Outdoors,Trail,Pub,Health Food Store,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant
26,Scarborough,M1H,Scarborough,Cedarbrae\n,43.773136,-79.239476,0.0,Fried Chicken Joint,Bank,Athletics & Sports,Bakery,Thai Restaurant,Caribbean Restaurant,Hakka Restaurant,Drugstore,Dog Run,Discount Store
27,North York,M2H,North York,Hillcrest Village,43.803762,-79.363452,0.0,Golf Course,Dog Run,Fast Food Restaurant,Mediterranean Restaurant,Pool,Women's Store,Curling Ice,Eastern European Restaurant,Drugstore,Discount Store


In [156]:
dfb=toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(0, toronto_merged.shape[1]))]]
dfb

Unnamed: 0,Borough,Postcode,Borough.1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,1.0,Fast Food Restaurant,Women's Store,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner


In [157]:
dfc=toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(0, toronto_merged.shape[1]))]]
dfc

Unnamed: 0,Borough,Postcode,Borough.1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Coffee Shop,Portuguese Restaurant,Intersection,Hockey Arena,French Restaurant,Dog Run,Discount Store,Diner,Curling Ice,Dessert Shop
2,Downtown Toronto,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636,2.0,Coffee Shop,Bakery,Park,Gym / Fitness Center,Breakfast Spot,Mexican Restaurant,Spa,Café,Chocolate Shop,Dessert Shop
3,North York,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763,2.0,Furniture / Home Store,Clothing Store,Women's Store,Miscellaneous Shop,Boutique,Event Space,Athletics & Sports,Arts & Crafts Store,Coffee Shop,Vietnamese Restaurant
4,Queen's Park,M7A,Queen's Park,Queen's Park,43.662301,-79.389494,2.0,Coffee Shop,Park,Gym,Diner,Burrito Place,Italian Restaurant,Japanese Restaurant,Smoothie Shop,Seafood Restaurant,Burger Joint
9,Downtown Toronto,M5B,Downtown Toronto,"Ryerson\n,Garden District\n",43.657162,-79.378937,2.0,Café,Clothing Store,Shopping Mall,Ramen Restaurant,Fast Food Restaurant,Spa,Beer Bar,Sandwich Place,Japanese Restaurant,Diner
13,North York,M3C,North York,"Flemingdon Park,Don Mills South\n",43.7259,-79.340923,2.0,Coffee Shop,Asian Restaurant,Gym,Beer Store,Clothing Store,Chinese Restaurant,Dim Sum Restaurant,Restaurant,Discount Store,Sandwich Place
14,East York,M4C,East York,Woodbine Heights,43.695344,-79.318389,2.0,Skating Rink,Park,Pharmacy,Beer Store,Spa,Curling Ice,Cosmetics Shop,Athletics & Sports,Video Store,Comfort Food Restaurant
15,Downtown Toronto,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2.0,Coffee Shop,Gastropub,Restaurant,Japanese Restaurant,Italian Restaurant,Hotel,Church,Cosmetics Shop,Diner,Speakeasy
17,Etobicoke,M9C,Etobicoke,"Bloordale Gardens\n,Eringate\n,Markland Wood,O...",43.643515,-79.577201,2.0,Park,Coffee Shop,Beer Store,Pharmacy,Pizza Place,Liquor Store,Café,Convenience Store,Dim Sum Restaurant,Department Store
20,Downtown Toronto,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2.0,Café,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Farmers Market,Tea Room,Liquor Store,Bistro,Jazz Club


In [158]:
dfd=toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(0, toronto_merged.shape[1]))]]
dfd

Unnamed: 0,Borough,Postcode,Borough.1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,M3A,North York,Parkwoods,43.753259,-79.329656,3.0,Park,Fast Food Restaurant,Food & Drink Shop,Women's Store,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
21,York,M6E,York,Caledonia-Fairbanks\n,43.689026,-79.453512,3.0,Park,Pharmacy,Fast Food Restaurant,Market,Women's Store,Airport Gate,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant
40,North York,M3K,North York,"CFB Toronto,Downsview East\n",43.737473,-79.464763,3.0,Park,Airport,Curling Ice,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
46,North York,M3L,North York,Downsview West,43.739015,-79.506944,3.0,Park,Grocery Store,Shopping Mall,Bank,Curling Ice,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
61,Central Toronto,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3.0,Park,Swim School,Bus Line,Coffee Shop,College Arts Building,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
64,York,M9N,York,Weston,43.706876,-79.518188,3.0,Park,Convenience Store,Dance Studio,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
66,North York,M2P,North York,York Mills West\n,43.752758,-79.400049,3.0,Park,Bank,Convenience Store,Dance Studio,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
85,Scarborough,M1V,Scarborough,"Agincourt North,L'Amoreaux East\n,Milliken,Ste...",43.815252,-79.284577,3.0,Playground,Park,Women's Store,Cuban Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
91,Downtown Toronto,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,3.0,Park,Playground,Trail,Building,Curling Ice,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
98,Etobicoke,M8X,Etobicoke,"The Kingsway,Montgomery Road\n,Old Mill North\n",43.653654,-79.506944,3.0,Park,Pool,River,Cuban Restaurant,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner,Dim Sum Restaurant


In [159]:
dff=toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(0, toronto_merged.shape[1]))]]
dff

Unnamed: 0,Borough,Postcode,Borough.1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Scarborough,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,4.0,Pizza Place,Playground,Convenience Store,Women's Store,Curling Ice,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
35,East York,M4J,East York,East Toronto,43.685347,-79.338106,4.0,Park,Coffee Shop,Pizza Place,Convenience Store,Women's Store,Dim Sum Restaurant,Deli / Bodega,Department Store,Dessert Shop,Discount Store
50,North York,M9L,North York,Humber Summit,43.756303,-79.565963,4.0,Empanada Restaurant,Pizza Place,Women's Store,Curling Ice,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
63,York,M6N,York,"The Junction North\n,Runnymede\n",43.673185,-79.487262,4.0,Grocery Store,Bus Line,Pizza Place,Brewery,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
70,Etobicoke,M9P,Etobicoke,Westmount\n,43.696319,-79.532242,4.0,Pizza Place,Coffee Shop,Chinese Restaurant,Sandwich Place,Middle Eastern Restaurant,Curling Ice,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
72,North York,M2R,North York,Willowdale West,43.782736,-79.442259,4.0,Coffee Shop,Pharmacy,Butcher,Pizza Place,Discount Store,Grocery Store,Eastern European Restaurant,Drugstore,Dog Run,Cuban Restaurant
77,Etobicoke,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens\n,Richv...",43.688905,-79.554724,4.0,Park,Bus Line,Pizza Place,Mobile Phone Shop,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner,Dim Sum Restaurant
89,Etobicoke,M9V,Etobicoke,"Albion Gardens\n,Beaumond Heights,Humbergate\n...",43.739416,-79.588437,4.0,Grocery Store,Pizza Place,Coffee Shop,Video Store,Beer Store,Pharmacy,Fried Chicken Joint,Fast Food Restaurant,Sandwich Place,Dim Sum Restaurant
93,Etobicoke,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,4.0,Pizza Place,Gym,Skating Rink,Coffee Shop,Pub,Pharmacy,Sandwich Place,Women's Store,Dessert Shop,Dance Studio
