# Capstone Assignment - Segmenting and Clustering Neigborhoods in Toronto

### Module 1- Scrape data from wiki webpage and transform into a pandas dataframe

In [152]:
import pandas as pd
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Module 1 - Remove rows where Borough is not assigned

In [153]:
df.drop(df[df['Borough']=="Not assigned"].index,axis=0, inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


### Module 1 - Group neighborhoods by postal code

In [154]:
df_grouped=df.groupby("Postcode").agg(lambda x:','.join(set(x)))
df_grouped.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern,Rouge"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"West Hill,Guildwood,Morningside"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### Module 1 - Update Neigborhood and Borough name to be the same if a borough is not assigned

In [155]:
df_grouped.loc[df_grouped['Neighborhood']=="Not assigned",'Neighborhood']=df_grouped.loc[df_grouped['Neighborhood']=="Not assigned",'Borough']
df_grouped.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern,Rouge"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"West Hill,Guildwood,Morningside"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### Module 1 - Print number of rows in the dataframe

In [156]:
df_grouped.shape

(103, 2)

### Module 2 - Load neigborhood geographical coordinates data

In [159]:
file_name='https://cocl.us/Geospatial_data'
df1=pd.read_csv(file_name)
df1.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


###  Module 2 - Merge dataframes so that the resultant dataframe contains PostCode, Borough, Neighborhhod, Latitude and Longitude columns

In [160]:
df1.rename(columns = {'Postal Code':'Postcode'}, inplace = True) 
result=pd.merge(df_grouped,df1[['Postcode','Latitude','Longitude']],on='Postcode')
result.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Guildwood,Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Module 3 - Installing and importing dependencies

In [161]:
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
#!conda install -c conda-forge folium=0.5.0 --yes
import folium
import requests
from pandas.io.json import json_normalize
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

### Module 3 - Use geopy library to get the latitude and longitude values of Toronto

In [162]:
address ='Toronto, ON'
geolocator=Nominatim(user_agent="toronto_explorer")
location= geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geographical coordinates of Toronto are 43.653963, -79.387207.


###  Module 3- Create a map of Toronto with neighborhoods superimposed on top.

In [163]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, borough, neighborhood in zip(result['Latitude'], result['Longitude'], result['Borough'], result['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
map_toronto

### Module 3 -  From the Toronto dataframe filter to and create a  dataframe of Etobicoke which is a Borough within Toronto

In [164]:
etobicoke_data=result[result['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicoke_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M8V,Etobicoke,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484
2,M8X,Etobicoke,"Old Mill North,The Kingsway,Montgomery Road",43.653654,-79.506944
3,M8Y,Etobicoke,"The Queensway East,Royal York South East,Kings...",43.636258,-79.498509
4,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,Royal York ...",43.628841,-79.520999


### Module 3 - Use geopy library to get the latitude and longitude values of Etobicoke which is a Borough within Toronto

In [165]:
address ='Etobicoke,ON'
geolocator=Nominatim(user_agent="toronto_explorer")
location= geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print('The geograpical coordinates of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Etobicoke are 43.67145915, -79.5524920661167.


###  Module 3- Create a map of Etobicoke with neighborhoods superimposed on top.

In [166]:
map_etobicoke = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, borough, neighborhood in zip(etobicoke_data['Latitude'], etobicoke_data['Longitude'], etobicoke_data['Borough'], etobicoke_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)

map_etobicoke

### Module 3 - Define Foursquare Credentials and Version

In [167]:
CLIENT_ID = 'SINHNMALADY235YVGA53UZS5RDCMYEGZB330V2CN5RNTL1IA'
CLIENT_SECRET = 'YAX4F5HJ11XAPIJHWNU0ML3XURXAAGNZNJL1ZMSXZIVES22E'
VERSION = '20191227' 
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: SINHNMALADY235YVGA53UZS5RDCMYEGZB330V2CN5RNTL1IA
CLIENT_SECRET:YAX4F5HJ11XAPIJHWNU0ML3XURXAAGNZNJL1ZMSXZIVES22E


### Module 3 - Explore first neighborhood in the Etobicoke dataframe.

In [168]:
etobicoke_data.loc[0, 'Neighborhood']
neighborhood_latitude = etobicoke_data.loc[0, 'Latitude']
neighborhood_longitude = etobicoke_data.loc[0, 'Longitude']
neighborhood_name = etobicoke_data.loc[0, 'Neighborhood']
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, neighborhood_latitude,neighborhood_longitude))

Latitude and longitude values of New Toronto,Mimico South,Humber Bay Shores are 43.6056466, -79.50132070000001.


### Module 3 - Using the GET request url, get the top 100 venues that are in New Toronto,Mimico South,Humber Bay Shores within a radius of 500 meters

In [169]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        neighborhood_latitude, 
        neighborhood_longitude, 
        radius, 
        LIMIT)
url
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e06df539388d7001b364fb9'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 15,
  'suggestedBounds': {'ne': {'lat': 43.6101466045, 'lng': -79.49511771930959},
   'sw': {'lat': 43.6011465955, 'lng': -79.50752368069043}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b119977f964a520488023e3',
       'name': 'LCBO',
       'location': {'address': '2762 Lake Shore Blvd W',
        'crossStreet': 'btwn 1st & 2nd St',
        'lat': 43.60228082768786,
        'lng': -79.4993016827402,
        'labeledLatLngs': [{'label': 'display',
          'lat':

### Module 3 - Create a function that extracts the category of the venue

In [170]:
def get_category_type(row):
    try:
            categories_list = row['categories']
    except:
            categories_list = row['venue.categories']
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Module 3 - Transform json into DataFrame

In [171]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,LCBO,Liquor Store,43.602281,-79.499302
1,Domino's Pizza,Pizza Place,43.601676,-79.500908
2,New Toronto Fish & Chips,Restaurant,43.601849,-79.503281
3,Delicia Bakery & Pastry,Bakery,43.601403,-79.503012
4,Lucky Dice Restaurant,Café,43.601392,-79.503056


In [172]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

15 venues were returned by Foursquare.


### Module - 3 function to repeat the same process to all the neighborhoods in Etobicoke

In [173]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [174]:
etobicoke_venues = getNearbyVenues(names=etobicoke_data['Neighborhood'],
                                   latitudes=etobicoke_data['Latitude'],
                                   longitudes=etobicoke_data['Longitude']
                                  )

New Toronto,Mimico South,Humber Bay Shores
Alderwood,Long Branch
Old Mill North,The Kingsway,Montgomery Road
The Queensway East,Royal York South East,Kingsway Park South East,Sunnylea,King's Mill Park,Mimico NE,Old Mill South,Humber Bay
Kingsway Park South West,Mimico NW,Royal York South West,The Queensway West,South of Bloor
Cloverdale,Islington,Princess Gardens,West Deane Park,Martin Grove
Markland Wood,Bloordale Gardens,Eringate,Old Burnhamthorpe
Westmount
Martin Grove Gardens,Richview Gardens,St. Phillips,Kingsview Village
Silverstone,Mount Olive,Humbergate,Beaumond Heights,Jamestown,Thistletown,South Steeles,Albion Gardens
Northwest


In [175]:
print(etobicoke_venues.shape)
etobicoke_venues.head()

(74, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,LCBO,43.602281,-79.499302,Liquor Store
1,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,Domino's Pizza,43.601676,-79.500908,Pizza Place
2,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,New Toronto Fish & Chips,43.601849,-79.503281,Restaurant
3,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,Delicia Bakery & Pastry,43.601403,-79.503012,Bakery
4,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,Lucky Dice Restaurant,43.601392,-79.503056,Café


In [138]:
etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood,Long Branch",9,9,9,9,9,9
"Kingsway Park South West,Mimico NW,Royal York South West,The Queensway West,South of Bloor",13,13,13,13,13,13
"Markland Wood,Bloordale Gardens,Eringate,Old Burnhamthorpe",9,9,9,9,9,9
"Martin Grove Gardens,Richview Gardens,St. Phillips,Kingsview Village",4,4,4,4,4,4
"New Toronto,Mimico South,Humber Bay Shores",15,15,15,15,15,15
Northwest,2,2,2,2,2,2
"Old Mill North,The Kingsway,Montgomery Road",4,4,4,4,4,4
"Silverstone,Mount Olive,Humbergate,Beaumond Heights,Jamestown,Thistletown,South Steeles,Albion Gardens",9,9,9,9,9,9
"The Queensway East,Royal York South East,Kingsway Park South East,Sunnylea,King's Mill Park,Mimico NE,Old Mill South,Humber Bay",2,2,2,2,2,2
Westmount,7,7,7,7,7,7


In [177]:
print('There are {} unique categories.'.format(len(etobicoke_venues['Venue Category'].unique())))

There are 40 unique categories.


In [178]:
# one hot encoding
etobicoke_onehot = pd.get_dummies(etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
etobicoke_onehot['Neighborhood'] = etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [etobicoke_onehot.columns[-1]] + list(etobicoke_onehot.columns[:-1])
etobicoke_onehot = etobicoke_onehot[fixed_columns]

etobicoke_onehot.head()

Unnamed: 0,Neighborhood,Athletics & Sports,Bakery,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Business Service,Café,...,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Smoke Shop,Supplement Shop,Wings Joint
0,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,"New Toronto,Mimico South,Humber Bay Shores",0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [179]:
etobicoke_onehot.shape

(74, 41)

In [142]:
etobicoke_grouped = etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
etobicoke_grouped

Unnamed: 0,Neighborhood,Athletics & Sports,Bakery,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Business Service,Café,...,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Smoke Shop,Supplement Shop,Wings Joint
0,"Alderwood,Long Branch",0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.111111,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0
1,"Kingsway Park South West,Mimico NW,Royal York ...",0.0,0.076923,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.076923
2,"Markland Wood,Bloordale Gardens,Eringate,Old B...",0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Martin Grove Gardens,Richview Gardens,St. Phil...",0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"New Toronto,Mimico South,Humber Bay Shores",0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.133333,...,0.0,0.0,0.066667,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0
5,Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Old Mill North,The Kingsway,Montgomery Road",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0
7,"Silverstone,Mount Olive,Humbergate,Beaumond He...",0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0
8,"The Queensway East,Royal York South East,Kings...",0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Westmount,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0


In [143]:
etobicoke_grouped.shape

(10, 41)

In [180]:
num_top_venues = 5

for hood in etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = etobicoke_grouped[etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood,Long Branch----
                venue  freq
0         Pizza Place  0.22
1  Athletics & Sports  0.11
2      Sandwich Place  0.11
3                 Gym  0.11
4            Pharmacy  0.11


----Kingsway Park South West,Mimico NW,Royal York South West,The Queensway West,South of Bloor----
               venue  freq
0     Hardware Store  0.08
1  Convenience Store  0.08
2    Supplement Shop  0.08
3     Sandwich Place  0.08
4             Bakery  0.08


----Markland Wood,Bloordale Gardens,Eringate,Old Burnhamthorpe----
         venue  freq
0   Beer Store  0.11
1         Park  0.11
2    Pet Store  0.11
3     Pharmacy  0.11
4  Pizza Place  0.11


----Martin Grove Gardens,Richview Gardens,St. Phillips,Kingsview Village----
                venue  freq
0   Mobile Phone Shop  0.25
1                Park  0.25
2            Bus Line  0.25
3         Pizza Place  0.25
4  Athletics & Sports  0.00


----New Toronto,Mimico South,Humber Bay Shores----
                  venue  freq
0             

In [181]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [182]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = etobicoke_grouped['Neighborhood']

for ind in np.arange(etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood,Long Branch",Pizza Place,Gym,Pub,Coffee Shop,Pharmacy,Athletics & Sports,Sandwich Place,Skating Rink,River,Business Service
1,"Kingsway Park South West,Mimico NW,Royal York ...",Wings Joint,Hardware Store,Bakery,Burger Joint,Burrito Place,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Grocery Store
2,"Markland Wood,Bloordale Gardens,Eringate,Old B...",Pizza Place,Pharmacy,Beer Store,Convenience Store,Liquor Store,Coffee Shop,Café,Park,Pet Store,Drugstore
3,"Martin Grove Gardens,Richview Gardens,St. Phil...",Pizza Place,Mobile Phone Shop,Bus Line,Park,Wings Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping
4,"New Toronto,Mimico South,Humber Bay Shores",Café,Gym,Pizza Place,Fried Chicken Joint,Coffee Shop,Liquor Store,Mexican Restaurant,Business Service,Fast Food Restaurant,Pharmacy


In [185]:
# set number of clusters
kclusters = 5

etobicoke_grouped_clustering = etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 2, 3, 2, 0, 4, 2, 1, 2], dtype=int32)

In [186]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

etobicoke_merged = etobicoke_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
etobicoke_merged = etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

etobicoke_merged.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M8V,Etobicoke,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,2.0,Café,Gym,Pizza Place,Fried Chicken Joint,Coffee Shop,Liquor Store,Mexican Restaurant,Business Service,Fast Food Restaurant,Pharmacy
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,2.0,Pizza Place,Gym,Pub,Coffee Shop,Pharmacy,Athletics & Sports,Sandwich Place,Skating Rink,River,Business Service
2,M8X,Etobicoke,"Old Mill North,The Kingsway,Montgomery Road",43.653654,-79.506944,4.0,Smoke Shop,River,Pool,Park,Wings Joint,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
3,M8Y,Etobicoke,"The Queensway East,Royal York South East,Kings...",43.636258,-79.498509,1.0,Baseball Field,Construction & Landscaping,Wings Joint,Coffee Shop,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
4,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,Royal York ...",43.628841,-79.520999,2.0,Wings Joint,Hardware Store,Bakery,Burger Joint,Burrito Place,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Grocery Store


In [187]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(etobicoke_merged['Latitude'], etobicoke_merged['Longitude'], etobicoke_merged['Neighborhood'], etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[4],
        fill=True,
       fill_color=rainbow[4],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Module 3 - Cluster 1

In [188]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 0, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Etobicoke,0.0,Drugstore,Rental Car Location,Wings Joint,Chinese Restaurant,Flower Shop,Fast Food Restaurant,Discount Store,Convenience Store,Construction & Landscaping,Coffee Shop


### Module 3 - Cluster 2

etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 1, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

### Module 3 - Cluster 3

In [90]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 2, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,2.0,Café,Gym,Pizza Place,Fried Chicken Joint,Coffee Shop,Liquor Store,Mexican Restaurant,Business Service,Fast Food Restaurant,Pharmacy
1,Etobicoke,2.0,Pizza Place,Gym,Pub,Coffee Shop,Pharmacy,Athletics & Sports,Sandwich Place,Skating Rink,River,Business Service
4,Etobicoke,2.0,Wings Joint,Hardware Store,Bakery,Burger Joint,Burrito Place,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Grocery Store
6,Etobicoke,2.0,Pizza Place,Pharmacy,Beer Store,Convenience Store,Liquor Store,Coffee Shop,Café,Park,Pet Store,Drugstore
7,Etobicoke,2.0,Pizza Place,Intersection,Sandwich Place,Discount Store,Coffee Shop,Chinese Restaurant,Wings Joint,Fast Food Restaurant,Drugstore,Convenience Store
9,Etobicoke,2.0,Grocery Store,Pizza Place,Pharmacy,Fast Food Restaurant,Liquor Store,Fried Chicken Joint,Sandwich Place,Beer Store,Burger Joint,Construction & Landscaping


### Module 3 - Cluster 4

In [91]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 3, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Etobicoke,3.0,Pizza Place,Mobile Phone Shop,Bus Line,Park,Wings Joint,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store,Construction & Landscaping


### Module 3 - Cluster 5

In [92]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 4, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Etobicoke,4.0,Smoke Shop,River,Pool,Park,Wings Joint,Chinese Restaurant,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
