In [1]:
import pandas as pd
import numpy as np # library to handle data in a vectorized manner


!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [102]:
canada_df = pd.read_csv('canada.csv')

In [103]:
canada_df = canada_df.drop(['Unnamed: 0', 'Postal Code.1'], axis=1)
canada_df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [104]:
canada_df[canada_df['Borough'] == 'York']

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
73,M6C,York,Humewood-Cedarvale,43.693781,-79.428191
74,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512
80,M6M,York,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",43.691116,-79.476013
81,M6N,York,"Runnymede, The Junction North",43.673185,-79.487262
98,M9N,York,Weston,43.706876,-79.518188


In [105]:
# latitude and longitude
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 43.6534817, -79.3839347.


In [106]:
# Folium map with neighbhorhood superimposed
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(canada_df['Latitude'], canada_df['Longitude'], canada_df['Borough'], canada_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [107]:
# FourSquare credentials
CLIENT_ID = 'Y0SB1KFG5R3HDSHQ4SRGLUIO5AJOBJPXTQIENEG5NDXYRRYD' # your Foursquare ID
CLIENT_SECRET = 'JAVZOS2UBIHBSKGGXQ12J1NCHIWIGVMB3LRRFJPFYXOU5KBU' # your Foursquare Secret
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Y0SB1KFG5R3HDSHQ4SRGLUIO5AJOBJPXTQIENEG5NDXYRRYD
CLIENT_SECRET:JAVZOS2UBIHBSKGGXQ12J1NCHIWIGVMB3LRRFJPFYXOU5KBU


In [108]:
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [109]:
canada_df.loc[0, 'Neighborhood']

'Malvern, Rouge'

In [110]:
neighborhood_latitude = canada_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = canada_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = canada_df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.8066863, -79.19435340000003.


In [111]:
# top 100 venues that are in Marble Hill within a radius of 500 meters
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=Y0SB1KFG5R3HDSHQ4SRGLUIO5AJOBJPXTQIENEG5NDXYRRYD&client_secret=JAVZOS2UBIHBSKGGXQ12J1NCHIWIGVMB3LRRFJPFYXOU5KBU&v=20180604&ll=43.8066863,-79.19435340000003&radius=500&limit=100'

In [112]:
results = requests.get(url).json()
results["response"]

 'headerLocation': 'Malvern',
 'headerFullLocation': 'Malvern, Toronto',
 'headerLocationGranularity': 'neighborhood',
 'totalResults': 2,
 'suggestedBounds': {'ne': {'lat': 43.811186304500005,
   'lng': -79.18812958073043},
  'sw': {'lat': 43.8021862955, 'lng': -79.20057721926962}},
 'groups': [{'type': 'Recommended Places',
   'name': 'recommended',
   'items': [{'reasons': {'count': 0,
      'items': [{'summary': 'This spot is popular',
        'type': 'general',
        'reasonName': 'globalInteractionReason'}]},
     'venue': {'id': '4bb6b9446edc76b0d771311c',
      'name': 'Wendy’s',
      'location': {'crossStreet': 'Morningside & Sheppard',
       'lat': 43.80744841934756,
       'lng': -79.19905558052072,
       'labeledLatLngs': [{'label': 'display',
         'lat': 43.80744841934756,
         'lng': -79.19905558052072}],
       'distance': 387,
       'cc': 'CA',
       'city': 'Toronto',
       'state': 'ON',
       'country': 'Canada',
       'formattedAddress': ['Toronto 

In [113]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [114]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  


Unnamed: 0,name,categories,lat,lng
0,Wendy’s,Fast Food Restaurant,43.807448,-79.199056
1,Interprovincial Group,Print Shop,43.80563,-79.200378


In [115]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        for v in results:
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name'])])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [116]:

toronto_venues = getNearbyVenues(names=canada_df['Neighborhood'],
                                   latitudes=canada_df['Latitude'],
                                   longitudes=canada_df['Longitude']
                                  )


Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto, Broadview North (Old East York)
The Danforth West, 

In [117]:
#toronto_venues[toronto_venues['Neighborhood'] == 'York Mills, Silver Hills']
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Malvern, Rouge",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [118]:
toronto_venues.groupby('Neighborhood').count().sort_values('Venue')

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Humber Summit,1,1,1,1,1,1
"Rouge Hill, Port Union, Highland Creek",1,1,1,1,1,1
"York Mills, Silver Hills",2,2,2,2,2,2
Weston,2,2,2,2,2,2
"The Kingsway, Montgomery Road, Old Mill North",2,2,2,2,2,2
...,...,...,...,...,...,...
"Harbourfront East, Union Station, Toronto Islands",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100


In [119]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 272 uniques categories.


In [120]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-83]] + list(toronto_onehot.columns[:-83])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.tail()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Movie Theater,Museum,Music Venue
2127,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2128,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2129,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2130,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2131,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [121]:
toronto_onehot.shape

(2132, 190)

In [122]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Movie Theater,Museum,Music Venue
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [123]:
toronto_grouped.shape

(95, 190)

In [124]:
# Let's print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

#temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues)

----Agincourt----
                       venue  freq
0  Latin American Restaurant  0.25
1                     Lounge  0.25
2             Breakfast Spot  0.25
3               Gourmet Shop  0.00
4                Gas Station  0.00


----Alderwood, Long Branch----
                venue  freq
0         Coffee Shop   0.1
1                 Gym   0.1
2  Athletics & Sports   0.1
3   Accessories Store   0.0
4             Gay Bar   0.0


----Bathurst Manor, Wilson Heights, Downsview North----
                 venue  freq
0          Coffee Shop  0.11
1                 Bank  0.11
2       Ice Cream Shop  0.05
3  Fried Chicken Joint  0.05
4        Deli / Bodega  0.05


----Bayview Village----
                 venue  freq
0                 Café  0.25
1                 Bank  0.25
2   Chinese Restaurant  0.25
3  Japanese Restaurant  0.25
4     Greek Restaurant  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0         Coffee Shop  0.09
1  Italian Restaurant  0.09
2   Indian 

In [125]:
# Let's put that into a pandas dataframe

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]  # exclue 0th row
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [126]:
toronto_grouped.head(3)

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Movie Theater,Museum,Music Venue
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [127]:
#toronto_grouped.head()
toronto_grouped.iloc[0, 1:]

Accessories Store      0
Afghan Restaurant      0
Airport                0
Airport Food Court     0
Airport Lounge         0
                      ..
Moroccan Restaurant    0
Motel                  0
Movie Theater          0
Museum                 0
Music Venue            0
Name: 0, Length: 189, dtype: object

In [141]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    # ind = 0  iloc[0, 1:]  takes 0th row, makes column names as row name and corresponds the values
    
neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Breakfast Spot,Latin American Restaurant,Music Venue,Clothing Store,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
1,"Alderwood, Long Branch",Gym,Coffee Shop,Athletics & Sports,Music Venue,Clothing Store,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Gas Station,Diner,Fried Chicken Joint,Bridal Shop,Deli / Bodega,Middle Eastern Restaurant,Ice Cream Shop,Music Venue
3,Bayview Village,Café,Bank,Chinese Restaurant,Japanese Restaurant,Music Venue,Cocktail Bar,College Stadium,College Rec Center,College Gym,College Cafeteria
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Butcher,Juice Bar,Café,Comfort Food Restaurant,Grocery Store,Greek Restaurant,American Restaurant,Liquor Store
...,...,...,...,...,...,...,...,...,...,...,...
90,"Willowdale, Willowdale West",Bank,Home Service,Coffee Shop,Music Venue,Clothing Store,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
91,Woburn,Coffee Shop,Korean Restaurant,Music Venue,Bus Station,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building
92,Woodbine Heights,Dance Studio,Curling Ice,Beer Store,Cosmetics Shop,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building,Coffee Shop
93,York Mills West,Bank,Convenience Store,Music Venue,Comfort Food Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building


<h1>Cluster the neighbhorhood<\h1>

In [142]:
#k-means to cluster the neighborhood into 5 clusters.
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(toronto_grouped_clustering)

valuuee = kmeans.labels_[0:]


In [145]:
#canada_df = canada_df.drop(['level_0', 'index'], axis=1)
#canada_df = canada_df.reset_index()
canada_df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [146]:
#neighborhoods_venues_sorted.drop('Cluster Labels', 1)
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Breakfast Spot,Latin American Restaurant,Music Venue,Clothing Store,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
1,"Alderwood, Long Branch",Gym,Coffee Shop,Athletics & Sports,Music Venue,Clothing Store,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Gas Station,Diner,Fried Chicken Joint,Bridal Shop,Deli / Bodega,Middle Eastern Restaurant,Ice Cream Shop,Music Venue
3,Bayview Village,Café,Bank,Chinese Restaurant,Japanese Restaurant,Music Venue,Cocktail Bar,College Stadium,College Rec Center,College Gym,College Cafeteria
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Butcher,Juice Bar,Café,Comfort Food Restaurant,Grocery Store,Greek Restaurant,American Restaurant,Liquor Store


In [152]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', valuuee)
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Agincourt,Lounge,Breakfast Spot,Latin American Restaurant,Music Venue,Clothing Store,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
1,1,"Alderwood, Long Branch",Gym,Coffee Shop,Athletics & Sports,Music Venue,Clothing Store,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
2,1,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Gas Station,Diner,Fried Chicken Joint,Bridal Shop,Deli / Bodega,Middle Eastern Restaurant,Ice Cream Shop,Music Venue
3,1,Bayview Village,Café,Bank,Chinese Restaurant,Japanese Restaurant,Music Venue,Cocktail Bar,College Stadium,College Rec Center,College Gym,College Cafeteria
4,1,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Butcher,Juice Bar,Café,Comfort Food Restaurant,Grocery Store,Greek Restaurant,American Restaurant,Liquor Store


In [158]:
neighborhoods_venues_sorted.isnull().values.any()

False

In [163]:
toronto_merged = canada_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
#toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged = pd.merge(canada_df, neighborhoods_venues_sorted, on='Neighborhood')

#toronto_merged = pd.concat([toronto_merged, neighborhoods_venues_sorted], axis=1)
# df_merge_col = pd.merge(df_row, df3, on='id')
#toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(int)

In [164]:
toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1,Fast Food Restaurant,Music Venue,Clothing Store,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,2,Bar,Clothing Store,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building,Coffee Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Medical Center,Electronics Store,Bank,Breakfast Spot,Mexican Restaurant,Intersection,Music Venue,College Rec Center,College Gym,College Cafeteria
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Music Venue,Bus Station,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Fried Chicken Joint,Hakka Restaurant,Caribbean Restaurant,Bank,Bakery,Gas Station,Athletics & Sports,College Arts Building,Colombian Restaurant,College Stadium


In [165]:
# Visualization

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'],toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [172]:
Examine the 5 clusters




In [174]:
cluster1 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]
cluster1

In [189]:
cluster2 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]
cluster2

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1,Fast Food Restaurant,Music Venue,Clothing Store,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Medical Center,Electronics Store,Bank,Breakfast Spot,Mexican Restaurant,Intersection,Music Venue,College Rec Center,College Gym,College Cafeteria
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Fried Chicken Joint,Hakka Restaurant,Caribbean Restaurant,Bank,Bakery,Gas Station,Athletics & Sports,College Arts Building,Colombian Restaurant,College Stadium
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,1,Music Venue,Climbing Gym,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building,Coffee Shop,Cocktail Bar
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,1,Bakery,Bus Station,Metro Station,Intersection,Ice Cream Shop,Bus Line,Cajun / Creole Restaurant,Clothing Store,College Gym,College Cafeteria
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,M9L,North York,Humber Summit,43.756303,-79.565963,1,Music Venue,Climbing Gym,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building,Coffee Shop,Cocktail Bar
94,M9N,York,Weston,43.706876,-79.518188,1,Convenience Store,Music Venue,Comfort Food Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building,Coffee Shop
96,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,1,Mobile Phone Shop,Music Venue,Climbing Gym,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building,Coffee Shop
97,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,1,Grocery Store,Discount Store,Fried Chicken Joint,Liquor Store,Fast Food Restaurant,Japanese Restaurant,Beer Store,Music Venue,Clothing Store,College Gym


In [188]:
cluster3 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]
cluster3

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,2,Bar,Clothing Store,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building,Coffee Shop


In [187]:
cluster4 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 3]
cluster4

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,M5N,Central Toronto,Roselawn,43.711695,-79.416936,3,Music Venue,Garden,Café,Cocktail Bar,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium


In [186]:
cluster5 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 4]
cluster5

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
89,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,4,Baseball Field,Construction & Landscaping,Music Venue,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building
93,M9M,North York,"Humberlea, Emery",43.724766,-79.532242,4,Baseball Field,Furniture / Home Store,Café,Cajun / Creole Restaurant,Comfort Food Restaurant,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Music Venue,Bus Station,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,0,Discount Store,Bus Station,Coffee Shop,Department Store,Chinese Restaurant,Climbing Gym,College Rec Center,College Gym,College Cafeteria,College Auditorium
38,M4J,East York,"East Toronto, Broadview North (Old East York)",43.685347,-79.338106,0,Convenience Store,Coffee Shop,Music Venue,Comfort Food Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium,College Arts Building
78,M6M,York,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",43.691116,-79.476013,0,Fast Food Restaurant,Coffee Shop,Music Venue,Clothing Store,Colombian Restaurant,College Stadium,College Rec Center,College Gym,College Cafeteria,College Auditorium
83,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Music Venue,Bank,Distribution Center,Diner,Creperie,Fried Chicken Joint,College Cafeteria,College Auditorium,General Entertainment
