In [75]:
import numpy as np
import pandas as pd
import requests
import lxml.html as lh

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from sklearn.cluster import KMeans

### Scrape wikipage

In [76]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

#Create a handle, page, to handle the contents of the website
page = requests.get(url)
#Store the contents of the website under doc
doc = lh.fromstring(page.content)
#Parse data that are stored between <tr>..</tr> of HTML
tr_elements = doc.xpath('//tr')

In [77]:
tr_elements = doc.xpath('//tr')
#Create empty list
col=[]
i=0
#For each row, store each first element (header) and an empty list
for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    print ('%d:"%s"'%(i,name))
    col.append((name,[]))

1:"Postal Code
"
2:"Borough
"
3:"Neighbourhood
"


In [78]:
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 3, the //tr data is not from our table 
    if len(T)!=3:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Check if row is empty
        if i>0:
        #Convert any numerical value to integers
            try:
                data=int(data)
            except:
                pass
        #Append the data to the empty list of the i'th column
        col[i][1].append(data)
        #Increment i for the next column
        i+=1

In [79]:
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)

In [80]:
df.head()

Unnamed: 0,Postal Code\n,Borough\n,Neighbourhood\n
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


In [81]:
df.rename(columns = {'Postal Code\n':'postal code', 'Borough\n':'borough','Neighbourhood\n':'neighborhood'}, inplace = True)


#### Clean up the dataframe

In [82]:
df.replace('\n','', regex = True, inplace = True)

In [83]:
df.head(15)

Unnamed: 0,postal code,borough,neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [84]:
df.dtypes

postal code     object
borough         object
neighborhood    object
dtype: object

In [85]:
df['neighborhood'].str.strip()

0                                           Not assigned
1                                           Not assigned
2                                              Parkwoods
3                                       Victoria Village
4                              Regent Park, Harbourfront
                             ...                        
176                                         Not assigned
177                                         Not assigned
178    Mimico NW, The Queensway West, South of Bloor,...
179                                         Not assigned
180                                                     
Name: neighborhood, Length: 181, dtype: object

### How many rows are are not assigned a neigborhood and a borough? 

In [86]:
missing_boro_neigh = (df['neighborhood'] == 'Not assigned') & (df['borough'] == 'Not assigned')

In [87]:
missing_boro_neigh.count()

181

In [88]:
df['borough'].value_counts()

Not assigned             77
North York               24
Downtown Toronto         19
Scarborough              17
Etobicoke                12
Central Toronto           9
West Toronto              6
East York                 5
East Toronto              5
York                      5
Mississauga               1
Canadian postal codes     1
Name: borough, dtype: int64

#### If a cell has a borough but a Not assigned neighborhood replace the neighborhood with the borough name.

In [89]:
# assign neighbohood name to borough if 'Not assigned'
df['neighborhood'] = np.where(df['neighborhood'] =='Not assigned' ,df['neighborhood'], df['borough'])


In [90]:
indexNames = df[df['borough'] == 'Not assigned' ].index

In [91]:
#df.drop(df[df.score < 50].index, inplace=True)
df.drop(indexNames, inplace=True)

In [92]:
df.shape

(104, 3)

##### Get the geospatial data

In [93]:
geo_df = pd.read_csv('http://cocl.us/Geospatial_data')


In [94]:
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [95]:
geo_df.dtypes

Postal Code     object
Latitude       float64
Longitude      float64
dtype: object

### Merge the geographical data with the neighborhood information

In [96]:
neighborhoods = df.merge(geo_df,how='inner', left_on = 'postal code', right_on = 'Postal Code')

In [97]:
neighborhoods.head()

Unnamed: 0,postal code,borough,neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,North York,M3A,43.753259,-79.329656
1,M4A,North York,North York,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,Downtown Toronto,M5A,43.65426,-79.360636
3,M6A,North York,North York,M6A,43.718518,-79.464763
4,M7A,Downtown Toronto,Downtown Toronto,M7A,43.662301,-79.389494


#### Clean up column names

In [98]:
neighborhoods.columns = neighborhoods.columns.str.strip()

In [99]:
#check values in latitude & longitude
print("Latitude range", neighborhoods['Latitude'].min(), neighborhoods['Latitude'].max())
print("Longitude range", neighborhoods['Longitude'].min(), neighborhoods['Longitude'].max())
 


Latitude range 43.60241370000001 43.836124700000006
Longitude range -79.61581899999999 -79.16049709999999


In [100]:
# check for nan in lat and long
print(neighborhoods['Latitude'].isnull().values.any())
print(neighborhoods['Longitude'].isnull().values.any())

False
False


In [101]:
# Format the latitude & longitude
neighborhoods['Latitude'] = pd.to_numeric(geo_df['Latitude'], errors = 'coerce', downcast = 'float')
neighborhoods['Longitude'] = pd.to_numeric(geo_df['Longitude'], errors = 'coerce', downcast = 'float')



In [102]:
neighborhoods

Unnamed: 0,postal code,borough,neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,North York,M3A,43.806686,-79.194351
1,M4A,North York,North York,M4A,43.784534,-79.160500
2,M5A,Downtown Toronto,Downtown Toronto,M5A,43.763573,-79.188713
3,M6A,North York,North York,M6A,43.770992,-79.216919
4,M7A,Downtown Toronto,Downtown Toronto,M7A,43.773136,-79.239479
...,...,...,...,...,...,...
98,M8X,Etobicoke,Etobicoke,M8X,43.706875,-79.518188
99,M4Y,Downtown Toronto,Downtown Toronto,M4Y,43.696320,-79.532242
100,M7Y,East Toronto,East Toronto,M7Y,43.688904,-79.554726
101,M8Y,Etobicoke,Etobicoke,M8Y,43.739418,-79.588440


In [103]:
toronto_data = neighborhoods[['borough','neighborhood','postal code','Latitude','Longitude']].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,borough,neighborhood,postal code,Latitude,Longitude
0,North York,North York,M3A,43.806686,-79.194351
1,North York,North York,M4A,43.784534,-79.1605
2,Downtown Toronto,Downtown Toronto,M5A,43.763573,-79.188713
3,North York,North York,M6A,43.770992,-79.216919
4,Downtown Toronto,Downtown Toronto,M7A,43.773136,-79.239479


In [104]:
# find the center of the data
mid_point_latitude = ((neighborhoods['Latitude'].min() + neighborhoods['Latitude'].max())/2)
mid_point_longitude = ((neighborhoods['Longitude'].min() + neighborhoods['Longitude'].max())/2)
print(mid_point_latitude, mid_point_longitude)


43.719268798828125 -79.3881607055664


In [105]:
map_toronto = folium.Map(location=[mid_point_latitude, mid_point_longitude], zoom_start=11)
map_toronto

### Create map around Toronto

In [106]:
import folium
# create map of Toronto using latitude and longitude values
latitude = neighborhoods['Latitude']
longitude = neighborhoods['Longitude']

#map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)


# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['borough'], neighborhoods['neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Define Foursquare Credentials and Version

In [107]:
CLIENT_ID = '450JZYHOXGKOWLVHKRM0BEGKHDY5XJL0Q1DHODCMLQRRNGC3' # your Foursquare ID
CLIENT_SECRET = 'MDC2EYL0Y3OVOJL3WJIJJKUT3M1ALJLIILOGA2CCNKNO23O2' # your Foursquare Secret
VERSION = '20200801' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 450JZYHOXGKOWLVHKRM0BEGKHDY5XJL0Q1DHODCMLQRRNGC3
CLIENT_SECRET:MDC2EYL0Y3OVOJL3WJIJJKUT3M1ALJLIILOGA2CCNKNO23O2


In [108]:
neighborhoods.loc[0, 'neighborhood']

'North York'

In [109]:
neighborhood_latitude = neighborhoods.loc[3, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods.loc[3, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[3, 'neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of North York are 43.770992279052734, -79.2169189453125.


In [110]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display UR

'https://api.foursquare.com/v2/venues/explore?&client_id=450JZYHOXGKOWLVHKRM0BEGKHDY5XJL0Q1DHODCMLQRRNGC3&client_secret=MDC2EYL0Y3OVOJL3WJIJJKUT3M1ALJLIILOGA2CCNKNO23O2&v=20200801&ll=43.770992279052734,-79.2169189453125&radius=500&limit=100'

In [111]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f33085e03cd7d164b937e77'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.77549228355274,
    'lng': -79.21069884168455},
   'sw': {'lat': 43.76649227455273, 'lng': -79.22313904894045}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4cc1d28c06c254815ac18547',
       'name': 'Starbucks',
       'location': {'address': '300 Borough Dr',
        'crossStreet': 'Scarborough Town Centre',
        'lat': 43.770037201625215,
        'lng': -79.22115586641958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.770037201625215,
          'lng': -79.22115586641958}],
        'distance': 356,
     

In [112]:
# tranform venues into a dataframe
dataframe = json_normalize(results)
dataframe.head()

  


Unnamed: 0,meta.code,meta.requestId,response.headerLocation,response.headerFullLocation,response.headerLocationGranularity,response.totalResults,response.suggestedBounds.ne.lat,response.suggestedBounds.ne.lng,response.suggestedBounds.sw.lat,response.suggestedBounds.sw.lng,response.groups
0,200,5f33085e03cd7d164b937e77,Toronto,Toronto,city,4,43.775492,-79.210699,43.766492,-79.223139,"[{'type': 'Recommended Places', 'name': 'recom..."


In [113]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [114]:
venues = results['response']['groups'][0]['items']


nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  after removing the cwd from sys.path.


Unnamed: 0,name,categories,lat,lng
0,Starbucks,Coffee Shop,43.770037,-79.221156
1,Tim Hortons,Coffee Shop,43.770827,-79.223078
2,Korean Grill House,Korean Restaurant,43.770812,-79.214502
3,Jessies Variety Store,Convenience Store,43.772778,-79.2225


###  Create a function to repeat the same process to all the neighborhoods in Toronto

In [115]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['neighborhood', 
                  'name'
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [116]:
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Starbucks,Coffee Shop,43.770037,-79.221156
1,Tim Hortons,Coffee Shop,43.770827,-79.223078
2,Korean Grill House,Korean Restaurant,43.770812,-79.214502
3,Jessies Variety Store,Convenience Store,43.772778,-79.2225


In [117]:
nearby_venues = getNearbyVenues(names= toronto_data['neighborhood'],
                                   latitudes = toronto_data['Latitude'],
                                   longitudes= toronto_data['Longitude'])
                                  

North York
North York
Downtown Toronto
North York
Downtown Toronto
Etobicoke
Scarborough
North York
East York
Downtown Toronto
North York
Etobicoke
Scarborough
North York
East York
Downtown Toronto
York
Etobicoke
Scarborough
East Toronto
Downtown Toronto
York
Scarborough
East York
Downtown Toronto
Downtown Toronto
Scarborough
North York
North York
East York
Downtown Toronto
West Toronto
Scarborough
North York
North York
East York
Downtown Toronto
West Toronto
Scarborough
North York
North York
East Toronto
Downtown Toronto
West Toronto
Scarborough
North York
North York
East Toronto
Downtown Toronto
North York
North York
Scarborough
North York
North York
East Toronto
North York
York
North York
Scarborough
North York
North York
Central Toronto
Central Toronto
York
York
Scarborough


KeyError: 'groups'

In [120]:
print(toronto_venues.shape)
toronto_venues.head()

(2133, 7)


Unnamed: 0,neighborhood,nameNeighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,North York,43.806686,-79.194351,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,North York,43.784534,-79.1605,Great Shine Window Cleaning,43.783145,-79.157431,Home Service
2,North York,43.784534,-79.1605,Royal Canadian Legion,43.782533,-79.163085,Bar
3,Downtown Toronto,43.763573,-79.188713,RBC Royal Bank,43.76679,-79.191151,Bank
4,Downtown Toronto,43.763573,-79.188713,G & G Electronics,43.765309,-79.191537,Electronics Store


In [133]:
# How many venues in each neighborhood?
toronto_venues.groupby('neighborhood').Venue.count()

neighborhood
Central Toronto     243
Downtown Toronto    158
East Toronto        186
East York            22
Etobicoke           193
Mississauga          16
North York          668
Scarborough         415
West Toronto        168
York                 64
Name: Venue, dtype: int64

Let's find out how many unique categories can be curated from all the returned venues¶

In [134]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 269 uniques categories.


In [136]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['neighborhood'] = toronto_venues['neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,North York,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Downtown Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Downtown Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [137]:
toronto_grouped = toronto_onehot.groupby('neighborhood').mean().reset_index()
toronto_grouped

num_top_venues = 5

for hood in toronto_grouped['neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
         venue  freq
0  Coffee Shop  0.10
1         Café  0.05
2   Restaurant  0.04
3        Hotel  0.03
4          Bar  0.03


----Downtown Toronto----
            venue  freq
0     Pizza Place  0.05
1     Coffee Shop  0.04
2            Park  0.04
3  Sandwich Place  0.04
4      Restaurant  0.03


----East Toronto----
                venue  freq
0         Coffee Shop  0.07
1      Clothing Store  0.05
2    Greek Restaurant  0.04
3                Café  0.04
4  Italian Restaurant  0.04


----East York----
               venue  freq
0        Pizza Place  0.09
1               Park  0.09
2         Playground  0.05
3       Intersection  0.05
4  Convenience Store  0.05


----Etobicoke----
         venue  freq
0  Coffee Shop  0.08
1         Café  0.06
2  Pizza Place  0.04
3          Bar  0.04
4   Restaurant  0.04


----Mississauga----
         venue  freq
0       Bakery  0.12
1     Pharmacy  0.12
2  Art Gallery  0.06
3          Bar  0.06
4         Bank  0.06


----North 

### Put the venues into a dataframe

In [138]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Display top ten venues

In [139]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['neighborhood'] = toronto_grouped['neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Café,Restaurant,Hotel,Bar,American Restaurant,Italian Restaurant,Vegetarian / Vegan Restaurant,Park,Gym
1,Downtown Toronto,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
2,East Toronto,Coffee Shop,Clothing Store,Italian Restaurant,Greek Restaurant,Café,Pizza Place,Japanese Restaurant,Sandwich Place,Dessert Shop,Restaurant
3,East York,Park,Pizza Place,Athletics & Sports,Convenience Store,Coffee Shop,Pharmacy,Motel,Playground,Miscellaneous Shop,Caribbean Restaurant
4,Etobicoke,Coffee Shop,Café,Bar,Pizza Place,Restaurant,Gym,Asian Restaurant,Japanese Restaurant,Hotel,Sandwich Place


Cluster Neighborhoods

In [140]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['neighborhood'] = toronto_grouped['neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Café,Restaurant,Hotel,Bar,American Restaurant,Italian Restaurant,Vegetarian / Vegan Restaurant,Park,Gym
1,Downtown Toronto,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
2,East Toronto,Coffee Shop,Clothing Store,Italian Restaurant,Greek Restaurant,Café,Pizza Place,Japanese Restaurant,Sandwich Place,Dessert Shop,Restaurant
3,East York,Park,Pizza Place,Athletics & Sports,Convenience Store,Coffee Shop,Pharmacy,Motel,Playground,Miscellaneous Shop,Caribbean Restaurant
4,Etobicoke,Coffee Shop,Café,Bar,Pizza Place,Restaurant,Gym,Asian Restaurant,Japanese Restaurant,Hotel,Sandwich Place


In [141]:
toronto_data = neighborhoods[neighborhoods['neighborhood'] == 'Toronto'].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,postal code,borough,neighborhood,Postal Code,Latitude,Longitude


In [142]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 1, 2, 1, 3, 1, 1, 4, 4])

In [143]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [144]:
# add labels

#toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = neighborhoods.join(neighborhoods_venues_sorted.set_index('neighborhood'), on='neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,postal code,borough,neighborhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,North York,M3A,43.806686,-79.194351,1,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Park,Italian Restaurant,Sandwich Place,Sushi Restaurant,Clothing Store
1,M4A,North York,North York,M4A,43.784534,-79.1605,1,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Park,Italian Restaurant,Sandwich Place,Sushi Restaurant,Clothing Store
2,M5A,Downtown Toronto,Downtown Toronto,M5A,43.763573,-79.188713,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
3,M6A,North York,North York,M6A,43.770992,-79.216919,1,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Park,Italian Restaurant,Sandwich Place,Sushi Restaurant,Clothing Store
4,M7A,Downtown Toronto,Downtown Toronto,M7A,43.773136,-79.239479,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store


#### Create map of the 4 clusters

In [148]:
#create map
map_clusters = folium.Map(location=[mid_point_latitude, mid_point_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine the neighborhoods

#####  Three out of the four Most Common Venues in Toronto are food service related (Coffee Shop, Pizza and Bakeries)! 

In [162]:
toronto_merged['1st Most Common Venue'].value_counts()

Coffee Shop    78
Pizza Place    19
Park            5
Bakery          1
Name: 1st Most Common Venue, dtype: int64

#### There are 64 Starbucks in Toronto and 42 Tim Hortons

In [169]:
toronto_venues['Venue'].value_counts()

Starbucks                           64
Tim Hortons                         42
Subway                              29
Shoppers Drug Mart                  18
LCBO                                14
                                    ..
eastside social                      1
Il Paesano Pizzeria & Restaurant     1
Croissant Tree                       1
I.D.A. - St. Joseph Pharmacy         1
Cacao 70                             1
Name: Venue, Length: 1384, dtype: int64

#### Below is a look at the data by clusters. 

In [57]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,-79.188713,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
4,Downtown Toronto,-79.239479,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
9,Downtown Toronto,-79.264847,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
15,Downtown Toronto,-79.31839,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
20,Downtown Toronto,-79.374718,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
24,Downtown Toronto,-79.442261,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
25,Downtown Toronto,-79.329659,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
30,Downtown Toronto,-79.46476,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
36,Downtown Toronto,-79.31839,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store
42,Downtown Toronto,-79.315575,0,Pizza Place,Coffee Shop,Sandwich Place,Park,Grocery Store,Bank,Fast Food Restaurant,Restaurant,Burrito Place,Discount Store


In [58]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,-79.194351,1,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Park,Italian Restaurant,Sandwich Place,Sushi Restaurant,Clothing Store
1,North York,-79.160500,1,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Park,Italian Restaurant,Sandwich Place,Sushi Restaurant,Clothing Store
3,North York,-79.216919,1,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Park,Italian Restaurant,Sandwich Place,Sushi Restaurant,Clothing Store
5,Etobicoke,-79.239479,1,Coffee Shop,Café,Bar,Pizza Place,Restaurant,Gym,Asian Restaurant,Japanese Restaurant,Hotel,Sandwich Place
6,Scarborough,-79.262032,1,Coffee Shop,Café,Clothing Store,Restaurant,Sandwich Place,Furniture / Home Store,Japanese Restaurant,Pizza Place,Bakery,Fast Food Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Scarborough,-79.577202,1,Coffee Shop,Café,Clothing Store,Restaurant,Sandwich Place,Furniture / Home Store,Japanese Restaurant,Pizza Place,Bakery,Fast Food Restaurant
98,Etobicoke,-79.518188,1,Coffee Shop,Café,Bar,Pizza Place,Restaurant,Gym,Asian Restaurant,Japanese Restaurant,Hotel,Sandwich Place
100,East Toronto,-79.554726,1,Coffee Shop,Clothing Store,Italian Restaurant,Greek Restaurant,Café,Pizza Place,Japanese Restaurant,Sandwich Place,Dessert Shop,Restaurant
101,Etobicoke,-79.588440,1,Coffee Shop,Café,Bar,Pizza Place,Restaurant,Gym,Asian Restaurant,Japanese Restaurant,Hotel,Sandwich Place


In [145]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,East York,-79.239479,2,Park,Pizza Place,Athletics & Sports,Convenience Store,Coffee Shop,Pharmacy,Motel,Playground,Miscellaneous Shop,Caribbean Restaurant
14,East York,-79.284576,2,Park,Pizza Place,Athletics & Sports,Convenience Store,Coffee Shop,Pharmacy,Motel,Playground,Miscellaneous Shop,Caribbean Restaurant
23,East York,-79.400047,2,Park,Pizza Place,Athletics & Sports,Convenience Store,Coffee Shop,Pharmacy,Motel,Playground,Miscellaneous Shop,Caribbean Restaurant
29,East York,-79.487259,2,Park,Pizza Place,Athletics & Sports,Convenience Store,Coffee Shop,Pharmacy,Motel,Playground,Miscellaneous Shop,Caribbean Restaurant
35,East York,-79.309937,2,Park,Pizza Place,Athletics & Sports,Convenience Store,Coffee Shop,Pharmacy,Motel,Playground,Miscellaneous Shop,Caribbean Restaurant


In [146]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
76,Mississauga,-79.442261,3,Bakery,Pharmacy,Park,Middle Eastern Restaurant,Music Venue,Pizza Place,Café,Brewery,Smoke Shop,Supermarket


In [147]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,York,-79.205635,4,Coffee Shop,Cocktail Bar,Sushi Restaurant,Park,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Bakery
21,York,-79.408493,4,Coffee Shop,Cocktail Bar,Sushi Restaurant,Park,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Bakery
31,West Toronto,-79.506943,4,Coffee Shop,Café,Park,Grocery Store,Italian Restaurant,Bakery,Restaurant,Pub,Seafood Restaurant,Japanese Restaurant
37,West Toronto,-79.29303,4,Coffee Shop,Café,Park,Grocery Store,Italian Restaurant,Bakery,Restaurant,Pub,Seafood Restaurant,Japanese Restaurant
43,West Toronto,-79.340919,4,Coffee Shop,Café,Park,Grocery Store,Italian Restaurant,Bakery,Restaurant,Pub,Seafood Restaurant,Japanese Restaurant
56,York,-79.373306,4,Coffee Shop,Cocktail Bar,Sushi Restaurant,Park,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Bakery
63,York,-79.416939,4,Coffee Shop,Cocktail Bar,Sushi Restaurant,Park,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Bakery
64,York,-79.411308,4,Coffee Shop,Cocktail Bar,Sushi Restaurant,Park,Cheese Shop,Café,Restaurant,Seafood Restaurant,Beer Bar,Bakery
69,West Toronto,-79.374847,4,Coffee Shop,Café,Park,Grocery Store,Italian Restaurant,Bakery,Restaurant,Pub,Seafood Restaurant,Japanese Restaurant
75,West Toronto,-79.422562,4,Coffee Shop,Café,Park,Grocery Store,Italian Restaurant,Bakery,Restaurant,Pub,Seafood Restaurant,Japanese Restaurant


#### Thanks for reviewing!