**1. Download Dataset using Pandas**


*Using only Pandas library to download from wiki.*


*DataFrame created as per the requirement: "The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood".*


In [1]:
import pandas as pd
import numpy as np

# Wikipedia page listing the Canadian postal codes that start with "M".
link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# read_html returns the HTML tables found at the URL; the first one (index 0) is used.
tables = pd.read_html(link)
src_dtframe = pd.DataFrame(tables[0])
# NOTE(review): the displayed header is 'Postal Code', so this 'Postcode' rename matches no
# column and leaves the frame unchanged; later cells select the column as 'Postal Code'.
stg_dtframe = src_dtframe.rename(columns = { 'Postcode' : 'PostalCode'})
stg_dtframe.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [2]:
# Keep only the rows whose borough has been assigned; rows with a 'Not assigned' borough are
# dropped.
has_borough = stg_dtframe['Borough'].ne('Not assigned')
stg_dtframe = stg_dtframe.loc[has_borough]
stg_dtframe.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [3]:
# A postal code can be listed on several rows, one per neighbourhood. Collapse those rows into
# a single row per (postal code, borough), joining the neighbourhood names with ', '.
stg_dtframe1 = stg_dtframe
gp = (
    stg_dtframe1
    .groupby(['Postal Code', 'Borough'])['Neighbourhood']
    .apply(', '.join)
)
# Turn the grouped Series back into a flat frame with the group keys as ordinary columns.
stg_dtframe2 = gp.reset_index()
stg_dtframe2

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [4]:
# If a row has a borough but a 'Not assigned' neighbourhood, the neighbourhood becomes the
# borough name.
# Work on a copy: a plain assignment would only alias stg_dtframe2, so the update below would
# also rewrite the frame that the previous cell displayed.
stg_dtframe3 = stg_dtframe2.copy()

unassigned = stg_dtframe3['Neighbourhood'] == 'Not assigned'
# The right-hand side is aligned on the index, so each selected row receives its own borough.
stg_dtframe3.loc[unassigned, 'Neighbourhood'] = stg_dtframe3['Borough']
stg_dtframe3[stg_dtframe3['Postal Code'] == 'M7A']

Unnamed: 0,Postal Code,Borough,Neighbourhood
85,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# .shape is an attribute holding (number of rows, number of columns) of the dataframe

stg_dtframe3.shape

(103, 3)

**Used Geospatial Data to generate dataframe and joined it with postal dataframe to create a combined frame**

In [6]:
# Download the geospatial CSV with the wget shell command (-q: quiet, -O: output file name),
# then load the saved file into a dataframe.
!wget -q -O 'Geospatial_data.csv' http://cocl.us/Geospatial_data
df_geospatial = pd.read_csv('Geospatial_data.csv')

In [7]:
# Rename the 'Postal Code' column of the geospatial frame to 'PostalCode'.
df_geospatial = df_geospatial.rename(columns={'Postal Code': 'PostalCode'})
df_geospatial.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
# Show the column labels of the geospatial frame.
df_geospatial.columns

Index(['PostalCode', 'Latitude', 'Longitude'], dtype='object')

In [9]:
# Attach latitude/longitude to every postal code. The key columns are named differently in the
# two frames, so both 'Postal Code' and 'PostalCode' appear in the result.
merged_postals = stg_dtframe3.merge(
    df_geospatial,
    left_on='Postal Code',
    right_on='PostalCode',
)
merged_postals.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,PostalCode,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


**Installed geopy and folium package**

In [10]:
!pip install folium



In [11]:
!pip install geopy



In [12]:
import folium
from geopy.geocoders import Nominatim

**Selected Borough that contains Toronto (as per requirement)**

In [13]:
# Base map of fixed size (500 x 500) centred on the coordinates [43.6532, -79.3832].
map_toronto = folium.Map(width=500,height=500,location=[43.6532, -79.3832], zoom_start=11
                         
                        )
# Keep only the postal codes whose borough name contains 'Toronto'.
toronto_postals = merged_postals[merged_postals['Borough'].str.contains('Toronto')]
# NOTE: plain assignment creates a second name for the same object, not a copy. The in-place
# reset_index applied to toronto_postals in a later cell is therefore visible through
# toronto_postals_copy as well.
toronto_postals_copy = toronto_postals
toronto_postals.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,PostalCode,Latitude,Longitude
37,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",M4K,43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",M4L,43.668999,-79.315572
43,M4M,East Toronto,Studio District,M4M,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879


**Visualising Toronto Neighborhood**

In [14]:
# Put one circle marker per Toronto postal code on the map; the popup of each marker reads
# "<neighbourhood>, <borough>".
marker_fields = zip(
    toronto_postals['Latitude'],
    toronto_postals['Longitude'],
    toronto_postals['Borough'],
    toronto_postals['Neighbourhood'],
)
for lat, lng, Borough, Neighbourhood in marker_fields:
    label = folium.Popup('{}, {}'.format(Neighbourhood, Borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

In [15]:
# Foursquare credentials are read from the environment so that secrets never live in the
# notebook source or in its saved outputs. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel. The key pair that used to be hard-coded
# in this cell has been published with the notebook and should be revoked.
import os


def _require_env(name):
    """Return the value of environment variable `name`; raise RuntimeError if unset or empty."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError('Environment variable {} must be set'.format(name))
    return value


CLIENT_ID = _require_env('FOURSQUARE_CLIENT_ID') # your Foursquare ID
CLIENT_SECRET = _require_env('FOURSQUARE_CLIENT_SECRET') # your Foursquare Secret
VERSION = '20190605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself; only show that one was loaded.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [16]:
# Preview the first rows of the Toronto subset.
toronto_postals.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,PostalCode,Latitude,Longitude
37,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",M4K,43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",M4L,43.668999,-79.315572
43,M4M,East Toronto,Studio District,M4M,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879


In [17]:
# Give the Toronto subset a 0-based index so its first row can be read with .loc[0, ...].
# reset_index(inplace=True) moves the old index into an 'index' column. Guarding on that column
# keeps the cell re-runnable: a second unguarded run would add a 'level_0' column and a third
# would raise. The in-place form is kept on purpose, because toronto_postals_copy refers to this
# same object and later cells select columns by position with the 'index' column present.
if 'index' not in toronto_postals.columns:
    toronto_postals.reset_index(inplace=True)

# Coordinates of the first neighbourhood, converted to strings.
latitude = str(toronto_postals.loc[0,'Latitude'])
longitude = str(toronto_postals.loc[0,'Longitude'])
search_query = "Indian"
radius = 500
LIMIT = 30

# Show the row the coordinates were taken from.
toronto_postals.iloc[0]

index                      37
Postal Code               M4E
Borough          East Toronto
Neighbourhood     The Beaches
PostalCode                M4E
Latitude              43.6764
Longitude             -79.293
Name: 0, dtype: object

**Exploring neighborhood using Foursquare API**

In [18]:
# Build the Foursquare "explore" request URL for the first neighbourhood.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
# The URL embeds CLIENT_SECRET, so display a redacted copy; `url` itself is left intact for the
# request made in the next cell.
url.replace(CLIENT_SECRET, '<CLIENT_SECRET>') if CLIENT_SECRET else url

'https://api.foursquare.com/v2/venues/explore?&client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&v=20190605&ll=43.67635739999999,-79.2930312&radius=500&limit=30'

In [19]:
import requests

# Fetch the venues. The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status turns an HTTP error (bad credentials, quota exceeded, ...) into an exception
# here instead of a confusing KeyError in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5ff081f18ed5f550e55bd11f'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 6,
  'suggestedBounds': {'ne': {'lat': 43.680857404499996,
    'lng': -79.28682091449052},
   'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd461bc77b29c74a07d9282',
       'name': 'Glen Manor Ravine',
       'location': {'address': 'Glen Manor',
        'crossStreet': 'Queen St.',
        'lat': 43.67682094413784,
        'lng': -79.29394208780985,
        'labeledLatLngs': [{'labe

In [20]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the venue's first category, or None when it has no category.

    Args:
        row: object indexable by key (for example a dict or a pandas Series). The list of
            categories is read from 'categories' when that key exists, otherwise from
            'venue.categories'.

    Returns:
        The 'name' entry of the first category, or None if the list is None or empty.

    Raises:
        KeyError: if neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key selects the fallback column; any other error is a genuine problem
        # and must not be hidden.
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [21]:
import json
from pandas.io.json import json_normalize

In [22]:
# Venue items of the first group in the Foursquare response.
venues = results['response']['groups'][0]['items']
venues

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '4bd461bc77b29c74a07d9282',
   'name': 'Glen Manor Ravine',
   'location': {'address': 'Glen Manor',
    'crossStreet': 'Queen St.',
    'lat': 43.67682094413784,
    'lng': -79.29394208780985,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.67682094413784,
      'lng': -79.29394208780985}],
    'distance': 89,
    'cc': 'CA',
    'city': 'Toronto',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['Glen Manor (Queen St.)', 'Toronto ON', 'Canada']},
   'categories': [{'id': '4bf58dd8d48988d159941735',
     'name': 'Trail',
     'pluralName': 'Trails',
     'shortName': 'Trail',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/hikingtrail_',
      'suffix': '.png'},
     'primary': True}],
   'photos': {'count': 0, 'groups': []}},
  'referralId': 'e-0-4bd461bc77b2

**NearBy Venues Explore**

In [23]:
# Flatten the nested venue JSON into one row per venue. pd.json_normalize is the supported entry
# point; importing json_normalize from pandas.io.json is deprecated.
nearby_venues = pd.json_normalize(venues)

# filter columns; .copy() makes the selection an independent frame so the column assignment
# below does not write into a slice of the normalized frame
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  if __name__ == '__main__':


Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,Grover Pub and Grub,Pub,43.679181,-79.297215
3,Upper Beaches,Neighborhood,43.680563,-79.292869
4,Dip 'n Sip,Coffee Shop,43.678897,-79.297745


In [24]:
# Report how many venue rows the request produced.
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

6 venues were returned by Foursquare.


**Defining a function that finds the nearby venues of each neighbourhood via GET requests to the Foursquare API**

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Relies on the notebook-level names CLIENT_ID, CLIENT_SECRET, VERSION, LIMIT and `requests`.
    Each neighbourhood name is printed as it is processed.

    Args:
        names: iterable of neighbourhood names, one per location.
        latitudes: iterable of latitudes, aligned with `names`.
        longitudes: iterable of longitudes, aligned with `names`.
        radius: search radius sent to the API (default 500).

    Returns:
        DataFrame with one row per venue and the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude', 'Venue Category'. When no venue is found the frame is empty but
        still has these columns. 'Venue Category' is None for a venue without categories.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; fail fast on a stalled connection or an HTTP error
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue may come back with an empty category list; record None instead of crashing
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when `rows` is empty, whereas
    # assigning them afterwards raises a length-mismatch error on an empty frame.
    return pd.DataFrame(rows, columns=columns)

In [26]:

# Collect the nearby venues of every Toronto neighbourhood; the function prints each
# neighbourhood name as it is processed.
toronto_venues = getNearbyVenues(names=toronto_postals['Neighbourhood'],
                                   latitudes=toronto_postals['Latitude'],
                                   longitudes=toronto_postals['Longitude']
                                  )

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West, Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High 

In [27]:
# Size of the venue table (rows, columns), followed by a preview of its first rows.
print(toronto_venues.shape)
toronto_venues.head()

(863, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,The Beaches,43.676357,-79.293031,Dip 'n Sip,43.678897,-79.297745,Coffee Shop


In [28]:
# Number of non-null values in each column per neighbourhood, i.e. venues per neighbourhood.
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,30,30,30,30,30,30
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",17,17,17,17,17,17
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16,16,16,16,16,16
Central Bay Street,30,30,30,30,30,30
Christie,16,16,16,16,16,16
Church and Wellesley,30,30,30,30,30,30
"Commerce Court, Victoria Hotel",30,30,30,30,30,30
Davisville,30,30,30,30,30,30
Davisville North,9,9,9,9,9,9


In [29]:
# Number of distinct values in the 'Venue Category' column.
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 195 uniques categories.


In [38]:
# One-hot encode the venue category: one indicator column per category, one row per venue.
# Empty prefix and separator keep the bare category names as column labels.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the neighbourhood of each venue as an extra column.
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# Rotate the last column to the front so that the neighbourhood comes first.
fixed_columns = [*toronto_onehot.columns[-1:], *toronto_onehot.columns[:-1]]
toronto_onehot = toronto_onehot.loc[:, fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
# Average the indicator columns within each neighbourhood; each value is then the share of that
# neighbourhood's venues falling in the category.
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0625,0.0625,0.0625,0.125,0.125,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
# (rows, columns) of the grouped frame.
toronto_grouped.shape

(39, 196)

**Venues Frequency from the Toronto Neighbourhood Grouped Data**

In [42]:
# Number of categories to list per neighbourhood.
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    # Transpose the neighbourhood's row into a two-column (venue, freq) table.
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # The first row holds the 'Neighbourhood' label itself, not a frequency.
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # Highest frequencies first, renumbered from 0, limited to num_top_venues rows.
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0  Seafood Restaurant  0.07
1         Coffee Shop  0.07
2         Cheese Shop  0.07
3            Beer Bar  0.07
4      Farmers Market  0.07


----Brockton, Parkdale Village, Exhibition Place----
                venue  freq
0                Café  0.14
1      Breakfast Spot  0.09
2         Coffee Shop  0.09
3        Climbing Gym  0.05
4  Italian Restaurant  0.05


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                venue  freq
0  Light Rail Station  0.12
1         Yoga Studio  0.06
2       Auto Workshop  0.06
3         Pizza Place  0.06
4                Park  0.06


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Terminal  0.12
3   Harbor / Marina  0.06
4           Airport  0.06


----Central Bay Street----
                venue  f

**Return the Most Common Venues**

In [43]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, ignoring its first entry.

    Args:
        row: pandas Series whose first entry is skipped and whose remaining entries are
            sortable values labelled by the Series index.
        num_top_venues: maximum number of labels to return.

    Returns:
        Array of index labels ordered by descending value, at most `num_top_venues` long.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

In [48]:
# Storing the venues data in neighborhoods_venues_sorted based on neighborhood in toronto dataframe
import numpy as np

num_top_venues = 10


def _ordinal(n):
    """Return n with its English ordinal suffix, e.g. 1 -> '1st', 11 -> '11th', 22 -> '22nd'."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# create columns according to number of top venues
columns = ['Neighbourhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)
]

# create a new dataframe with one row per neighbourhood
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

# fill the ranking columns of each row with that neighbourhood's top categories
for ind in range(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Cheese Shop,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Farmers Market,Bistro,Jazz Club,Restaurant,Liquor Store
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Coffee Shop,Convenience Store,Burrito Place,Nightclub,Restaurant,Italian Restaurant,Intersection,Stadium
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Comic Shop,Smoke Shop,Brewery,Burrito Place,Restaurant,Farmers Market,Fast Food Restaurant,Auto Workshop
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Boat or Ferry,Coffee Shop,Boutique,Rental Car Location,Harbor / Marina,Plane,Sculpture Garden
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Yoga Studio,Seafood Restaurant,Sandwich Place,Bubble Tea Shop,Ramen Restaurant,Poke Place,Modern European Restaurant


**Clustering**

In [52]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

In [53]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 2, 0, 2, 0, 0, 2, 2, 2], dtype=int32)

In [54]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_postals_copy


In [55]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
 # check the last columns!
toronto_merged.head()

Unnamed: 0,index,Postal Code,Borough,Neighbourhood,PostalCode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,37,M4E,East Toronto,The Beaches,M4E,43.676357,-79.293031,0,Coffee Shop,Trail,Neighborhood,Health Food Store,Pub,Asian Restaurant,Yoga Studio,Creperie,Distribution Center,Discount Store
1,41,M4K,East Toronto,"The Danforth West, Riverdale",M4K,43.679557,-79.352188,0,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Yoga Studio,Pizza Place,Juice Bar,Bookstore,Restaurant,Dessert Shop,Spa
2,42,M4L,East Toronto,"India Bazaar, The Beaches West",M4L,43.668999,-79.315572,0,Sandwich Place,Fast Food Restaurant,Italian Restaurant,Gym,Pub,Steakhouse,Fish & Chips Shop,Burrito Place,Restaurant,Ice Cream Shop
3,43,M4M,East Toronto,Studio District,M4M,43.659526,-79.340923,0,Coffee Shop,Bakery,Café,American Restaurant,Yoga Studio,Neighborhood,Seafood Restaurant,Brewery,Cheese Shop,Pet Store
4,44,M4N,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879,1,Park,Bus Line,Swim School,Business Service,College Rec Center,Dance Studio,Dog Run,Distribution Center,Discount Store,Diner


In [56]:
# Map centred on the first Toronto neighbourhood (latitude/longitude are stored as strings).
map_clusters = folium.Map(location=[float(latitude), float(longitude)], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    # A missing label cannot be coloured, and float labels cannot index a list.
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 keeps the existing colour assignment; label 0 wraps around to the last colour
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

**Analyzing Clusters**

In [57]:
# analyzing cluster 0
# Columns are chosen by name rather than by position, so the view does not depend on the
# 'index' helper column sitting at position 0; errors='ignore' tolerates its absence.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0].drop(columns=['index', 'Borough'], errors='ignore')

Unnamed: 0,Postal Code,Neighbourhood,PostalCode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,The Beaches,M4E,43.676357,-79.293031,0,Coffee Shop,Trail,Neighborhood,Health Food Store,Pub,Asian Restaurant,Yoga Studio,Creperie,Distribution Center,Discount Store
1,M4K,"The Danforth West, Riverdale",M4K,43.679557,-79.352188,0,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Yoga Studio,Pizza Place,Juice Bar,Bookstore,Restaurant,Dessert Shop,Spa
2,M4L,"India Bazaar, The Beaches West",M4L,43.668999,-79.315572,0,Sandwich Place,Fast Food Restaurant,Italian Restaurant,Gym,Pub,Steakhouse,Fish & Chips Shop,Burrito Place,Restaurant,Ice Cream Shop
3,M4M,Studio District,M4M,43.659526,-79.340923,0,Coffee Shop,Bakery,Café,American Restaurant,Yoga Studio,Neighborhood,Seafood Restaurant,Brewery,Cheese Shop,Pet Store
6,M4R,"North Toronto West, Lawrence Park",M4R,43.715383,-79.405678,0,Coffee Shop,Clothing Store,Yoga Studio,Bagel Shop,Gym / Fitness Center,Fast Food Restaurant,Diner,Mexican Restaurant,Cosmetics Shop,Park
9,M4V,"Summerhill West, Rathnelly, South Hill, Forest...",M4V,43.686412,-79.400049,0,Coffee Shop,American Restaurant,Pizza Place,Pub,Supermarket,Bagel Shop,Bank,Restaurant,Sushi Restaurant,Fried Chicken Joint
11,M4X,"St. James Town, Cabbagetown",M4X,43.667967,-79.367675,0,Bakery,Coffee Shop,Restaurant,Italian Restaurant,Café,Park,Beer Store,Bank,Pub,Butcher
12,M4Y,Church and Wellesley,M4Y,43.66586,-79.38316,0,Japanese Restaurant,Coffee Shop,Adult Boutique,Mexican Restaurant,Breakfast Spot,Salon / Barbershop,Bubble Tea Shop,Restaurant,Burger Joint,Ramen Restaurant
13,M5A,"Regent Park, Harbourfront",M5A,43.65426,-79.360636,0,Coffee Shop,Bakery,Park,Breakfast Spot,Yoga Studio,Restaurant,Pub,Café,Chocolate Shop,Mexican Restaurant
25,M5S,"University of Toronto, Harbord",M5S,43.662696,-79.400049,0,Café,Italian Restaurant,Bookstore,Japanese Restaurant,Bar,Bakery,Sushi Restaurant,Nightclub,Noodle House,College Gym


In [58]:
# analyzing cluster 1
# Columns are chosen by name rather than by position, so the view does not depend on the
# 'index' helper column sitting at position 0; errors='ignore' tolerates its absence.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1].drop(columns=['index', 'Borough'], errors='ignore')

Unnamed: 0,Postal Code,Neighbourhood,PostalCode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M4N,Lawrence Park,M4N,43.72802,-79.38879,1,Park,Bus Line,Swim School,Business Service,College Rec Center,Dance Studio,Dog Run,Distribution Center,Discount Store,Diner


In [59]:
# analyzing cluster 2
# Columns are chosen by name rather than by position, so the view does not depend on the
# 'index' helper column sitting at position 0; errors='ignore' tolerates its absence.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2].drop(columns=['index', 'Borough'], errors='ignore')

Unnamed: 0,Postal Code,Neighbourhood,PostalCode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,M4P,Davisville North,M4P,43.712751,-79.390197,2,Food & Drink Shop,Park,Pizza Place,Breakfast Spot,Gym / Fitness Center,Sandwich Place,Department Store,Dance Studio,Hotel,Yoga Studio
7,M4S,Davisville,M4S,43.704324,-79.38879,2,Dessert Shop,Italian Restaurant,Sushi Restaurant,Pizza Place,Café,Sandwich Place,Coffee Shop,Gym,Pharmacy,Restaurant
14,M5B,"Garden District, Ryerson",M5B,43.657162,-79.378937,2,Café,Theater,Coffee Shop,Tanning Salon,Burrito Place,Electronics Store,Hotel,Sporting Goods Shop,Plaza,Ramen Restaurant
15,M5C,St. James Town,M5C,43.651494,-79.375418,2,Café,Farmers Market,Coffee Shop,Gastropub,Restaurant,Hotel,Italian Restaurant,Japanese Restaurant,Jazz Club,Diner
16,M5E,Berczy Park,M5E,43.644771,-79.373306,2,Cheese Shop,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Farmers Market,Bistro,Jazz Club,Restaurant,Liquor Store
17,M5G,Central Bay Street,M5G,43.657952,-79.387383,2,Coffee Shop,Italian Restaurant,Café,Yoga Studio,Seafood Restaurant,Sandwich Place,Bubble Tea Shop,Ramen Restaurant,Poke Place,Modern European Restaurant
18,M5H,"Richmond, Adelaide, King",M5H,43.650571,-79.384568,2,Coffee Shop,Café,Pizza Place,Fast Food Restaurant,Asian Restaurant,Plaza,Hotel,Speakeasy,Lounge,Bakery
19,M5J,"Harbourfront East, Union Station, Toronto Islands",M5J,43.640816,-79.381752,2,Café,Park,Hotel,Plaza,Bubble Tea Shop,Deli / Bodega,Dance Studio,Skating Rink,Roof Deck,IT Services
20,M5K,"Toronto Dominion Centre, Design Exchange",M5K,43.647177,-79.381576,2,Café,Coffee Shop,Restaurant,Japanese Restaurant,Art Gallery,Gym,Deli / Bodega,Hotel,Steakhouse,Bakery
21,M5L,"Commerce Court, Victoria Hotel",M5L,43.648198,-79.379817,2,Café,Coffee Shop,Gastropub,Deli / Bodega,American Restaurant,Restaurant,Japanese Restaurant,Hotel,Gluten-free Restaurant,Tailor Shop


In [60]:
# analyzing cluster 3
# Columns are chosen by name rather than by position, so the view does not depend on the
# 'index' helper column sitting at position 0; errors='ignore' tolerates its absence.
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3].drop(columns=['index', 'Borough'], errors='ignore')

Unnamed: 0,Postal Code,Neighbourhood,PostalCode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,M4T,"Moore Park, Summerhill East",M4T,43.689574,-79.38316,3,Park,Trail,Restaurant,Tennis Court,College Auditorium,College Gym,Distribution Center,Discount Store,Diner,Dessert Shop
10,M4W,Rosedale,M4W,43.679563,-79.377529,3,Park,Playground,Trail,Creperie,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store
23,M5P,"Forest Hill North & West, Forest Hill Road Park",M5P,43.696948,-79.411307,3,Park,Bus Line,Jewelry Store,Trail,Sushi Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop
