In [1]:
import pandas as pd


1 Cut a table from the page

In [2]:
page ='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikitables = pd.read_html(page, header=0, attrs={"class":"wikitable sortable"})

print ("Extracted {num} wikitables".format(num=len(wikitables)))

Extracted 1 wikitables


2 Save the table to the DataFrame

In [3]:
df = wikitables[0]
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


3 We study the parameters of the DataFrame

In [4]:
df.columns

Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')

In [5]:
df.index

RangeIndex(start=0, stop=289, step=1)

In [6]:
df.shape

(289, 3)

4 Delete rows with $Borough = 'Not assigned'$

In [7]:
df = df[df.Borough != 'Not assigned']
df.shape

(212, 3)

5 Correct rows with $Neighbourhood = 'Not assigned'$

In [8]:
df[df.Neighbourhood == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,Not assigned


In [9]:
for ind in df[df.Neighbourhood == 'Not assigned'].index:
    df.xs(ind)['Neighbourhood'] = df.xs(ind)['Borough']

In [10]:
df[df.Borough == "Queen's Park"]

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,Queen's Park


6 Join $Neighbourhood$ for rows with $Postcode == Borough$

In [11]:
df.groupby(['Postcode','Borough']).size().head()

Postcode  Borough    
M1B       Scarborough    2
M1C       Scarborough    3
M1E       Scarborough    3
M1G       Scarborough    1
M1H       Scarborough    1
dtype: int64

In [12]:
df = df.groupby(['Postcode','Borough'], as_index=False).agg(lambda x: ','.join(x))
df.shape

(103, 3)

7 See our results

In [13]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [14]:
df.shape

(103, 3)

# Part 2

8 Use the csv file

In [15]:
geo = pd.read_csv('Geospatial_Coordinates.csv')
geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


8 Correct column names in $geo$

In [16]:
geo.columns = ['Postal_Code', 'Latitude', 'Longitude']

9 Add 2 columns in $df$ 

In [17]:
df['Latitude'] = 0
df['Longitude'] = 0
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",0,0
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",0,0
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",0,0
3,M1G,Scarborough,Woburn,0,0
4,M1H,Scarborough,Cedarbrae,0,0


10 Extract coordinates form $geo$ and add in $df$

In [18]:
for ipc in df['Postcode'].index:
    ind = geo[geo.Postal_Code == df.xs(ipc)['Postcode']]
    df.loc[ipc,'Latitude'] = ind.iloc[0][1]
    df.loc[ipc,'Longitude'] = ind.iloc[0][2]
df        

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


11 I hope this is all :)

# === Part 3 ===

In [19]:
import folium

In [20]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [21]:
latitude, longitude = 43.650943, -79.554724
toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
toronto

In [22]:
# add markers to map
for lat, lng, borough, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto)  

toronto

In [23]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.653963, -79.387207.


In [24]:
etobicoke_data = df[df['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicoke_data.head


<bound method NDFrame.head of    Postcode    Borough                                      Neighbourhood  \
0       M8V  Etobicoke         Humber Bay Shores,Mimico South,New Toronto   
1       M8W  Etobicoke                              Alderwood,Long Branch   
2       M8X  Etobicoke        The Kingsway,Montgomery Road,Old Mill North   
3       M8Y  Etobicoke  Humber Bay,King's Mill Park,Kingsway Park Sout...   
4       M8Z  Etobicoke  Kingsway Park South West,Mimico NW,The Queensw...   
5       M9A  Etobicoke                                   Islington Avenue   
6       M9B  Etobicoke  Cloverdale,Islington,Martin Grove,Princess Gar...   
7       M9C  Etobicoke  Bloordale Gardens,Eringate,Markland Wood,Old B...   
8       M9P  Etobicoke                                          Westmount   
9       M9R  Etobicoke  Kingsview Village,Martin Grove Gardens,Richvie...   
10      M9V  Etobicoke  Albion Gardens,Beaumond Heights,Humbergate,Jam...   
11      M9W  Etobicoke                        

In [25]:
address = 'Etobicoke, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Etobicoke are 43.6435559, -79.5656326.


In [26]:
# create map of Manhattan using latitude and longitude values
map_etobicoke = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(etobicoke_data['Latitude'], etobicoke_data['Longitude'], etobicoke_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)  
    
map_etobicoke

In [27]:
CLIENT_ID = 'FPL1JVXDNWV3OMJKFGSSJQN2EEPCF5WKKBWNZQ0BLLUK2NZN' # your Foursquare ID
CLIENT_SECRET = 'Y1MKMJGVEYL0WIV3C1121BEFFR4K0K3OADOO4Z4JMZBOLNR2' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FPL1JVXDNWV3OMJKFGSSJQN2EEPCF5WKKBWNZQ0BLLUK2NZN
CLIENT_SECRET:Y1MKMJGVEYL0WIV3C1121BEFFR4K0K3OADOO4Z4JMZBOLNR2


In [28]:
etobicoke_data.loc[0, 'Neighbourhood']

'Humber Bay Shores,Mimico South,New Toronto'

In [29]:
neighbourhood_latitude = etobicoke_data.loc[0, 'Latitude'] # neighbourhood latitude value
neighbourhood_longitude = etobicoke_data.loc[0, 'Longitude'] # neighbourhood longitude value

neighbourhood_name = etobicoke_data.loc[0, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Humber Bay Shores,Mimico South,New Toronto are 43.6056466, -79.50132070000001.


In [30]:
etobicoke_data.shape

(12, 5)

In [31]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=FPL1JVXDNWV3OMJKFGSSJQN2EEPCF5WKKBWNZQ0BLLUK2NZN&client_secret=Y1MKMJGVEYL0WIV3C1121BEFFR4K0K3OADOO4Z4JMZBOLNR2&v=20180605&ll=43.6056466,-79.50132070000001&radius=500&limit=100'

In [32]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c40be39f594df0e38a7af15'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4b119977f964a520488023e3-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/food_liquor_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d186941735',
         'name': 'Liquor Store',
         'pluralName': 'Liquor Stores',
         'primary': True,
         'shortName': 'Liquor Store'}],
       'id': '4b119977f964a520488023e3',
       'location': {'address': '2762 Lake Shore Blvd W',
        'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'crossStreet': 'btwn 1st & 2nd St',
        'distance': 408,
        'formattedAddress': ['2762 Lake Shore Blvd W (btwn 1st & 2nd St)',
         'Toronto ON M8V 1H1',
         'Cana

In [33]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [34]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,LCBO,Liquor Store,43.602281,-79.499302
1,New Toronto Fish & Chips,Restaurant,43.601849,-79.503281
2,Delicia Bakery & Pastry,Bakery,43.601403,-79.503012
3,Lucky Dice Restaurant,Café,43.601392,-79.503056
4,McDonald's,Fast Food Restaurant,43.60247,-79.498963
5,Subway,Sandwich Place,43.602382,-79.498275
6,Popeyes Louisiana Kitchen,Fried Chicken Joint,43.602069,-79.4994
7,Shoppers Drug Mart,Pharmacy,43.601611,-79.502164
8,Maple Leaf House,American Restaurant,43.60204,-79.498678
9,Halibut House Fish and Chips Inc.,Seafood Restaurant,43.60196,-79.501147


In [35]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

15 venues were returned by Foursquare.


#### Explore Neighbourhoods in Toronto - Etobicoke - Humber Bay Shores,Mimico South,New Toronto

In [36]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [37]:
etobicoke_venues = getNearbyVenues(names=etobicoke_data['Neighbourhood'],
                                   latitudes=etobicoke_data['Latitude'],
                                   longitudes=etobicoke_data['Longitude']
                                  )


Humber Bay Shores,Mimico South,New Toronto
Alderwood,Long Branch
The Kingsway,Montgomery Road,Old Mill North
Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea
Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor
Islington Avenue
Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park
Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe
Westmount
Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips
Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown
Northwest


Let's check the size of the resulting dataframe

In [38]:
print(etobicoke_venues.shape)
etobicoke_venues.head()

(71, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,LCBO,43.602281,-79.499302,Liquor Store
1,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,New Toronto Fish & Chips,43.601849,-79.503281,Restaurant
2,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,Delicia Bakery & Pastry,43.601403,-79.503012,Bakery
3,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,Lucky Dice Restaurant,43.601392,-79.503056,Café
4,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,McDonald's,43.60247,-79.498963,Fast Food Restaurant


Let's check how many venues were returned for each neighbourhood

In [39]:
etobicoke_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",10,10,10,10,10,10
"Alderwood,Long Branch",10,10,10,10,10,10
"Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe",6,6,6,6,6,6
"Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park",2,2,2,2,2,2
"Humber Bay Shores,Mimico South,New Toronto",15,15,15,15,15,15
"Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea",1,1,1,1,1,1
"Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips",4,4,4,4,4,4
"Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",11,11,11,11,11,11
Northwest,2,2,2,2,2,2
"The Kingsway,Montgomery Road,Old Mill North",3,3,3,3,3,3


Let's find out how many unique categories can be curated from all the returned venues

In [40]:
print('There are {} uniques categories.'.format(len(etobicoke_venues['Venue Category'].unique())))

There are 39 uniques categories.


## 3. Analyze Each Neighbourhood

In [41]:
# one hot encoding
etobicoke_onehot = pd.get_dummies(etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighbourhood column back to dataframe
etobicoke_onehot['Neighbourhood'] = etobicoke_venues['Neighbourhood'] 

# move neighbourhood column to the first column
fixed_columns = [etobicoke_onehot.columns[-1]] + list(etobicoke_onehot.columns[:-1])
etobicoke_onehot = etobicoke_onehot[fixed_columns]

etobicoke_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Bakery,Bank,Baseball Field,Beer Store,Burger Joint,Bus Line,Café,Chinese Restaurant,Coffee Shop,Convenience Store,Dance Studio,Discount Store,Drugstore,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Golf Course,Grocery Store,Gym,Intersection,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Park,Pharmacy,Pizza Place,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Social Club,Supplement Shop,Wings Joint
0,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2,"Humber Bay Shores,Mimico South,New Toronto",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
etobicoke_onehot.shape

(71, 40)

#### Next, let's group rows by neighbourhood and by taking the mean of the frequency of occurrence of each category

In [43]:
etobicoke_grouped = etobicoke_onehot.groupby('Neighbourhood').mean().reset_index()
etobicoke_grouped

Unnamed: 0,Neighbourhood,American Restaurant,Bakery,Bank,Baseball Field,Beer Store,Burger Joint,Bus Line,Café,Chinese Restaurant,Coffee Shop,Convenience Store,Dance Studio,Discount Store,Drugstore,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Golf Course,Grocery Store,Gym,Intersection,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Park,Pharmacy,Pizza Place,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Social Club,Supplement Shop,Wings Joint
0,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.2,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0
1,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.2,0.1,0.1,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0
2,"Bloordale Gardens,Eringate,Markland Wood,Old B...",0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cloverdale,Islington,Martin Grove,Princess Gar...",0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Humber Bay Shores,Mimico South,New Toronto",0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.066667,0.0,0.0,0.066667,0.0,0.066667,0.066667,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.066667,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0
5,"Humber Bay,King's Mill Park,Kingsway Park Sout...",0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Kingsview Village,Martin Grove Gardens,Richvie...",0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Kingsway Park South West,Mimico NW,The Queensw...",0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.090909,0.0,0.090909,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.090909,0.090909
8,Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"The Kingsway,Montgomery Road,Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
etobicoke_grouped.shape

(11, 40)

#### Let's print each neighbourhood along with the top 5 most common venues

In [45]:
num_top_venues = 5

for hood in etobicoke_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = etobicoke_grouped[etobicoke_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
            venue  freq
0   Grocery Store   0.2
1    Liquor Store   0.1
2      Beer Store   0.1
3        Pharmacy   0.1
4  Sandwich Place   0.1


----Alderwood,Long Branch----
            venue  freq
0     Pizza Place   0.2
1             Gym   0.1
2             Pub   0.1
3    Skating Rink   0.1
4  Sandwich Place   0.1


----Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe----
               venue  freq
0  Convenience Store  0.17
1         Beer Store  0.17
2           Pharmacy  0.17
3               Café  0.17
4        Pizza Place  0.17


----Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park----
                       venue  freq
0                       Bank   0.5
1                Golf Course   0.5
2        American Restaurant   0.0
3                        Pub   0.0
4  Middle Eastern Restaurant   0.0


----Humber Bay Shores,Mimico South,New Toronto---

### Let's put that into a pandas dataframe

First, let's write a function to sort the venues in descending order.

In [46]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [47]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = etobicoke_grouped['Neighbourhood']

for ind in np.arange(etobicoke_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etobicoke_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pizza Place,Coffee Shop,Fast Food Restaurant,Sandwich Place,Beer Store,Liquor Store,Fried Chicken Joint,Pharmacy,Drugstore
1,"Alderwood,Long Branch",Pizza Place,Pub,Dance Studio,Coffee Shop,Pharmacy,Pool,Gym,Sandwich Place,Skating Rink,Beer Store
2,"Bloordale Gardens,Eringate,Markland Wood,Old B...",Pharmacy,Liquor Store,Beer Store,Café,Convenience Store,Pizza Place,Wings Joint,Flower Shop,Fast Food Restaurant,Drugstore
3,"Cloverdale,Islington,Martin Grove,Princess Gar...",Golf Course,Bank,Coffee Shop,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Dance Studio,Convenience Store
4,"Humber Bay Shores,Mimico South,New Toronto",Café,Gym,Pizza Place,Bakery,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Liquor Store,Mexican Restaurant,Pharmacy
5,"Humber Bay,King's Mill Park,Kingsway Park Sout...",Baseball Field,Wings Joint,Coffee Shop,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Dance Studio,Convenience Store
6,"Kingsview Village,Martin Grove Gardens,Richvie...",Pizza Place,Bus Line,Mobile Phone Shop,Park,Wings Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Dance Studio
7,"Kingsway Park South West,Mimico NW,The Queensw...",Wings Joint,Supplement Shop,Bakery,Burger Joint,Convenience Store,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Social Club
8,Northwest,Drugstore,Rental Car Location,Wings Joint,Coffee Shop,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Discount Store,Dance Studio,Convenience Store
9,"The Kingsway,Montgomery Road,Old Mill North",Pool,River,Park,Chinese Restaurant,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Dance Studio,Convenience Store


### Cluster Neighbourhoods

Run k-means to cluster the neighbourhood into 5 clusters.

In [48]:
# set number of clusters
kclusters = 5

etobicoke_grouped_clustering = etobicoke_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(etobicoke_grouped_clustering)
print (etobicoke_grouped_clustering.shape)
# check cluster labels generated for each row in the dataframe
kmeans.labels_ 
print (kmeans.labels_.shape)

(11, 39)
(11,)


Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighbourhood.



In [52]:
etobicoke_merged = etobicoke_data

print (etobicoke_data.shape)
# add clustering labels
print (kmeans.labels_.shape)
print (etobicoke_merged.shape)
print (etobicoke_grouped_clustering.shape)

etobicoke_merged.drop(etobicoke_merged.index[-1], axis=0, inplace=True)
etobicoke_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighbourhood
etobicoke_merged = etobicoke_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

etobicoke_merged.head() # check the last columns!

(12, 5)
(11,)
(12, 5)
(11, 39)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,0,Café,Gym,Pizza Place,Bakery,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Liquor Store,Mexican Restaurant,Pharmacy
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,0,Pizza Place,Pub,Dance Studio,Coffee Shop,Pharmacy,Pool,Gym,Sandwich Place,Skating Rink,Beer Store
2,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944,0,Pool,River,Park,Chinese Restaurant,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Dance Studio,Convenience Store
3,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509,3,Baseball Field,Wings Joint,Coffee Shop,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Dance Studio,Convenience Store
4,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw...",43.628841,-79.520999,0,Wings Joint,Supplement Shop,Bakery,Burger Joint,Convenience Store,Discount Store,Fast Food Restaurant,Grocery Store,Gym,Social Club


In [54]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(etobicoke_merged['Latitude'], etobicoke_merged['Longitude'], etobicoke_merged['Neighbourhood'], etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

:=)