# Segmenting and Clustering Neighborhoods in Toronto

# Step 1 ---------------------------------

#### Downloading the neccesary libraries for extraction of the wikipedia table

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

# Use SHIFT+TAB keys to popup inplace code help
%config IPCompleter.greedy = True

# Output multiple statements from one input cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

print('Libraries imported')

Libraries imported


## Customize Notebook

**table_from_top.** If the Wikipedia page has one table then use `table_from_top = 1` value. Otherwise count table number from top and replace value to get specific table.

**wikipedia_page.** Specify the wikipedia page name from where to source dataset. The CSV file will be saved with the same name.

**trace.** Set `trace = True` to trace how feature values are extracted. Does not save extracted dataset. Prefixes applied parsing/extraction rules to extracted values.

In [3]:
table_from_top = 1
wikipedia_page = 'List of postal codes of Canada: M'
trace = False

## Load and Parse

This section loads the Wikipedia page and parses the table data we are interested in converting to a dataset.

In [4]:
wikipedia_url = 'https://en.wikipedia.org/wiki/{}'.format(wikipedia_page)
page = requests.get(wikipedia_url)
soup = BeautifulSoup(page.content, 'lxml')
tables = soup.find_all('table', {'class': 'wikitable'})
table = tables[table_from_top - 1]

## Quick Preview

This section extracts the table header with feature or column names.

Use this section to quick preview if you have the right table in processing.

In [5]:
feature_names = []

header_row = table.find('tr')
for header in header_row.find_all('th'):
    feature_name = ' '.join(header.find_all(text=True))
    feature_name.replace('\n', '')
    feature_names.append(feature_name)

'Postcode'

'Borough'

'Neighbourhood'

## Data Wrangling

This section applies data wrangling rules based on exceptions found when parsing Wikipedia tables.

- If a feature value contains a link then extract text from the link.
- Ignore text which starts with `[` square brackets.
- Ignore image links (...flags) prefix link text.
- Ignore hidden text used for IDs.

In [6]:
def has_coords(tag):
    if tag.has_attr('class'):
        if tag['class'][0] == 'latitude' or tag['class'][0] == 'longitude':
            return True
    return False

def get_coords(child):
    coords = []
    for coord in child.find_all(has_coords):
        coords.append(coord.string)
    if coords:
        if trace:
            return 'C = {}'.format(' '.join(coords))
        else:
            return ' '.join(coords)
    else:
        return ''

samples = []
sample_rows = table.find_all('tr')[1:]
for sample_row in sample_rows:
    features = []
    for feature_col in sample_row.find_all('td'):
        feature_value = ''
        text = feature_col.string
        if text:
            if trace:
                features.append('T = {}'.format(text))
            else:
                features.append(text)
            continue
        
        for child in feature_col.children:
            if child.name == 'span':
                if child.has_attr('class'):
                    if child['class'] == 'display:none':
                        continue
                if child.find_all(has_coords):
                    feature_value = get_coords(child)
                    if feature_value:
                        break
                    else:
                        continue
            if child.name == 'sup':
                continue
            if child.name == 'a':
                if child.string[0] == '[':
                    continue            
            if child.name == 'a':
                if trace:
                    feature_value = 'A = {}'.format(child.string)
                else:
                    feature_value = child.string
                break
            if child.name == 'font':
                if trace:
                    feature_value = 'F = {}'.format(child.string)
                else:
                    feature_value = child.string
                break
            try:
                # feature_value = '' for any tags not covered above
                content = child.contents
            except AttributeError:
                # Handle whitespace between child tags, treated as a child string
                if child.isspace():
                    continue
                if trace:
                    feature_value = 'E = {}'.format(child)
                else:
                    feature_value = child
                break
        features.append(feature_value)
    samples.append(dict(zip(feature_names, features)))

## Preview Dataset

This section enables you to preview the parsed dataset.

In [7]:
df = pd.DataFrame(samples)
df.head()
#df.tail()

df_canada = df.iloc[:,[2,0,1]]
df_canada.head()


Unnamed: 0,Borough,Neighbourhood,Postcode
0,Not assigned,Not assigned,M1A
1,Not assigned,Not assigned,M2A
2,North York,Parkwoods,M3A
3,North York,Victoria Village,M4A
4,Downtown Toronto,Harbourfront,M5A


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Save Dataset

We can now save the dataset using the same Wikipedia page name we use earlier to extract the dataset.

In [8]:
#dataset_file_name = '../datasets/wikipedia/{}.csv'.format(wikipedia_page)
#if not trace:
 #   df.to_csv(dataset_file_name, index=False)

### Deleting all rows for which the borough is not assigned

In [9]:
df_canada = df_canada[df_canada.Borough != 'Not assigned']
df_canada = df_canada[df_canada.iloc[:,2] != 'Not assigned']
df_canada.head(5)
df_canada.tail(5)
df_canada.shape

length = df_canada.shape[0]





Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


Unnamed: 0,Postcode,Borough,Neighbourhood
283,M8Z,Etobicoke,Kingsway Park South West
284,M8Z,Etobicoke,Mimico NW
285,M8Z,Etobicoke,The Queensway West
286,M8Z,Etobicoke,Royal York South West
287,M8Z,Etobicoke,South of Bloor


(212, 3)

#### Making a new dataframe 'Df_new' with resetted indexes

In [10]:
#Df_new = df_canada.iloc[:,[0,1,2]]
Df_new = pd.DataFrame(np.zeros((212, 3)),columns = [['Postcode','Borough','Neighbourhood']])

for k in range(212):
    for i in range(3):
        Df_new.iloc[k,i] = df_canada.iloc[k,i]

Df_new.head(5)
Df_new.tail(5)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


Unnamed: 0,Postcode,Borough,Neighbourhood
207,M8Z,Etobicoke,Kingsway Park South West
208,M8Z,Etobicoke,Mimico NW
209,M8Z,Etobicoke,The Queensway West
210,M8Z,Etobicoke,Royal York South West
211,M8Z,Etobicoke,South of Bloor


### Writing all neighbourhoods that belong to the same postcode

In [11]:
for k in range(212):
    if Df_new.iloc[211-k,0] == Df_new.iloc[211-k-1,0]:
        a = Df_new.iloc[211-k-1,2]
        c = Df_new.iloc[211-k,2]
        d = a.rstrip() + ", " + c.rstrip()

        
        Df_new.iloc[211-k-1,2] =  d
        Df_new.drop(211-k, inplace = True)
        

In [12]:
for k in range(Df_new.shape[0]):
    if Df_new.iloc[k,2].rstrip() == 'Not assigned':
        print(k)
        

4


As we can see, the fifth row contains such a value and in the next cell I'll therefore delete it. 

In [13]:
Df_new.iloc[4,2] = Df_new.iloc[4,1] 

# Step 2 --------------------------------

Checking that everything looks ok and downloading the 'Geospatial Coordinates file for addition of coordinates to existing dataframe

In [14]:
Df_new.head()

Coordinates = pd.read_csv('Geospatial_Coordinates.csv')
Coordinates.head()



Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
4,M6A,North York,"Lawrence Heights, Lawrence Manor"
6,M7A,Queen's Park,Queen's Park


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Sorting the two dataframes, so I can easily add the coordinates afterwards to the final 'DF" dataframe

In [15]:
a = Df_new.set_index('Postcode')
a = a.sort_index()
a = a.reset_index()

a = pd.DataFrame(data = a)
a.head()

b = Coordinates.set_index('Postal Code')
b = b.sort_index()
b = b.reset_index()

b = pd.DataFrame(data = b)
b.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,"(M1B,)",Scarborough,"Rouge, Malvern"
1,"(M1C,)",Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,"(M1E,)",Scarborough,"Guildwood, Morningside, West Hill"
3,"(M1G,)",Scarborough,Woburn
4,"(M1H,)",Scarborough,Cedarbrae


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
Borough = a['Borough']
b['Borough'] = Borough
b

Neighbourhood = a['Neighbourhood']
b['Neighbourhood'] = Neighbourhood
DF = b
DF

Unnamed: 0,Postal Code,Latitude,Longitude,Borough
0,M1B,43.806686,-79.194353,Scarborough
1,M1C,43.784535,-79.160497,Scarborough
2,M1E,43.763573,-79.188711,Scarborough
3,M1G,43.770992,-79.216917,Scarborough
4,M1H,43.773136,-79.239476,Scarborough
5,M1J,43.744734,-79.239476,Scarborough
6,M1K,43.727929,-79.262029,Scarborough
7,M1L,43.711112,-79.284577,Scarborough
8,M1M,43.716316,-79.239476,Scarborough
9,M1N,43.692657,-79.264848,Scarborough


Unnamed: 0,Postal Code,Latitude,Longitude,Borough,Neighbourhood
0,M1B,43.806686,-79.194353,Scarborough,"Rouge, Malvern"
1,M1C,43.784535,-79.160497,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,43.763573,-79.188711,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae
5,M1J,43.744734,-79.239476,Scarborough,Scarborough Village
6,M1K,43.727929,-79.262029,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,43.711112,-79.284577,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,43.716316,-79.239476,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,43.692657,-79.264848,Scarborough,"Birch Cliff, Cliffside West"


# Step 3 ------------------------------

### Importing the relevant libraries

In [18]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.5.12

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /anaconda3

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2018.11.29 |       ha4d7672_0         143 KB  conda-forge
    pyhamcrest-1.9.0           |             py_2          23 KB  conda-forge
    conda-4.5.12               |        py37_1000         652 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    twisted-18.9.0             |   py37h470a237_0         4.9 MB  conda-forge
    openssl-1.0.2p             |       h470a237_1         2.9 MB  conda-forge
    certifi-2018.11.29         |        py37_1000     

### Showing the map for all neighbourhoods

In [19]:
# for the city Toronto, latitude and longtitude are manually extracted via google search
toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighbourhood in zip(DF['Latitude'], DF['Longitude'], DF['Borough'], DF['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

<folium.features.CircleMarker at 0x1a2295eac8>

<folium.features.CircleMarker at 0x1a2295ea58>

<folium.features.CircleMarker at 0x1a22952fd0>

<folium.features.CircleMarker at 0x1a2295ec18>

<folium.features.CircleMarker at 0x1a22968be0>

<folium.features.CircleMarker at 0x1a2295ec88>

<folium.features.CircleMarker at 0x1a2295eda0>

<folium.features.CircleMarker at 0x1a22968a20>

<folium.features.CircleMarker at 0x1a22968d68>

<folium.features.CircleMarker at 0x1a2295e9b0>

<folium.features.CircleMarker at 0x1a22968048>

<folium.features.CircleMarker at 0x1a2296cb70>

<folium.features.CircleMarker at 0x1a2296c7b8>

<folium.features.CircleMarker at 0x1a22968470>

<folium.features.CircleMarker at 0x1a2297b7b8>

<folium.features.CircleMarker at 0x1a229681d0>

<folium.features.CircleMarker at 0x1a2297b7f0>

<folium.features.CircleMarker at 0x1a2296cc18>

<folium.features.CircleMarker at 0x1a2297b0b8>

<folium.features.CircleMarker at 0x1a2296c6d8>

<folium.features.CircleMarker at 0x1a2298ca58>

<folium.features.CircleMarker at 0x1a2296ccc0>

<folium.features.CircleMarker at 0x1a2297bac8>

<folium.features.CircleMarker at 0x1a2296cb38>

<folium.features.CircleMarker at 0x1a2297bd68>

<folium.features.CircleMarker at 0x1a2297b1d0>

<folium.features.CircleMarker at 0x1a2298c320>

<folium.features.CircleMarker at 0x1a2298c240>

<folium.features.CircleMarker at 0x1a229ada20>

<folium.features.CircleMarker at 0x1a2298c710>

<folium.features.CircleMarker at 0x1a229ad0f0>

<folium.features.CircleMarker at 0x1a2298cda0>

<folium.features.CircleMarker at 0x1a229ad358>

<folium.features.CircleMarker at 0x1a2298c518>

<folium.features.CircleMarker at 0x1a2298d3c8>

<folium.features.CircleMarker at 0x1a229ad710>

<folium.features.CircleMarker at 0x1a2298d0b8>

<folium.features.CircleMarker at 0x1a229ada90>

<folium.features.CircleMarker at 0x1a229ad748>

<folium.features.CircleMarker at 0x1a2298dd68>

<folium.features.CircleMarker at 0x1a2298d860>

<folium.features.CircleMarker at 0x1a229ad208>

<folium.features.CircleMarker at 0x1a2298def0>

<folium.features.CircleMarker at 0x1a229e90b8>

<folium.features.CircleMarker at 0x1a2298d630>

<folium.features.CircleMarker at 0x1a229e9e80>

<folium.features.CircleMarker at 0x1a2298d0f0>

<folium.features.CircleMarker at 0x1a229f37f0>

<folium.features.CircleMarker at 0x1a229e9198>

<folium.features.CircleMarker at 0x1a229f36d8>

<folium.features.CircleMarker at 0x1a229e9b00>

<folium.features.CircleMarker at 0x1a229f3ef0>

<folium.features.CircleMarker at 0x1a229e9ac8>

<folium.features.CircleMarker at 0x1a229e9588>

<folium.features.CircleMarker at 0x1a229f3898>

<folium.features.CircleMarker at 0x1a229e62e8>

<folium.features.CircleMarker at 0x1a229e6390>

<folium.features.CircleMarker at 0x1a229f3cc0>

<folium.features.CircleMarker at 0x1a229e9e48>

<folium.features.CircleMarker at 0x1a229e6cc0>

<folium.features.CircleMarker at 0x1a229f3400>

<folium.features.CircleMarker at 0x1a229e6d68>

<folium.features.CircleMarker at 0x1a229e6860>

<folium.features.CircleMarker at 0x1a229f3978>

<folium.features.CircleMarker at 0x1a229e6eb8>

<folium.features.CircleMarker at 0x1a229e6a20>

<folium.features.CircleMarker at 0x1a22a2c400>

<folium.features.CircleMarker at 0x1a22a097b8>

<folium.features.CircleMarker at 0x1a229e6518>

<folium.features.CircleMarker at 0x1a22a09400>

<folium.features.CircleMarker at 0x1a22a2c9e8>

<folium.features.CircleMarker at 0x1a22a09860>

<folium.features.CircleMarker at 0x1a22a09908>

<folium.features.CircleMarker at 0x1a22a2c2b0>

<folium.features.CircleMarker at 0x1a229e67f0>

<folium.features.CircleMarker at 0x1a22a09a58>

<folium.features.CircleMarker at 0x1a22a2c4a8>

<folium.features.CircleMarker at 0x1a22a31e48>

<folium.features.CircleMarker at 0x1a22a5ff98>

<folium.features.CircleMarker at 0x1a22a316a0>

<folium.features.CircleMarker at 0x1a22a5f2e8>

<folium.features.CircleMarker at 0x1a22a092e8>

<folium.features.CircleMarker at 0x1a229e66d8>

<folium.features.CircleMarker at 0x1a22a2cb38>

<folium.features.CircleMarker at 0x1a22a31b38>

<folium.features.CircleMarker at 0x1a22a31b70>

<folium.features.CircleMarker at 0x1a22a49710>

<folium.features.CircleMarker at 0x1a22a31ef0>

<folium.features.CircleMarker at 0x1a22a49ba8>

<folium.features.CircleMarker at 0x1a22a09390>

<folium.features.CircleMarker at 0x1a22a31d68>

<folium.features.CircleMarker at 0x1a22a5fc18>

<folium.features.CircleMarker at 0x1a22a57550>

<folium.features.CircleMarker at 0x1a22a49320>

<folium.features.CircleMarker at 0x1a22a570f0>

<folium.features.CircleMarker at 0x1a22a495c0>

<folium.features.CircleMarker at 0x1a22a49ac8>

<folium.features.CircleMarker at 0x1a22a49860>

<folium.features.CircleMarker at 0x1a22a57b00>

<folium.features.CircleMarker at 0x1a22a5fb00>

<folium.features.CircleMarker at 0x1a22a57dd8>

<folium.features.CircleMarker at 0x1a22a5e390>

<folium.features.CircleMarker at 0x1a22a57d68>

#### Creating a new dataframe 'df_toronto', which contains only boroughs that contain the word 'Toronto'.

In [20]:
df_toronto = DF[DF['Borough'].str.contains("Toronto")].reset_index(drop=True)
df_toronto.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude,Borough,Neighbourhood
0,M4E,43.676357,-79.293031,East Toronto,The Beaches
1,M4K,43.679557,-79.352188,East Toronto,"The Danforth West, Riverdale"
2,M4L,43.668999,-79.315572,East Toronto,"The Beaches West, India Bazaar"
3,M4M,43.659526,-79.340923,East Toronto,Studio District
4,M4N,43.72802,-79.38879,Central Toronto,Lawrence Park


#### Making a new map, this time for the df_toronto dataframe

In [21]:
# for the city Toronto, latitude and longtitude are manually extracted via google search
toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

<folium.features.CircleMarker at 0x1a20d81c18>

<folium.features.CircleMarker at 0x1a22b7b8d0>

<folium.features.CircleMarker at 0x1a22b68da0>

<folium.features.CircleMarker at 0x1a22b7b0b8>

<folium.features.CircleMarker at 0x1a22b68f60>

<folium.features.CircleMarker at 0x1a22b7b5f8>

<folium.features.CircleMarker at 0x1a22b68438>

<folium.features.CircleMarker at 0x1a22b7b860>

<folium.features.CircleMarker at 0x1a22b68780>

<folium.features.CircleMarker at 0x1a22b684e0>

<folium.features.CircleMarker at 0x1a22b7b6d8>

<folium.features.CircleMarker at 0x1a22b68518>

<folium.features.CircleMarker at 0x1a22b763c8>

<folium.features.CircleMarker at 0x1a22b686d8>

<folium.features.CircleMarker at 0x1a22b9f710>

<folium.features.CircleMarker at 0x1a22b9f400>

<folium.features.CircleMarker at 0x1a22b686a0>

<folium.features.CircleMarker at 0x1a22b68320>

<folium.features.CircleMarker at 0x1a22b76240>

<folium.features.CircleMarker at 0x1a22b9f898>

<folium.features.CircleMarker at 0x1a22b9f6a0>

<folium.features.CircleMarker at 0x1a22b766a0>

<folium.features.CircleMarker at 0x1a22b7bcc0>

<folium.features.CircleMarker at 0x1a22b76fd0>

<folium.features.CircleMarker at 0x1a22b9fa20>

<folium.features.CircleMarker at 0x1a22bba048>

<folium.features.CircleMarker at 0x1a22b769b0>

<folium.features.CircleMarker at 0x1a22b9f2e8>

<folium.features.CircleMarker at 0x1a22b97860>

<folium.features.CircleMarker at 0x1a22b97320>

<folium.features.CircleMarker at 0x1a22b9f1d0>

<folium.features.CircleMarker at 0x1a22b975c0>

<folium.features.CircleMarker at 0x1a22bba240>

<folium.features.CircleMarker at 0x1a22b97d68>

<folium.features.CircleMarker at 0x1a22b97b38>

<folium.features.CircleMarker at 0x1a22bbafd0>

<folium.features.CircleMarker at 0x1a22b8a828>

<folium.features.CircleMarker at 0x1a22bba710>

#### Define Foursquare Credentials and Version

In [22]:
CLIENT_ID = '3XQ3ZRFJHG1ZHFW0PVBKPUNQO0YFN1AGGEVFY3O5YGEQSI4M' # your Foursquare ID
CLIENT_SECRET = 'NHK22QAHONYF1BZ2XILVEB4BXVTQMWBE5N1RGCTBY2VE2F1X' # your Foursquare Secret
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3XQ3ZRFJHG1ZHFW0PVBKPUNQO0YFN1AGGEVFY3O5YGEQSI4M
CLIENT_SECRET:NHK22QAHONYF1BZ2XILVEB4BXVTQMWBE5N1RGCTBY2VE2F1X


### Let us explore the first neighbourhood in Toronto

In [23]:
df_toronto.loc[0, 'Neighbourhood']

'The Beaches'

Get the neighborhood's latitude and longitude values.

In [24]:
neighbourhood_latitude = df_toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighbourhood_longitude = df_toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighbourhood_name = df_toronto.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


#### Now, let's get the top 100 venues that are in Marble Hill within a radius of 500 meters.

First, let's create the GET request URL. Name your URL **url**.

In [25]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
url 

'https://api.foursquare.com/v2/venues/explore?&client_id=3XQ3ZRFJHG1ZHFW0PVBKPUNQO0YFN1AGGEVFY3O5YGEQSI4M&client_secret=NHK22QAHONYF1BZ2XILVEB4BXVTQMWBE5N1RGCTBY2VE2F1X&v=20180604&ll=43.67635739999999,-79.2930312&radius=500&limit=100'

Send the GET request and examine the resutls

In [26]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c1b82206a607133f3bb124b'},
 'response': {'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.680857404499996,
    'lng': -79.28682091449052},
   'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e77e3861f6ecf8d3648300c',
       'name': 'Starbucks',
       'location': {'address': '637 Kingston Rd.',
        'crossStreet': 'at Main St.',
        'lat': 43.67879837444001,
        'lng': -79.2980449760153,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67879837444001,
          'lng': -79.2980449760153}],
        'distance'

From the Foursquare lab in the previous module, we know that all the information is in the *items* key. Before we proceed, let's borrow the **get_category_type** function from the Foursquare lab.

In [27]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Now we are ready to clean the json and structure it into a *pandas* dataframe.

In [28]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Starbucks,Coffee Shop,43.678798,-79.298045
1,Grover Pub and Grub,Pub,43.679181,-79.297215
2,Upper Beaches,Neighborhood,43.680563,-79.292869
3,Beaches Fitness,Gym / Fitness Center,43.680319,-79.290991


And how many venues were returned by Foursquare?

In [29]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


## 2. Explore Neighborhoods in Toronto

#### Let's create a function to repeat the same process to all the neighborhoods in Manhattan

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now write the code to run the above function on each neighborhood and create a new dataframe called *toronto_venues*.

In [31]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighbourhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District

Lawrence Park
Davisville North

North Toronto West

Davisville

Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street

Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn

Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade

First Canadian Place, Underground city
Christie

Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Pa

#### Let's check the size of the resulting dataframe

In [32]:
print(toronto_venues.shape)
toronto_venues.head()

(1696, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
1,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
2,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
3,The Beaches,43.676357,-79.293031,Beaches Fitness,43.680319,-79.290991,Gym / Fitness Center
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


Let's check how many venues were returned for each neighbourhood

In [33]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,54,54,54,54,54,54
"Brockton, Exhibition Place, Parkdale Village",23,23,23,23,23,23
Business reply mail Processing Centre969 Eastern,16,16,16,16,16,16
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",51,51,51,51,51,51
Central Bay Street,79,79,79,79,79,79
"Chinatown, Grange Park, Kensington Market",97,97,97,97,97,97
Christie,15,15,15,15,15,15
Church and Wellesley,84,84,84,84,84,84


#### Let's find out how many unique categories can be curated from all the returned venues

In [34]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 231 uniques categories.


## 3. Analyze Each Neighborhood

In [36]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hospital,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Museum,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Optical Shop,Organic Grocery,Park,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Piano Bar,Pizza Place,Plane,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Summer Camp,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [37]:
toronto_onehot.shape

(1696, 232)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [38]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hospital,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Museum,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Optical Shop,Organic Grocery,Park,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Piano Bar,Pizza Place,Plane,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Summer Camp,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.03,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.02,0.01,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.037037,0.0,0.0,0.0,0.018519,0.018519,0.0,0.018519,0.0,0.018519,0.018519,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.018519,0.055556,0.074074,0.0,0.0,0.0,0.018519,0.0,0.018519,0.0,0.018519,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018519,0.0,0.0,0.0,0.018519,0.0,0.018519,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.043478,0.0,0.0,0.086957,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.130435,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
3,Business reply mail Processing Centre969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.039216,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.019608,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.019608,0.039216,0.0,0.019608,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.078431,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.019608,0.0,0.0,0.019608,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.039216,0.0,0.0,0.0,0.039216,0.019608,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.039216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.019608,0.019608,0.0,0.058824,0.0,0.019608,0.019608,0.0,0.0,0.0,0.0,0.039216,0.0,0.0,0.078431,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.037975,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.037975,0.0,0.0,0.0,0.037975,0.0,0.0,0.0,0.025316,0.0,0.0,0.0,0.012658,0.0,0.164557,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.012658,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.025316,0.0,0.0,0.0,0.050633,0.025316,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.025316,0.0,0.037975,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.012658
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.010309,0.0,0.0,0.0,0.0,0.010309,0.041237,0.0,0.061856,0.0,0.0,0.0,0.010309,0.010309,0.0,0.010309,0.0,0.0,0.0,0.0,0.0,0.010309,0.010309,0.010309,0.020619,0.010309,0.0,0.0,0.061856,0.0,0.020619,0.010309,0.041237,0.0,0.0,0.0,0.0,0.010309,0.041237,0.0,0.0,0.0,0.020619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.020619,0.020619,0.0,0.0,0.0,0.010309,0.010309,0.030928,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.010309,0.0,0.010309,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.0,0.0,0.0,0.010309,0.020619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.010309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.010309,0.0,0.0,0.0,0.0,0.0,0.010309,0.010309,0.010309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.0,0.010309,0.010309,0.0,0.0,0.030928,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.010309,0.010309,0.0,0.0,0.0,0.010309,0.0,0.010309,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.010309,0.010309,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.010309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.0,0.010309,0.0,0.0,0.0,0.051546,0.0,0.051546,0.010309,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.011905,0.0,0.011905,0.035714,0.011905,0.0,0.0,0.02381,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.059524,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.047619,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.059524,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.02381,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.011905,0.0,0.035714,0.011905,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.059524,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.011905,0.0,0.011905


In [39]:
toronto_grouped.shape

(38, 232)

#### Let's print each neighborhood along with the top 5 most common venues

In [40]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.05
2      Thai Restaurant  0.04
3  American Restaurant  0.04
4           Steakhouse  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.06
2          Restaurant  0.06
3  Italian Restaurant  0.04
4  Seafood Restaurant  0.04


----Brockton, Exhibition Place, Parkdale Village----
                   venue  freq
0            Coffee Shop  0.13
1         Breakfast Spot  0.09
2                   Café  0.09
3  Performing Arts Venue  0.04
4          Burrito Place  0.04


----Business reply mail Processing Centre969 Eastern
----
                venue  freq
0  Light Rail Station  0.12
1         Yoga Studio  0.06
2       Auto Workshop  0.06
3          Comic Shop  0.06
4          Restaurant  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                v

#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [41]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,American Restaurant,Hotel,Gym,Clothing Store,Restaurant,Asian Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Farmers Market,Café,Pub,Cheese Shop,Seafood Restaurant,Bakery,Steakhouse
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Breakfast Spot,Café,Yoga Studio,Bar,Burrito Place,Caribbean Restaurant,Climbing Gym,Gym / Fitness Center,Convenience Store
3,Business reply mail Processing Centre969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Pizza Place,Restaurant,Burrito Place,Brewery,Skate Park,Smoke Shop,Spa
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Terminal,Airport Lounge,Boutique,Sculpture Garden,Harbor / Marina,Boat or Ferry,Plane,Airport Gate,Airport Food Court
5,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Pizza Place,Market,Café,Indian Restaurant,Italian Restaurant,Pub,Bakery,Playground
6,Central Bay Street,Coffee Shop,Italian Restaurant,Bubble Tea Shop,Café,Sandwich Place,Ice Cream Shop,Burger Joint,Bar,Salad Place,Spa
7,"Chinatown, Grange Park, Kensington Market",Café,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Bakery,Coffee Shop,Chinese Restaurant,Dumpling Restaurant,Mexican Restaurant,Dessert Shop
8,Christie,Grocery Store,Café,Park,Italian Restaurant,Baby Store,Nightclub,Restaurant,Diner,Coffee Shop,Convenience Store
9,Church and Wellesley,Sushi Restaurant,Japanese Restaurant,Coffee Shop,Gay Bar,Burger Joint,Restaurant,Pub,Men's Store,Gastropub,Fast Food Restaurant


In [43]:


# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [45]:
toronto_merged = df_toronto

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighbourhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Latitude,Longitude,Borough,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,43.676357,-79.293031,East Toronto,The Beaches,0,Gym / Fitness Center,Neighborhood,Coffee Shop,Pub,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
1,M4K,43.679557,-79.352188,East Toronto,"The Danforth West, Riverdale",0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Yoga Studio,Furniture / Home Store,Pub,Pizza Place,Liquor Store
2,M4L,43.668999,-79.315572,East Toronto,"The Beaches West, India Bazaar",0,Park,Sandwich Place,Liquor Store,Pub,Burger Joint,Burrito Place,Fast Food Restaurant,Fish & Chips Shop,Steakhouse,Food & Drink Shop
3,M4M,43.659526,-79.340923,East Toronto,Studio District,0,Café,Coffee Shop,Gastropub,Italian Restaurant,American Restaurant,Bakery,Yoga Studio,Park,Seafood Restaurant,Sandwich Place
4,M4N,43.72802,-79.38879,Central Toronto,Lawrence Park,0,Park,Swim School,Dim Sum Restaurant,Bus Line,Yoga Studio,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


Finally, let's visualize the resulting clusters

In [47]:
# create map

latitude = 43.6532; longitude = -79.3832
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<folium.features.CircleMarker at 0x1a2378bf98>

<folium.features.CircleMarker at 0x1a23783588>

<folium.features.CircleMarker at 0x1a2378b5f8>

<folium.features.CircleMarker at 0x1a23783da0>

<folium.features.CircleMarker at 0x1a2378b048>

<folium.features.CircleMarker at 0x1a23783a58>

<folium.features.CircleMarker at 0x1a23783860>

<folium.features.CircleMarker at 0x1a237a0dd8>

<folium.features.CircleMarker at 0x1a23783828>

<folium.features.CircleMarker at 0x1a237a0198>

<folium.features.CircleMarker at 0x1a2378be10>

<folium.features.CircleMarker at 0x1a237a0940>

<folium.features.CircleMarker at 0x1a237a0668>

<folium.features.CircleMarker at 0x1a2378b710>

<folium.features.CircleMarker at 0x1a237a0a20>

<folium.features.CircleMarker at 0x1a2378b748>

<folium.features.CircleMarker at 0x1a237a0eb8>

<folium.features.CircleMarker at 0x10c722b38>

<folium.features.CircleMarker at 0x10c722588>

<folium.features.CircleMarker at 0x1a237a0390>

<folium.features.CircleMarker at 0x1a237a07f0>

<folium.features.CircleMarker at 0x10c722630>

<folium.features.CircleMarker at 0x10c736860>

<folium.features.CircleMarker at 0x10c7221d0>

<folium.features.CircleMarker at 0x10c736470>

<folium.features.CircleMarker at 0x10c736550>

<folium.features.CircleMarker at 0x10c736cf8>

<folium.features.CircleMarker at 0x10c7223c8>

<folium.features.CircleMarker at 0x10c736c88>

<folium.features.CircleMarker at 0x10c738dd8>

<folium.features.CircleMarker at 0x10c736b70>

<folium.features.CircleMarker at 0x10c7368d0>

<folium.features.CircleMarker at 0x10c7388d0>

<folium.features.CircleMarker at 0x10c759550>

<folium.features.CircleMarker at 0x10c738748>

<folium.features.CircleMarker at 0x10c7369b0>

<folium.features.CircleMarker at 0x10c738ac8>

<folium.features.CircleMarker at 0x10c759278>

## 5. Examine Clusters

Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

#### Cluster 1

In [48]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43.676357,0,Gym / Fitness Center,Neighborhood,Coffee Shop,Pub,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
1,43.679557,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Yoga Studio,Furniture / Home Store,Pub,Pizza Place,Liquor Store
2,43.668999,0,Park,Sandwich Place,Liquor Store,Pub,Burger Joint,Burrito Place,Fast Food Restaurant,Fish & Chips Shop,Steakhouse,Food & Drink Shop
3,43.659526,0,Café,Coffee Shop,Gastropub,Italian Restaurant,American Restaurant,Bakery,Yoga Studio,Park,Seafood Restaurant,Sandwich Place
4,43.72802,0,Park,Swim School,Dim Sum Restaurant,Bus Line,Yoga Studio,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant
5,43.712751,0,Gym / Fitness Center,Park,Burger Joint,Food & Drink Shop,Clothing Store,Hotel,Breakfast Spot,Grocery Store,Sandwich Place,Electronics Store
6,43.715383,0,Sporting Goods Shop,Coffee Shop,Yoga Studio,Chinese Restaurant,Fast Food Restaurant,Mexican Restaurant,Diner,Sandwich Place,Salon / Barbershop,Dessert Shop
7,43.704324,0,Dessert Shop,Sandwich Place,Pizza Place,Café,Italian Restaurant,Sushi Restaurant,Seafood Restaurant,Coffee Shop,Greek Restaurant,Diner
8,43.689574,0,Playground,Summer Camp,Tennis Court,Diner,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
9,43.686412,0,Coffee Shop,Pub,Fried Chicken Joint,Supermarket,Sports Bar,Sushi Restaurant,American Restaurant,Light Rail Station,Pizza Place,Vietnamese Restaurant


#### Cluster 2

In [49]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,43.657952,1,Coffee Shop,Italian Restaurant,Bubble Tea Shop,Café,Sandwich Place,Ice Cream Shop,Burger Joint,Bar,Salad Place,Spa
27,43.628947,1,Airport Service,Airport Terminal,Airport Lounge,Boutique,Sculpture Garden,Harbor / Marina,Boat or Ferry,Plane,Airport Gate,Airport Food Court


#### Cluster 3

In [50]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,43.711695,2,Garden,Pool,Yoga Studio,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 4

In [51]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,43.646435,3,Coffee Shop,Café,Restaurant,Pub,Seafood Restaurant,Hotel,Italian Restaurant,Cocktail Bar,Farmers Market,Bakery


#### Cluster 5

In [52]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,43.67271,4,Coffee Shop,Sandwich Place,Café,Pizza Place,Indian Restaurant,Pharmacy,Cosmetics Shop,Pub,Burger Joint,Flower Shop
