# Toronto Neighborhood Clustering and Data Preparation

## Importing Libraries and Installing Packages

In [2]:
import numpy as np # library to handle data in a vectorized manner
print("numpy imported")
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
print("pandas imported")

numpy imported
pandas imported


In [3]:
import json # library to handle JSON files
print("json imported")
!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

json imported


In [4]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [5]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

In [6]:
!pip install folium
import folium # map rendering library

print('Libraries imported.')

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 8.5MB/s ta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Libraries imported.


# Part 1: Scraping the Wikipedia page and making the pandas dataframe

### Scraping The Wikipedia Page

In [7]:
import urllib.request
url = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050."
page = urllib.request.urlopen(url)
from bs4 import BeautifulSoup
soup = BeautifulSoup(page,"lxml")
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"2487bafc-f712-4250-a368-afd2e903d6ab","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":969510799,"wgRevisionId":945633050,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Communicati

In [8]:
correct_table = soup.find('table', class_ = 'wikitable sortable')
correct_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Harbourfront</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Lawrence_Heights" title="Lawrence Heights">Lawrence Heights</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North

### Creating Dataframe from scraped Data

In [9]:
A = []
B = []
C = []

for row in correct_table.findAll('tr'):
    cells = row.findAll('td')
    if len(cells) == 3:
        A.append(cells[0].find(text = True))
        B.append(cells[1].find(text = True))
        C.append(cells[2].find(text = True))
stuff = 0
while stuff < 77:
    unassigned_id = B.index('Not assigned')
    stuff = stuff + 1
    del A[unassigned_id]
    del B[unassigned_id]
    del C[unassigned_id]
# C.index('Not assigned') I used this to test out whether any neighborhood was not assigned after removing all not assigned boroughs.


In [10]:
df = pd.DataFrame(A, columns = ['PostalCode'])
df['Borough'] = B
df['Neighborhood'] = C
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Etobicoke,Islington Avenue
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North


### Separating Duplicate Postal Code Entries for processing

In [11]:
duplicateRowsDF = df[df.duplicated(subset = 'PostalCode', keep = False)]
duplicateRowsDF

Unnamed: 0,PostalCode,Borough,Neighborhood
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
10,M4B,East York,Woodbine Gardens
11,M4B,East York,Parkview Hill
12,M5B,Downtown Toronto,Ryerson
13,M5B,Downtown Toronto,Garden District
15,M9B,Etobicoke,Cloverdale
16,M9B,Etobicoke,Islington



### Creating A list of all unique postal codes in the separated dataframe

In [12]:
unique_list = list(set(duplicateRowsDF['PostalCode']))
unique_list

['M5P',
 'M8W',
 'M4B',
 'M2M',
 'M5K',
 'M3C',
 'M6H',
 'M5J',
 'M5S',
 'M5H',
 'M3J',
 'M1L',
 'M5T',
 'M6R',
 'M4V',
 'M6A',
 'M4X',
 'M3H',
 'M5X',
 'M6J',
 'M9R',
 'M4K',
 'M5L',
 'M1N',
 'M1K',
 'M9C',
 'M4T',
 'M6N',
 'M6S',
 'M1V',
 'M1R',
 'M6L',
 'M5R',
 'M8Y',
 'M1E',
 'M1B',
 'M9V',
 'M1P',
 'M8X',
 'M1C',
 'M6M',
 'M8Z',
 'M9B',
 'M2L',
 'M2J',
 'M5B',
 'M9M',
 'M5M',
 'M1T',
 'M8V',
 'M1M',
 'M3K',
 'M6P',
 'M5V',
 'M6K',
 'M4L']

### Determining the Name of Each Borough

In [13]:
# Find Borough Names
dictionary_set_boroughs = {}
for Post_code in unique_list:
    df_for_postal_code = duplicateRowsDF.loc[duplicateRowsDF['PostalCode'] == Post_code]
    Borough_name_with_duplicates = df_for_postal_code['Borough'].to_list()
    df_no_duplicates = list(set(Borough_name_with_duplicates))
    dictionary_set_boroughs[Post_code] = df_no_duplicates[0]
dictionary_set_boroughs

{'M5P': 'Central Toronto',
 'M8W': 'Etobicoke',
 'M4B': 'East York',
 'M2M': 'North York',
 'M5K': 'Downtown Toronto',
 'M3C': 'North York',
 'M6H': 'West Toronto',
 'M5J': 'Downtown Toronto',
 'M5S': 'Downtown Toronto',
 'M5H': 'Downtown Toronto',
 'M3J': 'North York',
 'M1L': 'Scarborough',
 'M5T': 'Downtown Toronto',
 'M6R': 'West Toronto',
 'M4V': 'Central Toronto',
 'M6A': 'North York',
 'M4X': 'Downtown Toronto',
 'M3H': 'North York',
 'M5X': 'Downtown Toronto',
 'M6J': 'West Toronto',
 'M9R': 'Etobicoke',
 'M4K': 'East Toronto',
 'M5L': 'Downtown Toronto',
 'M1N': 'Scarborough',
 'M1K': 'Scarborough',
 'M9C': 'Etobicoke',
 'M4T': 'Central Toronto',
 'M6N': 'York',
 'M6S': 'West Toronto',
 'M1V': 'Scarborough',
 'M1R': 'Scarborough',
 'M6L': 'North York',
 'M5R': 'Central Toronto',
 'M8Y': 'Etobicoke',
 'M1E': 'Scarborough',
 'M1B': 'Scarborough',
 'M9V': 'Etobicoke',
 'M1P': 'Scarborough',
 'M8X': 'Etobicoke',
 'M1C': 'Scarborough',
 'M6M': 'York',
 'M8Z': 'Etobicoke',
 'M9B': '

### Determining the Neighborhood Names

In [14]:
#Find Neighborhood Names
dictionary_set_neighborhoods = {}
for Post_code in unique_list:
    df_for_neighborhood = duplicateRowsDF.loc[duplicateRowsDF['PostalCode'] == Post_code]
    neighborhood_names = df_for_neighborhood['Neighborhood'].to_list()
    dictionary_set_neighborhoods[Post_code] = neighborhood_names
dictionary_set_neighborhoods

{'M5P': ['Forest Hill North', 'Forest Hill West\n'],
 'M8W': ['Alderwood', 'Long Branch'],
 'M4B': ['Woodbine Gardens', 'Parkview Hill'],
 'M2M': ['Newtonbrook', 'Willowdale'],
 'M5K': ['Design Exchange', 'Toronto Dominion Centre'],
 'M3C': ['Flemingdon Park', 'Don Mills South\n'],
 'M6H': ['Dovercourt Village', 'Dufferin\n'],
 'M5J': ['Harbourfront East\n', 'Toronto Islands', 'Union Station'],
 'M5S': ['Harbord\n', 'University of Toronto'],
 'M5H': ['Adelaide\n', 'King\n', 'Richmond\n'],
 'M3J': ['Northwood Park', 'York University'],
 'M1L': ['Clairlea', 'Golden Mile', 'Oakridge'],
 'M5T': ['Chinatown', 'Grange Park', 'Kensington Market'],
 'M6R': ['Parkdale', 'Roncesvalles'],
 'M4V': ['Deer Park',
  'Forest Hill SE\n',
  'Rathnelly',
  'South Hill',
  'Summerhill West\n'],
 'M6A': ['Lawrence Heights', 'Lawrence Manor'],
 'M4X': ['Cabbagetown', 'St. James Town'],
 'M3H': ['Bathurst Manor', 'Downsview North\n', 'Wilson Heights'],
 'M5X': ['First Canadian Place', 'Underground city'],
 '

### Removing Whitespace and concentrating into strings for neighborhood titles

In [15]:
# remove whitespace
dict_neigh_refined = {}
for Post_code in unique_list:
    refined_list = []
    for item in dictionary_set_neighborhoods[Post_code]:
        refined = item.strip()
        refined_list.append(refined)
    dict_neigh_refined[Post_code] = refined_list
print(dict_neigh_refined)
dict_neigh_sorted = {}
for Post_code in unique_list:
    temp_list = dict_neigh_refined[Post_code]
    sorted_list = sorted(temp_list)
    dict_neigh_sorted[Post_code] = sorted_list
dict_neigh_sorted

{'M5P': ['Forest Hill North', 'Forest Hill West'], 'M8W': ['Alderwood', 'Long Branch'], 'M4B': ['Woodbine Gardens', 'Parkview Hill'], 'M2M': ['Newtonbrook', 'Willowdale'], 'M5K': ['Design Exchange', 'Toronto Dominion Centre'], 'M3C': ['Flemingdon Park', 'Don Mills South'], 'M6H': ['Dovercourt Village', 'Dufferin'], 'M5J': ['Harbourfront East', 'Toronto Islands', 'Union Station'], 'M5S': ['Harbord', 'University of Toronto'], 'M5H': ['Adelaide', 'King', 'Richmond'], 'M3J': ['Northwood Park', 'York University'], 'M1L': ['Clairlea', 'Golden Mile', 'Oakridge'], 'M5T': ['Chinatown', 'Grange Park', 'Kensington Market'], 'M6R': ['Parkdale', 'Roncesvalles'], 'M4V': ['Deer Park', 'Forest Hill SE', 'Rathnelly', 'South Hill', 'Summerhill West'], 'M6A': ['Lawrence Heights', 'Lawrence Manor'], 'M4X': ['Cabbagetown', 'St. James Town'], 'M3H': ['Bathurst Manor', 'Downsview North', 'Wilson Heights'], 'M5X': ['First Canadian Place', 'Underground city'], 'M6J': ['Little Portugal', 'Trinity'], 'M9R': ['Ki

{'M5P': ['Forest Hill North', 'Forest Hill West'],
 'M8W': ['Alderwood', 'Long Branch'],
 'M4B': ['Parkview Hill', 'Woodbine Gardens'],
 'M2M': ['Newtonbrook', 'Willowdale'],
 'M5K': ['Design Exchange', 'Toronto Dominion Centre'],
 'M3C': ['Don Mills South', 'Flemingdon Park'],
 'M6H': ['Dovercourt Village', 'Dufferin'],
 'M5J': ['Harbourfront East', 'Toronto Islands', 'Union Station'],
 'M5S': ['Harbord', 'University of Toronto'],
 'M5H': ['Adelaide', 'King', 'Richmond'],
 'M3J': ['Northwood Park', 'York University'],
 'M1L': ['Clairlea', 'Golden Mile', 'Oakridge'],
 'M5T': ['Chinatown', 'Grange Park', 'Kensington Market'],
 'M6R': ['Parkdale', 'Roncesvalles'],
 'M4V': ['Deer Park',
  'Forest Hill SE',
  'Rathnelly',
  'South Hill',
  'Summerhill West'],
 'M6A': ['Lawrence Heights', 'Lawrence Manor'],
 'M4X': ['Cabbagetown', 'St. James Town'],
 'M3H': ['Bathurst Manor', 'Downsview North', 'Wilson Heights'],
 'M5X': ['First Canadian Place', 'Underground city'],
 'M6J': ['Little Portuga

In [16]:
#Make Neighborhoods into single string
dictionary_fin_neighborhood = {}
for Post_code in unique_list:
    neighborhood_string = ''
    neighborhood_list = dict_neigh_refined[Post_code]
    for item in neighborhood_list:
        neighborhood_string = neighborhood_string +', ' + item
    dictionary_fin_neighborhood[Post_code] = neighborhood_string[2:]
dictionary_final_neighborhood = dictionary_fin_neighborhood

### I created a list identifying duplicate rows in the original to create a dataframe without the original duplicates

In [17]:
# remove original duplicates from dataframe
duplicate_t_or_f_dict = {}
list_post = df['PostalCode'].to_list()
index = 0
while index <= 209:
    duplicate_counter = 0
    for item in unique_list:
        if list_post[index] == item:
            duplicate_counter += 1
    if duplicate_counter < 1:
        duplicate_t_or_f_dict[index] = False
    else:
        duplicate_t_or_f_dict[index] = True
    index += 1

duplicate_t_or_f_dict

{0: False,
 1: False,
 2: False,
 3: True,
 4: True,
 5: False,
 6: False,
 7: True,
 8: True,
 9: False,
 10: True,
 11: True,
 12: True,
 13: True,
 14: False,
 15: True,
 16: True,
 17: True,
 18: True,
 19: True,
 20: True,
 21: True,
 22: True,
 23: True,
 24: True,
 25: False,
 26: False,
 27: False,
 28: True,
 29: True,
 30: True,
 31: True,
 32: True,
 33: True,
 34: True,
 35: False,
 36: False,
 37: False,
 38: False,
 39: False,
 40: False,
 41: False,
 42: False,
 43: False,
 44: True,
 45: True,
 46: True,
 47: False,
 48: True,
 49: True,
 50: True,
 51: True,
 52: True,
 53: False,
 54: True,
 55: True,
 56: True,
 57: True,
 58: True,
 59: False,
 60: True,
 61: True,
 62: True,
 63: True,
 64: True,
 65: True,
 66: True,
 67: True,
 68: False,
 69: True,
 70: True,
 71: True,
 72: True,
 73: True,
 74: True,
 75: True,
 76: True,
 77: True,
 78: True,
 79: True,
 80: True,
 81: True,
 82: True,
 83: False,
 84: True,
 85: True,
 86: True,
 87: True,
 88: True,
 89: Tr

### I used the list I previously created to remove all duplicates

In [18]:
#Take out Duplicate postal_code indexes
counter = 0
df_no_duplicates = df
while counter < len(duplicate_t_or_f_dict):
    if duplicate_t_or_f_dict[counter] == True:
        df_no_duplicates = df_no_duplicates.drop(df.index[counter])
    counter += 1
df_no_duplicates

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Etobicoke,Islington Avenue
9,M3B,North York,Don Mills North
14,M6B,North York,Glencairn
25,M4C,East York,Woodbine Heights
26,M5C,Downtown Toronto,St. James Town
27,M6C,York,Humewood-Cedarvale


### I created a dataframe with the formerly duplicated values

In [19]:
#Creating Final dataframe with formerly duplicated value
Postal_codes = unique_list
boroughs_list = list(dictionary_set_boroughs.values())
Neighborhoods_list = list(dictionary_final_neighborhood.values())
unduplicated_data = {'PostalCode': Postal_codes, 'Borough': boroughs_list, 'Neighborhood': Neighborhoods_list}
unduplicated_df = pd.DataFrame(unduplicated_data)
unduplicated_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5P,Central Toronto,"Forest Hill North, Forest Hill West"
1,M8W,Etobicoke,"Alderwood, Long Branch"
2,M4B,East York,"Woodbine Gardens, Parkview Hill"
3,M2M,North York,"Newtonbrook, Willowdale"
4,M5K,Downtown Toronto,"Design Exchange, Toronto Dominion Centre"
5,M3C,North York,"Flemingdon Park, Don Mills South"
6,M6H,West Toronto,"Dovercourt Village, Dufferin"
7,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station"
8,M5S,Downtown Toronto,"Harbord, University of Toronto"
9,M5H,Downtown Toronto,"Adelaide, King, Richmond"


### I combined the dataframes to create a final dataframe

In [20]:
#combine Dataframes
frames = [df_no_duplicates, unduplicated_df]
result = pd.concat(frames)
final_dataframe = result.sort_values(by = ['PostalCode'])
final_dataframe.reset_index(inplace = True)
actual_final = final_dataframe.drop(columns=['index'])
actual_final

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### I determined that the list has 103 rows and 3 columns

In [21]:
actual_final.shape

(103, 3)

# Part 2: adding latitude and longitude

### The geocoder API was not working for me, so I used the CSV file.


In [22]:
postal_codes = pd.read_csv("http://cocl.us/Geospatial_data")
postal_codes

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### I changed latitude and longitude into lists as they were already in the same order as my dataframe

In [23]:
latitude = postal_codes['Latitude'].to_list()
longitude = postal_codes['Longitude'].to_list()

### I then created new columns using the list values

In [24]:
actual_final['Latitude'] = latitude
actual_final['Longitude'] = longitude
actual_final

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Part 3: Clustering Neighborhoods by venue

### I removed all rows containing multiple neighborhoods and instead clustered only rows which had one neighborhood

In [25]:

duplicate_indices = []
neighborhood_rows = actual_final['Neighborhood'].to_list()
for iterater in range(0,103):
    if ',' in neighborhood_rows[iterater]:
        duplicate_indices.append(iterater)
        iterater += 1
    else:
        iterater += 1
actual_no_duplicates = actual_final.drop(df.index[duplicate_indices])
actual_no_duplicates.reset_index(inplace = True)
final_no_duplicates = actual_no_duplicates.drop(columns=['index'])
final_no_duplicates

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1G,Scarborough,Woburn,43.770992,-79.216917
1,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
2,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
3,M1S,Scarborough,Agincourt,43.7942,-79.262029
4,M1W,Scarborough,L'Amoreaux West,43.799525,-79.318389
5,M1X,Scarborough,Upper Rouge,43.836125,-79.205636
6,M2H,North York,Hillcrest Village,43.803762,-79.363452
7,M2K,North York,Bayview Village,43.786947,-79.385975
8,M2N,North York,Willowdale South,43.77012,-79.408493
9,M2P,North York,York Mills West,43.752758,-79.400049


In [26]:
# The code was removed by Watson Studio for sharing.

### I used the foursquare API to determine the venues in each neighborhood

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            100)
         # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [28]:
toronto_venues = getNearbyVenues(names=final_no_duplicates['Neighborhood'],
                                   latitudes=final_no_duplicates['Latitude'],
                                   longitudes=final_no_duplicates['Longitude']
                                  )

Woburn
Cedarbrae

Scarborough Village
Agincourt
L'Amoreaux West

Upper Rouge
Hillcrest Village
Bayview Village
Willowdale South

York Mills West

Willowdale West
Parkwoods
Don Mills North

Downsview West
Downsview Central

Downsview Northwest

Victoria Village
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
Studio District

Lawrence Park
Davisville North

North Toronto West

Davisville

Rosedale
Church and Wellesley
Harbourfront
St. James Town
Berczy Park
Central Bay Street

Roselawn

Stn A PO Boxes 25 The Esplanade

Glencairn

Humewood-Cedarvale
Caledonia-Fairbanks

Christie

Queen's Park
Canada Post Gateway Processing Centre

Business Reply Mail Processing Centre 969 Eastern

Islington Avenue
Humber Summit
Weston
Westmount

Northwest



### I changed the presence of each type of venue to a binary variable which I then used to make a pandas dataframe for each neighborhood

In [29]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beach,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Stop,Business Service,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Auditorium,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Financial or Legal Service,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Health & Beauty Service,Health Food Store,Historic Site,Hobby Shop,Hockey Arena,Home Service,Hostel,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Indoor Play Area,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Office,Optical Shop,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Recording Studio,Rental Car Location,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Soccer Field,Soup Place,Spa,Sporting Goods Shop,Sports Bar,Stationery Store,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Swim School,Tailor Shop,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Woburn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Woburn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Woburn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Woburn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Cedarbrae,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### In the following cells, I determined the frequency of each type of venue in each neighborhood and I found the most common venues in each neighborhood.

In [30]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beach,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Stop,Business Service,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Auditorium,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Financial or Legal Service,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Health & Beauty Service,Health Food Store,Historic Site,Hobby Shop,Hockey Arena,Home Service,Hostel,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Indoor Play Area,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,Movie Theater,Museum,Music Venue,New American Restaurant,Nightclub,Office,Optical Shop,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Recording Studio,Rental Car Location,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Soccer Field,Soup Place,Spa,Sporting Goods Shop,Sports Bar,Stationery Store,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Swim School,Tailor Shop,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Berczy Park,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.035088,0.0,0.0,0.0,0.017544,0.017544,0.035088,0.0,0.0,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.035088,0.105263,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 East...,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
5,Canada Post Gateway Processing Centre,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Central Bay Street,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029851,0.029851,0.0,0.0,0.0,0.014925,0.059701,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.164179,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029851,0.014925,0.0,0.014925,0.014925,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.014925,0.014925,0.0,0.0,0.0,0.0,0.044776,0.044776,0.0,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.029851,0.0,0.059701,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.025974,0.012987,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.025974,0.012987,0.012987,0.0,0.0,0.0,0.025974,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.103896,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.025974,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.038961,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.012987,0.0,0.0,0.0,0.025974,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.064935,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.025974,0.025974,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.025974,0.012987,0.0,0.0,0.038961,0.012987,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.051948,0.0,0.0,0.0,0.012987,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0                     Lounge  0.25
1             Breakfast Spot  0.25
2  Latin American Restaurant  0.25
3               Skating Rink  0.25
4        Martial Arts School  0.00


----Bayview Village----
                 venue  freq
0                 Café  0.25
1  Japanese Restaurant  0.25
2   Chinese Restaurant  0.25
3                 Bank  0.25
4          Yoga Studio  0.00


----Berczy Park----
                venue  freq
0         Coffee Shop  0.11
1          Restaurant  0.04
2              Bakery  0.04
3      Farmers Market  0.04
4  Seafood Restaurant  0.04


----Business Reply Mail Processing Centre 969 Eastern
----
                venue  freq
0  Light Rail Station  0.11
1         Yoga Studio  0.05
2             Brewery  0.05
3                 Spa  0.05
4          Smoke Shop  0.05


----Caledonia-Fairbanks
----
                 venue  freq
0                 Park  0.50
1        Women's Store  0.25
2                 Pool  0.25
3  Mor

In [41]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [33]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Coworking Space,Creperie,Convenience Store,Ethiopian Restaurant,Electronics Store,Cosmetics Shop
1,Bayview Village,Japanese Restaurant,Café,Chinese Restaurant,Bank,Women's Store,Discount Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
2,Berczy Park,Coffee Shop,Beer Bar,Seafood Restaurant,Farmers Market,Restaurant,Bakery,Cocktail Bar,Café,Cheese Shop,Pharmacy
3,Business Reply Mail Processing Centre 969 East...,Light Rail Station,Gym / Fitness Center,Garden Center,Skate Park,Restaurant,Recording Studio,Pizza Place,Park,Garden,Spa
4,Caledonia-Fairbanks,Park,Women's Store,Pool,Dessert Shop,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center


### I then clustered the neighborhoods into 5 different clusters by venue

In [42]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 0, 1, 1, 1, 1, 1], dtype=int32)

### I was not sure what to call each cluster, so I continued to use number labels for them

In [35]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = final_no_duplicates

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged # check the last columns


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Soccer Field,Korean Restaurant,Diner,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
1,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Bank,Gas Station,Bakery,Hakka Restaurant,Caribbean Restaurant,Thai Restaurant,Fried Chicken Joint,Athletics & Sports,Diner,Discount Store
2,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2.0,Women's Store,Playground,Dim Sum Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
3,M1S,Scarborough,Agincourt,43.7942,-79.262029,1.0,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Coworking Space,Creperie,Convenience Store,Ethiopian Restaurant,Electronics Store,Cosmetics Shop
4,M1W,Scarborough,L'Amoreaux West,43.799525,-79.318389,1.0,Fast Food Restaurant,Chinese Restaurant,Pharmacy,Bank,Gym Pool,Indian Restaurant,Discount Store,Pizza Place,Coffee Shop,Sandwich Place
5,M1X,Scarborough,Upper Rouge,43.836125,-79.205636,,,,,,,,,,,
6,M2H,North York,Hillcrest Village,43.803762,-79.363452,1.0,Golf Course,Fast Food Restaurant,Pool,Mediterranean Restaurant,Dog Run,Women's Store,Dim Sum Restaurant,Eastern European Restaurant,Drugstore,Donut Shop
7,M2K,North York,Bayview Village,43.786947,-79.385975,1.0,Japanese Restaurant,Café,Chinese Restaurant,Bank,Women's Store,Discount Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
8,M2N,North York,Willowdale South,43.77012,-79.408493,1.0,Ramen Restaurant,Coffee Shop,Pizza Place,Sushi Restaurant,Café,Sandwich Place,Restaurant,Shopping Mall,Japanese Restaurant,Ice Cream Shop
9,M2P,North York,York Mills West,43.752758,-79.400049,0.0,Convenience Store,Park,Women's Store,Diner,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run


### Two rows did not have the data required for clustering, so I excluded them from my analysis

In [38]:
toronto_merged.dropna(inplace = True)
toronto_merged.reset_index(inplace = True)
toronto_merged.drop(columns=['index'], inplace = True)
toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Soccer Field,Korean Restaurant,Diner,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
1,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Bank,Gas Station,Bakery,Hakka Restaurant,Caribbean Restaurant,Thai Restaurant,Fried Chicken Joint,Athletics & Sports,Diner,Discount Store
2,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2.0,Women's Store,Playground,Dim Sum Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
3,M1S,Scarborough,Agincourt,43.7942,-79.262029,1.0,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Coworking Space,Creperie,Convenience Store,Ethiopian Restaurant,Electronics Store,Cosmetics Shop
4,M1W,Scarborough,L'Amoreaux West,43.799525,-79.318389,1.0,Fast Food Restaurant,Chinese Restaurant,Pharmacy,Bank,Gym Pool,Indian Restaurant,Discount Store,Pizza Place,Coffee Shop,Sandwich Place
5,M2H,North York,Hillcrest Village,43.803762,-79.363452,1.0,Golf Course,Fast Food Restaurant,Pool,Mediterranean Restaurant,Dog Run,Women's Store,Dim Sum Restaurant,Eastern European Restaurant,Drugstore,Donut Shop
6,M2K,North York,Bayview Village,43.786947,-79.385975,1.0,Japanese Restaurant,Café,Chinese Restaurant,Bank,Women's Store,Discount Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
7,M2N,North York,Willowdale South,43.77012,-79.408493,1.0,Ramen Restaurant,Coffee Shop,Pizza Place,Sushi Restaurant,Café,Sandwich Place,Restaurant,Shopping Mall,Japanese Restaurant,Ice Cream Shop
8,M2P,North York,York Mills West,43.752758,-79.400049,0.0,Convenience Store,Park,Women's Store,Diner,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
9,M2R,North York,Willowdale West,43.782736,-79.442259,1.0,Pizza Place,Bank,Coffee Shop,Pharmacy,German Restaurant,Electronics Store,Drugstore,Donut Shop,Dog Run,Distribution Center


### I then mapped the clusters and found that most neighborhoods were cluster 1 with some neighborhoods being in cluster 0. Scarborough Village, Downsview Central, and Humber Summit all were distinct enough to have their own cluster.

In [40]:
# create map
map_clusters = folium.Map(location=[43.70011, -79.4163], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

latitute = toronto_merged['Latitude']
longitude = toronto_merged['Longitude']

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(latitude, longitude, toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters