# Assignment Segmenting and Clustering Neighborhoods in Toronto - Part 3 - Final

## Assignment Part 1  - Create Postal Code Data frame

### Load necessary Libraries

In [1]:
import pandas as pd
import numpy as np

### Import html table into the list "PostCodeList"

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
TorontoPostalCodeList = pd.read_html(url)
TorontoPostalCodeList[0]

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


### Create Pandas Data Frame "TorontoPostalCodeDataFrameTemp" from html list "TorontoPostalCodeList" and sort according to the postal code

In [3]:
TorontoPostalCodeDataFrameTemp = pd.DataFrame(TorontoPostalCodeList[0])
TorontoPostalCodeDataFrameTemp.rename(columns = {'Postal Code': 'PostalCode'}, inplace = True)
TorontoPostalCodeDataFrameTemp.sort_values(by = ['PostalCode'], inplace = True)

#### Verify shape

In [4]:
TorontoPostalCodeDataFrameTemp.shape

(180, 3)

### Drop rows with Borough that is "Not assigned"

In [5]:
TorontoPostalCodeDataFrameTemp = TorontoPostalCodeDataFrameTemp.replace('Not assigned', np.nan, regex = True)
TorontoPostalCodeDataFrameTemp.dropna(subset=['Borough'], inplace = True)
TorontoPostalCodeDataFrameTemp.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
9,M1B,Scarborough,"Malvern, Rouge"
18,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
27,M1E,Scarborough,"Guildwood, Morningside, West Hill"
36,M1G,Scarborough,Woburn
45,M1H,Scarborough,Cedarbrae


#### Verify shape

In [6]:
TorontoPostalCodeDataFrameTemp.shape

(103, 3)

### Check if Neighbourhood is "Not assigned" and assign the same as Borough

In [7]:
TorontoPostalCodeDataFrameTemp.loc[TorontoPostalCodeDataFrameTemp['Neighborhood'] == 'Not assigned', 'Neighborhood'] = TorontoPostalCodeDataFrameTemp['Borough']
TorontoPostalCodeDataFrameTemp.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
9,M1B,Scarborough,"Malvern, Rouge"
18,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
27,M1E,Scarborough,"Guildwood, Morningside, West Hill"
36,M1G,Scarborough,Woburn
45,M1H,Scarborough,Cedarbrae


#### Verify Shape

In [8]:
TorontoPostalCodeDataFrameTemp.shape

(103, 3)

### Combine Neighbourhoods if PostalCode is the same and reset index

In [9]:
TorontoPostalCodeDataFrameGroup = TorontoPostalCodeDataFrameTemp.groupby(['PostalCode','Borough'],sort = False).agg(','.join)
TorontoPostalCodeDataFrame = TorontoPostalCodeDataFrameGroup.reset_index()
TorontoPostalCodeDataFrame.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Verify Shape (Assignment Part 1 conclusion)

In [10]:
TorontoPostalCodeDataFrame.shape

(103, 3)

## Assigment Part 2 - Add coordinates to Data frame "TorontoData"

### Get the coordinates

In [11]:
TorontoCoordinatesDataFrame = pd.read_csv('https://cocl.us/Geospatial_data')
TorontoCoordinatesDataFrame.rename(columns = {'Postal Code': 'PostalCode'}, inplace = True)
TorontoCoordinatesDataFrame.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge Data Frame "TorontoPostalCodeDataFrame" with Data Frame "TorontoCoordinatesDataFrame"

In [12]:
TorontoData = pd.merge(TorontoPostalCodeDataFrame, TorontoCoordinatesDataFrame, on = 'PostalCode')
TorontoData.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Verify shape

In [13]:
TorontoData.shape

(103, 5)

## Assignment Part 3 - Explore and cluster the neighborhoods in Toronto

### Download addicional dependecies needed

In [14]:
# Latitude and Longitude handling
#!conda install -c conda-forge geopy --yes #Uncomment this if not yet installed!
from geopy.geocoders import Nominatim

# To handle JSON files and requests
import requests
import json

# To handle plotting
import matplotlib.cm as cm
import matplotlib.colors as colors

# For K-Means clustering handling
from sklearn.cluster import KMeans

# For map handling
#!conda install -c conda-forge folium=0.5.0 --yes #Uncomment this if not yet installer!
import folium

print('Dependecies downloaded')

Dependecies downloaded


### Get the coordinates of Toronto

In [15]:
TorontoAddress = 'Toronto, TO'
TorontoGeolocator = Nominatim(user_agent = 'TorontoExplorer')
TorontoLocation = TorontoGeolocator.geocode(TorontoAddress)
TorontoLatitude = TorontoLocation.latitude
TorontoLongitude = TorontoLocation.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(TorontoLatitude, TorontoLongitude))

The geographical coordinates of Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto

In [16]:
# Create map of Toronto
TorontoMap = folium.Map(location =[TorontoLatitude, TorontoLongitude], zoom_start = 11)
# Add markes to map
for TorontoLatitude, TorontoLongitude, TorontoBorough, TorontoNeighborhood in zip(
        TorontoData['Latitude'],
        TorontoData['Longitude'],
        TorontoData['Borough'],
        TorontoData['Neighborhood']):
    label = '{}, {}'.format(TorontoNeighborhood, TorontoBorough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker([TorontoLatitude,
                         TorontoLongitude],
                        radius = 5,
                        popup = label,
                        color = 'blue',
                        fill = True,
                        fill_color = '#3186cc',
                        fill_opacity = 0.7,
                        parse_html = False).add_to(TorontoMap)
TorontoMap

### Create Data Frame "TorontoCityData" as a subset from Data Frame "TorontoData"

In [17]:
TorontoCityData = TorontoData[TorontoData['Borough'].str.contains('Toronto')].reset_index(drop = True)
TorontoCityData.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


#### Verify shape

In [18]:
TorontoCityData.shape

(39, 5)

### Get the coordinates of Toronto city

In [19]:
TorontoCityAddress = 'Toronto City, TO'
TorontoGeolocator = Nominatim(user_agent = 'TorontoExplorer')
TorontoCityLocation = TorontoGeolocator.geocode(TorontoCityAddress)
TorontoCityLatitude = TorontoCityLocation.latitude
TorontoCityLongitude = TorontoCityLocation.longitude
print('The geographical coordinates of Toronto City are {}, {}.'.format(TorontoCityLatitude, TorontoCityLongitude))

The geographical coordinates of Toronto City are 43.6534817, -79.3839347.


### Create a map of Toronto City

In [20]:
TorontoCityMap = folium.Map(location = [TorontoCityLatitude, TorontoCityLongitude], zoom_start = 12)
# Add markes to map
for TorontoCityLatitude, TorontoCityLongitude, TorontoCityBorough, TorontoCityNeighborhood in zip(
        TorontoCityData['Latitude'],
        TorontoCityData['Longitude'],
        TorontoCityData['Borough'],
        TorontoCityData['Neighborhood']):
    label = '{}, {}'.format(TorontoCityNeighborhood, TorontoCityBorough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker([TorontoCityLatitude,
                         TorontoCityLongitude],
                        radius = 5,
                        popup = label,
                        color = 'blue',
                        fill = True,
                        fill_color = '#3186cc',
                        fill_opacity = 0.7,
                        parse_html = False).add_to(TorontoCityMap)
TorontoCityMap

### Define Foursquare Credentials and Version

In [21]:
# @Hidden_Credentials
CLIENT_ID = '5ZFTKSMHL3CRCN4N2KRZSSBWCXR1ENL230ZGVTVA1QEITZKD'
CLIENT_SECRET = 'WDSN0USS2CNSKC2ZT1YKJ1UGP4HOGB4TNHTPDIES0ZYO02VH'
VERSION = '20200611'

### Explore the first Neighborhood in "TorontoCityData"

In [22]:
TorontoCityData.loc[0,'Neighborhood']

'The Beaches'

In [23]:
TorontoCityNeighborhoodName = TorontoCityData.loc[0,'Neighborhood']
TorontoCityNeighborhoodLatitude = TorontoCityData.loc[0,'Latitude']
TorontoCityNeighborhoodLongitude = TorontoCityData.loc[0,'Longitude']
print('Latitude and Longitude values of {} are {}, {}.'.format(TorontoCityNeighborhoodName, TorontoCityNeighborhoodLatitude,TorontoCityNeighborhoodLongitude))

Latitude and Longitude values of The Beaches are 43.67635739999999, -79.2930312.


In [24]:
TorontoCityLimit = 100
TorontoCityRadius = 500
TorontoCityUrl = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            TorontoCityNeighborhoodLatitude,
            TorontoCityNeighborhoodLongitude,
            TorontoCityRadius,
            TorontoCityLimit)
TorontoCityUrl

'https://api.foursquare.com/v2/venues/explore?&client_id=5ZFTKSMHL3CRCN4N2KRZSSBWCXR1ENL230ZGVTVA1QEITZKD&client_secret=WDSN0USS2CNSKC2ZT1YKJ1UGP4HOGB4TNHTPDIES0ZYO02VH&v=20200611&ll=43.67635739999999,-79.2930312&radius=500&limit=100'

In [25]:
TorontoCityResults = requests.get(TorontoCityUrl).json()
TorontoCityResults

{'meta': {'code': 200, 'requestId': '5f0a4a908e7ad76092c2fd10'},
 'response': {'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.680857404499996,
    'lng': -79.28682091449052},
   'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd461bc77b29c74a07d9282',
       'name': 'Glen Manor Ravine',
       'location': {'address': 'Glen Manor',
        'crossStreet': 'Queen St.',
        'lat': 43.67682094413784,
        'lng': -79.29394208780985,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67682094413784,
          'lng': -79.29394208780985}],
        'distanc

### Extract the category of the venue

In [26]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Clean json and structure it into a Pandas Data Frame

In [27]:
TorontoCityVenues = TorontoCityResults['response']['groups'][0]['items']
TorontoCityNearbyVenues = pd.json_normalize(TorontoCityVenues)
filtered_columns = ['venue.name','venue.categories','venue.location.lat','venue.location.lng']
TorontoCityNearbyVenues = TorontoCityNearbyVenues.loc[:, filtered_columns]
TorontoCityNearbyVenues['venue.categories'] = TorontoCityNearbyVenues.apply(get_category_type, axis =1)
TorontoCityNearbyVenues.columns = [col.split('.')[-1] for col in TorontoCityNearbyVenues.columns]
TorontoCityNearbyVenues.head()

Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,Grover Pub and Grub,Pub,43.679181,-79.297215
3,Upper Beaches,Neighborhood,43.680563,-79.292869


### Explore Neighborhoods in Toronto City

In [28]:
TorontoCityLimit = 100
def getNearbyVenues(TorontoCityNames, TorontoCityLatitudes, TorontoCityLongitudes, TorontoCityRadius = 500):
    TorontoCityVenuesList=[]
    for TorontoCityName, TorontoCityLatitude, TorontoCityLongitude in zip(TorontoCityNames, TorontoCityLatitudes, TorontoCityLongitudes):
        #print(name)
        TorontoCityUrl = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            TorontoCityLatitude,
            TorontoCityLongitude,
            TorontoCityRadius,
            TorontoCityLimit)
        TorontoCityResults = requests.get(TorontoCityUrl).json()['response']['groups'][0]['items']
        TorontoCityVenuesList.append([(
            TorontoCityName,
            TorontoCityLatitude,
            TorontoCityLongitude,
            v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in TorontoCityResults])
    TorontoCityNearbyVenues = pd.DataFrame([item for TorontoCityVenueList in TorontoCityVenuesList for item in TorontoCityVenueList])
    TorontoCityNearbyVenues.columns = ['Neighborhood',
                                 'Neighborhood Latitude',
                                 'Neighborhood Longitude',
                                 'Venue',
                                 'Venue Latitude',
                                 'Venue Longitude',
                                 'Venue Category']   
    return(TorontoCityNearbyVenues)

### Create a Data Frame "TorontoCityVenues" for Toronto City

In [29]:
TorontoCityVenues = getNearbyVenues(TorontoCityNames = TorontoCityData['Neighborhood'],
                                TorontoCityLatitudes = TorontoCityData['Latitude'],
                                TorontoCityLongitudes = TorontoCityData['Longitude']
                                )
TorontoCityVenues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop


#### Verify shape

In [30]:
TorontoCityVenues.shape

(1625, 7)

####  Verify the number of venues for each neighborhood

In [31]:
TorontoCityVenues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,58,58,58,58,58,58
"Brockton, Parkdale Village, Exhibition Place",23,23,23,23,23,23
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",18,18,18,18,18,18
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",14,14,14,14,14,14
Central Bay Street,70,70,70,70,70,70
Christie,16,16,16,16,16,16
Church and Wellesley,71,71,71,71,71,71
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,33,33,33,33,33,33
Davisville North,9,9,9,9,9,9


#### Verify the number of unique categories

In [32]:
print('There are {} unique categories.'.format(len(TorontoCityVenues['Venue Category'].unique())))

There are 232 unique categories.


### Analyse Each Neighborhood

In [33]:
TorontoCityOnehot = pd.get_dummies(TorontoCityVenues[['Venue Category']],prefix = '', prefix_sep = '')
TorontoCityOnehot['Neighborhood'] = TorontoCityVenues['Neighborhood']
fixed_columns = [TorontoCityOnehot.columns[-1]] + list(TorontoCityOnehot.columns[:-1])
TorontoCityOnehot = TorontoCityOnehot[fixed_columns]
TorontoCityOnehot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Verify shape

In [34]:
TorontoCityVenues.shape

(1625, 7)

#### Group rows by neighborhood by taking the mean of occurrence frequency

In [35]:
TorontoCityVenueGrouped = TorontoCityOnehot.groupby('Neighborhood').mean().reset_index()
TorontoCityVenueGrouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.071429,0.071429,0.071429,0.214286,0.071429,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.014286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014286,0.0,0.0,0.014286,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.028169,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,...,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Print the top most common venues for each neighborhood

In [36]:
NumberOfTorontoCityTopVenues = 5
for neighborhood in TorontoCityVenueGrouped['Neighborhood']:
    print('----'+neighborhood+'----')
    temp = TorontoCityVenueGrouped[TorontoCityVenueGrouped['Neighborhood'] == neighborhood].T.reset_index()
    temp.columns = ['Venue','Mean of Occurrence Frequency']
    temp = temp.iloc[1:]
    temp['Mean of Occurrence Frequency'] = temp['Mean of Occurrence Frequency'].astype(float)
    temp = temp.round({'Mean of Occurrence Frequency':2})
    print(temp.sort_values('Mean of Occurrence Frequency', ascending = False).reset_index(drop = True).head(NumberOfTorontoCityTopVenues))
    print('\n')

----Berczy Park----
          Venue  Mean of Occurrence Frequency
0   Coffee Shop                          0.09
1  Cocktail Bar                          0.05
2    Restaurant                          0.03
3      Beer Bar                          0.03
4          Café                          0.03


----Brockton, Parkdale Village, Exhibition Place----
            Venue  Mean of Occurrence Frequency
0            Café                          0.13
1  Breakfast Spot                          0.09
2     Coffee Shop                          0.09
3     Yoga Studio                          0.04
4      Restaurant                          0.04


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                Venue  Mean of Occurrence Frequency
0  Light Rail Station                          0.11
1          Comic Shop                          0.06
2       Garden Center                          0.06
3                Park                          0.06
4    R

#### Sort the venues in descending order

In [37]:
def return_most_common_venues(row, NumberOfTorontoCityTopVenues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    return row_categories_sorted.index.values[0:NumberOfTorontoCityTopVenues]

#### Create a Data Frame for the top 10 most common venues for each neighborhood

In [38]:
NumberOfTorontoCityTopVenues = 10
indicators = ['st','nd','rd']
columns = ['Neighborhood']
for ind in np.arange(NumberOfTorontoCityTopVenues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
NeighborhoodsVenuesSorted = pd.DataFrame(columns = columns)
NeighborhoodsVenuesSorted['Neighborhood'] = TorontoCityVenueGrouped['Neighborhood']
for ind in np.arange(TorontoCityVenueGrouped.shape[0]):
    NeighborhoodsVenuesSorted.iloc[ind, 1:] = return_most_common_venues(TorontoCityVenueGrouped.iloc[ind, :],NumberOfTorontoCityTopVenues)
NeighborhoodsVenuesSorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Café,Restaurant,Pharmacy,Bakery,Farmers Market,Beer Bar,Seafood Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Coffee Shop,Yoga Studio,Bakery,Stadium,Burrito Place,Restaurant,Climbing Gym,Pet Store
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Garden,Brewery,Burrito Place,Spa,Farmers Market,Fast Food Restaurant,Butcher,Restaurant,Recording Studio
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Harbor / Marina,Boat or Ferry,Coffee Shop,Plane,Rental Car Location,Sculpture Garden,Boutique,Airport Terminal,Airport
4,Central Bay Street,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Japanese Restaurant,Bubble Tea Shop,Burger Joint,Thai Restaurant,Bar,Salad Place


### Cluster Neighborhoods

#### Run k-maens

In [39]:
kclusters = 4
TorontoCityGroupedClustering = TorontoCityVenueGrouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(TorontoCityGroupedClustering)
kmeans.labels_[0:1000]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

#### Create a Data Frame for the top 10 most common venues for each neighborhood

In [40]:
NeighborhoodsVenuesSorted.insert(0, 'Cluster Labels', kmeans.labels_)
TorontoCityMerged = TorontoCityData
TorontoCityMerged = TorontoCityMerged.join(NeighborhoodsVenuesSorted.set_index('Neighborhood'), on = 'Neighborhood')
TorontoCityMerged.dropna(subset = ['Cluster Labels'], inplace = True)
TorontoCityMerged['Cluster Labels'] = TorontoCityMerged['Cluster Labels'].astype(int)
TorontoCityMerged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Health Food Store,Trail,Pub,Women's Store,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Italian Restaurant,Coffee Shop,Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Liquor Store,Spa,Juice Bar
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0,Park,Pub,Sandwich Place,Burrito Place,Board Shop,Restaurant,Fast Food Restaurant,Fish & Chips Shop,Italian Restaurant,Steakhouse
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Brewery,Gastropub,Bakery,American Restaurant,Convenience Store,Sandwich Place,Cheese Shop,Clothing Store
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1,Park,Bus Line,Swim School,Women's Store,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop


#### Visualize the resulting clusters

In [41]:
MapTorontoCityClusters = folium.Map(location=[TorontoCityLatitude, TorontoCityLongitude], zoom_start = 12)
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0,1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
markers_colors = []
for latitude, longitude, neigborhood, cluster in zip(
        TorontoCityMerged['Latitude'],
        TorontoCityMerged['Longitude'],
        TorontoCityMerged['Neighborhood'],
        TorontoCityMerged['Cluster Labels']):
    label = folium.Popup(str(neigborhood) + ' Cluster ' + str(cluster), parse_html = True)
    folium.CircleMarker(
        [latitude,longitude],
        radius = 5,
        popup = label,
        color = rainbow[cluster-1],
        fill = True,
        fill_color = rainbow[cluster-1],
        fill_opacity = 0.7).add_to(MapTorontoCityClusters)
    
MapTorontoCityClusters

### Examine Clusters

#### Cluster 1

In [42]:
TorontoCityMerged.loc[TorontoCityMerged['Cluster Labels'] == 0, TorontoCityMerged.columns[[1] + list(range(5, TorontoCityMerged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,0,Health Food Store,Trail,Pub,Women's Store,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
1,East Toronto,0,Greek Restaurant,Italian Restaurant,Coffee Shop,Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Liquor Store,Spa,Juice Bar
2,East Toronto,0,Park,Pub,Sandwich Place,Burrito Place,Board Shop,Restaurant,Fast Food Restaurant,Fish & Chips Shop,Italian Restaurant,Steakhouse
3,East Toronto,0,Café,Coffee Shop,Brewery,Gastropub,Bakery,American Restaurant,Convenience Store,Sandwich Place,Cheese Shop,Clothing Store
5,Central Toronto,0,Hotel,Department Store,Gym / Fitness Center,Park,Pizza Place,Breakfast Spot,Sandwich Place,Food & Drink Shop,Dog Run,Diner
6,Central Toronto,0,Clothing Store,Sporting Goods Shop,Coffee Shop,Gym / Fitness Center,Fast Food Restaurant,Diner,Mexican Restaurant,Park,Pet Store,Chinese Restaurant
7,Central Toronto,0,Sandwich Place,Dessert Shop,Coffee Shop,Gym,Café,Italian Restaurant,Sushi Restaurant,Pizza Place,Pharmacy,Seafood Restaurant
8,Central Toronto,0,Park,Lawyer,Trail,Restaurant,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
9,Central Toronto,0,Pub,Coffee Shop,Bagel Shop,Liquor Store,Restaurant,Sports Bar,Bank,Supermarket,Sushi Restaurant,Fried Chicken Joint
11,Downtown Toronto,0,Coffee Shop,Restaurant,Café,Bakery,Pub,Pizza Place,Italian Restaurant,Playground,Pet Store,Pharmacy


#### Cluster 2

In [43]:
TorontoCityMerged.loc[TorontoCityMerged['Cluster Labels'] == 1, TorontoCityMerged.columns[[1] + list(range(5, TorontoCityMerged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,1,Park,Bus Line,Swim School,Women's Store,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop
10,Downtown Toronto,1,Park,Playground,Trail,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


#### Cluster 3

In [44]:
TorontoCityMerged.loc[TorontoCityMerged['Cluster Labels'] == 2, TorontoCityMerged.columns[[1] + list(range(5, TorontoCityMerged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,2,Garden,Home Service,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


#### Cluster 4

In [45]:
TorontoCityMerged.loc[TorontoCityMerged['Cluster Labels'] == 3, TorontoCityMerged.columns[[1] + list(range(5, TorontoCityMerged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Central Toronto,3,Trail,Mexican Restaurant,Jewelry Store,Sushi Restaurant,Women's Store,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
