## Part III - Explore & Cluster 
## Segmenting and Clustering Neighborhoods in Toronto

 ###### Code to Scrape Wikipedia page

In [4]:
# Import Libraries
import pandas as pd
import numpy as np
import requests

In [5]:
# Form URL to fetch the file
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
file = requests.get(url)

In [6]:
# Webpage is extracted into a Text file
file

<Response [200]>

##### Use pandas read_html() command to scrape and parse 'HTML and XML code into Pandas Dataframe

In [7]:
file_df = pd.read_html(file.text)

##### Use the first element of the list which contains required data.

In [8]:
toronto_df = file_df[0]
print(toronto_df.head())
toronto_df.shape


  Postcode           Borough     Neighbourhood
0      M1A      Not assigned      Not assigned
1      M2A      Not assigned      Not assigned
2      M3A        North York         Parkwoods
3      M4A        North York  Victoria Village
4      M5A  Downtown Toronto      Harbourfront


(288, 3)

##### Processing cells with assigned 'Borough'. Filter out 'Not assigned'

In [9]:
toronto_df_filtered =toronto_df[toronto_df['Borough'] != 'Not assigned']

In [10]:
print (toronto_df_filtered[0:10])
toronto_df_filtered.shape

   Postcode           Borough     Neighbourhood
2       M3A        North York         Parkwoods
3       M4A        North York  Victoria Village
4       M5A  Downtown Toronto      Harbourfront
5       M5A  Downtown Toronto       Regent Park
6       M6A        North York  Lawrence Heights
7       M6A        North York    Lawrence Manor
8       M7A      Queen's Park      Not assigned
10      M9A         Etobicoke  Islington Avenue
11      M1B       Scarborough             Rouge
12      M1B       Scarborough           Malvern


(211, 3)

##### Further replace cells having value as 'Not Assigned' Neighborhood, with the same as the Borough.


In [11]:
toronto_df_neigh_replaced = toronto_df_filtered[:]
cond = toronto_df_neigh_replaced.Neighbourhood == 'Not assigned'
toronto_df_neigh_replaced.loc[cond, 'Neighbourhood'] = toronto_df_neigh_replaced.Borough
print(toronto_df_neigh_replaced[0:10])

   Postcode           Borough     Neighbourhood
2       M3A        North York         Parkwoods
3       M4A        North York  Victoria Village
4       M5A  Downtown Toronto      Harbourfront
5       M5A  Downtown Toronto       Regent Park
6       M6A        North York  Lawrence Heights
7       M6A        North York    Lawrence Manor
8       M7A      Queen's Park      Queen's Park
10      M9A         Etobicoke  Islington Avenue
11      M1B       Scarborough             Rouge
12      M1B       Scarborough           Malvern


##### Concatenate Neighbourhoods with same Postcodes and Boroughs.
##### Use Groupby method on 'Postcode' and 'Borough' and apply 'function lambda' to concatenate Neighbourhood (separated by ',')

In [12]:
toronto_final_df = toronto_df_neigh_replaced.groupby(["Postcode","Borough"])["Neighbourhood"].apply(lambda x:"%s" % ', '.join(x)).reset_index()
toronto_final_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [13]:
toronto_final_df.shape

(103, 3)

#### Use Google API or CSV file to get latitude and longitude for the latitude and the longitude coordinates of each neighborhood.

In [14]:
url1 = 'https://geocoder.readthedocs.io/index.html'
url2 ='https://cocl.us/Geospatial_data'

In [15]:
ll_file2 = requests.get(url2)
df_geo = pd.read_csv(url2)

In [16]:
# Use geodecoder to fetch  
import geocoder
for p in df_geo['Postal Code'][:5]:
     g1 = geocoder.google('{}, Toronto'.format(p))
      

#### Fetch latitude and longitude coordinates for a postal code from csv file 

In [17]:
# Fetch latitude and longitude coordinates for a postal code from csv file , store them as a list and append lists to the dataframe.
lat =[]
lng=[]

for pcode in toronto_final_df['Postcode']:
    i=0
    for i, p in enumerate(df_geo['Postal Code']):
             if pcode == p: 
                lat.append(df_geo['Latitude'][i])
                lng.append(df_geo['Longitude'][i])
                
                #print(pcode, i,p,df_geo['Latitude'][i],df_geo['Longitude'][i])

df_toronto_new= toronto_final_df.assign(Latitude=lat,Longitude=lng)
             
  

Toronto dataframe after appending with coordinates data

In [18]:
df_toronto_new.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [None]:
import json # library to handle JSON files
import geocoder

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans



In [65]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [40]:
!pip install folium
import folium # map rendering library

Collecting folium
  Downloading https://files.pythonhosted.org/packages/72/ff/004bfe344150a064e558cb2aedeaa02ecbf75e60e148a55a9198f0c41765/folium-0.10.0-py2.py3-none-any.whl (91kB)
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.10.0


#### Find the number of Boroughs and Neighbourhoods in the data set of Toronto

In [30]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_toronto_new['Borough'].unique()),
        df_toronto_new.shape[0])
     )

The dataframe has 11 boroughs and 103 neighborhoods.


#### Explore Neighbourhood of Toronto city. Create a map of Toronto with neighborhoods superimposed on top.

In [51]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[43.652900,-79.384900], zoom_start=10)


for lat, lng, borough, neighbourhood in zip(df_toronto_new['Latitude'], df_toronto_new['Longitude'], df_toronto_new['Borough'], df_toronto_new['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  

map_toronto  

**Folium** is a visualization library. Zoom into the above map, and click on each circle mark to reveal the name of the neighborhood and its respective borough.

#### Choose Borough - **'East Toronto'** .    Segment and cluster only the neighborhoods in **East Toronto**. So let's slice the original dataframe and create a new dataframe of the **East Toronto_data**.

In [129]:
eastToronto_data = df_toronto_new[df_toronto_new['Borough'] == 'East Toronto'].reset_index(drop=True)
eastToronto_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558


#### Lets get geograhical coordinates of  ** East Toronto **

In [126]:
address = 'East Toronto, M4M'

geolocator = Nominatim(user_agent="ny_explorer")
#location = geolocator.geocode(address)
#latitude = location.latitude
#longitude = location.longitude



latitude = 43.659526
longitude = -79.340923
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.659526, -79.340923.


#### Create map of **East Toronto** using latitude and longitude values with its Neighbourhoods

In [130]:
# create map of eastToronto using latitude and longitude values
map_eastToronto = folium.Map(location=[latitude, longitude], zoom_start=11)
# add markers to map
for lat, lng, neighbourhood in zip(eastToronto_data['Latitude'], eastToronto_data['Longitude'],eastToronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        #popup = label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_eastToronto)  
    
map_eastToronto

#### Define Foursquare Credentials and Version

In [34]:
CLIENT_ID = 'YBCPSEG3FO2VBAYYH11GH2OF0NL3YTZMKF34WRMIPL2RTHFH' # your Foursquare ID
CLIENT_SECRET = '4A3UAJ1EIUPABM4NY3TG2YE4OBQQEJ3IDEZLH31QK1SZ5GI0' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YBCPSEG3FO2VBAYYH11GH2OF0NL3YTZMKF34WRMIPL2RTHFH
CLIENT_SECRET:4A3UAJ1EIUPABM4NY3TG2YE4OBQQEJ3IDEZLH31QK1SZ5GI0


#### Let's explore the neighborhood 'Studio District'  in our dataframe. Get Neighbourhood Name .   Get the Neighbourhood's latitue and Longitude

In [183]:
eastToronto_data.loc[3,'Neighbourhood']

'Studio District'

#### Get Latitude and  of the selected Neighbourhood : Studio District

In [134]:
neighbourhood_latitude = eastToronto_data.loc[3, 'Latitude'] # neighborhood latitude value
neighbourhood_longitude = eastToronto_data.loc[3, 'Longitude'] # neighborhood longitude value

neighbourhood_name = eastToronto_data.loc[3, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Studio District are 43.6595255, -79.340923.


#### Top 10 venues in 'Studio District'. Create URL for get request

In [135]:
LIMIT = 20
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT,
    )
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=YBCPSEG3FO2VBAYYH11GH2OF0NL3YTZMKF34WRMIPL2RTHFH&client_secret=4A3UAJ1EIUPABM4NY3TG2YE4OBQQEJ3IDEZLH31QK1SZ5GI0&v=20180605&ll=43.6595255,-79.340923&radius=500&limit=20'

In [136]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d993e67c53093002cd0671e'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Leslieville',
  'headerFullLocation': 'Leslieville, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 40,
  'suggestedBounds': {'ne': {'lat': 43.6640255045, 'lng': -79.33471445573701},
   'sw': {'lat': 43.6550254955, 'lng': -79.347131544263}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad7e958f964a520001021e3',
       'name': "Ed's Real Scoop",
       'location': {'address': '920 Queen St. E',
        'crossStreet': 'btwn Logan Ave. & Morse St.',
        'lat': 43.660655832455014,
        'lng': -79.3420187548006,
        'labeledLatLngs': 

In [138]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [139]:
# How many venues
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

NameError: name 'nearby_venues' is not defined

#### Repeat process for all other neighbourhoods in 'East Toronto '

In [153]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [152]:
# Run the above funtion on each neighbourhoos in East Toronto

eastToronto_venues = getNearbyVenues(names=eastToronto_data['Neighbourhood'],
                                   latitudes=eastToronto_data['Latitude'],
                                   longitudes=eastToronto_data['Longitude']
                                  )

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Business Reply Mail Processing Centre 969 Eastern


In [156]:
# Size of dataframe
print(eastToronto_venues.shape)
eastToronto_venues.head(25)

(81, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Glen Stewart Park,43.675278,-79.294647,Park
4,The Beaches,43.676357,-79.293031,Glen Stewart Ravine,43.6763,-79.294784,Other Great Outdoors
5,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
6,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant
7,"The Danforth West, Riverdale",43.679557,-79.352188,Dolce Gelato,43.677773,-79.351187,Ice Cream Shop
8,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop
9,"The Danforth West, Riverdale",43.679557,-79.352188,Mezes,43.677962,-79.350196,Greek Restaurant


In [155]:
#How many venues returned by each neighbourhood
eastToronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Business Reply Mail Processing Centre 969 Eastern,15,15,15,15,15,15
Studio District,20,20,20,20,20,20
The Beaches,6,6,6,6,6,6
"The Beaches West, India Bazaar",20,20,20,20,20,20
"The Danforth West, Riverdale",20,20,20,20,20,20


In [157]:
# How many unique caegories returned by venues
print('There are {} uniques categories.'.format(len(eastToronto_venues['Venue Category'].unique())))

There are 50 uniques categories.


#### Analyse each neighbourhood of East Toronto

In [161]:
# one hot encoding
eastToronto_onehot = pd.get_dummies(eastToronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add Neighbourhood column back to dataframe
eastToronto_onehot['Neighbourhood'] = eastToronto_venues['Neighbourhood'] 

# move Neighbourhood column to the first column
fixed_columns = [eastToronto_onehot.columns[-1]] + list(eastToronto_onehot.columns[:-1])
eastToronto_onehot = eastToronto_onehot[fixed_columns]

eastToronto_onehot.head(10)

Unnamed: 0,Neighbourhood,Auto Workshop,Bakery,Bookstore,Brewery,Burger Joint,Burrito Place,Café,Cheese Shop,Chinese Restaurant,...,Restaurant,Sandwich Place,Seafood Restaurant,Skate Park,Stationery Store,Steakhouse,Sushi Restaurant,Thai Restaurant,Trail,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [162]:
# Size of dataframe
eastToronto_onehot.shape

(81, 51)

#### Group categoroes by neighbourhoods by taking mean of frequncies of occurance of each category

In [166]:
eastToronto_grouped = eastToronto_onehot.groupby('Neighbourhood').mean().reset_index()
eastToronto_grouped

Unnamed: 0,Neighbourhood,Auto Workshop,Bakery,Bookstore,Brewery,Burger Joint,Burrito Place,Café,Cheese Shop,Chinese Restaurant,...,Restaurant,Sandwich Place,Seafood Restaurant,Skate Park,Stationery Store,Steakhouse,Sushi Restaurant,Thai Restaurant,Trail,Yoga Studio
0,Business Reply Mail Processing Centre 969 Eastern,0.066667,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,...,0.066667,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667
1,Studio District,0.0,0.1,0.05,0.0,0.0,0.0,0.1,0.05,0.05,...,0.0,0.05,0.05,0.0,0.05,0.0,0.0,0.05,0.0,0.0
2,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0
3,"The Beaches West, India Bazaar",0.0,0.0,0.0,0.05,0.05,0.05,0.0,0.0,0.0,...,0.0,0.05,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0
4,"The Danforth West, Riverdale",0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,...,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05


#### Print Neighbourhoods along with 5 top categories

In [167]:
num_top_venues = 5

for hood in eastToronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = eastToronto_grouped[eastToronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Business Reply Mail Processing Centre 969 Eastern----
              venue  freq
0     Auto Workshop  0.07
1    Farmers Market  0.07
2        Skate Park  0.07
3        Restaurant  0.07
4  Recording Studio  0.07


----Studio District----
              venue  freq
0            Bakery  0.10
1              Café  0.10
2       Coffee Shop  0.10
3    Ice Cream Shop  0.05
4  Stationery Store  0.05


----The Beaches----
                  venue  freq
0                   Pub  0.17
1                  Park  0.17
2          Neighborhood  0.17
3  Other Great Outdoors  0.17
4     Health Food Store  0.17


----The Beaches West, India Bazaar----
                venue  freq
0         Pizza Place  0.10
1      Ice Cream Shop  0.05
2  Light Rail Station  0.05
3  Italian Restaurant  0.05
4        Intersection  0.05


----The Danforth West, Riverdale----
                venue  freq
0    Greek Restaurant  0.30
1      Ice Cream Shop  0.10
2  Italian Restaurant  0.10
3      Cosmetics Shop  0.05
4          Res

#### Put that into pandas dataframe

In [168]:
# Define a funtion to sort venues in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [170]:
# Create a dataframe and display top 10 venues for each Neighbourhoods

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
Neighbourhoods_venues_sorted['Neighbourhood'] = eastToronto_grouped['Neighbourhood']

for ind in np.arange(eastToronto_grouped.shape[0]):
    Neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(eastToronto_grouped.iloc[ind, :], num_top_venues)

Neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Pizza Place,Brewery,Burrito Place,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center,Light Rail Station
1,Studio District,Coffee Shop,Café,Bakery,Fish Market,Ice Cream Shop,Italian Restaurant,Middle Eastern Restaurant,Neighborhood,Comfort Food Restaurant,Chinese Restaurant
2,The Beaches,Health Food Store,Park,Trail,Pub,Neighborhood,Other Great Outdoors,Garden,Fruit & Vegetable Store,Fish Market,Fish & Chips Shop
3,"The Beaches West, India Bazaar",Pizza Place,Fish & Chips Shop,Intersection,Ice Cream Shop,Light Rail Station,Liquor Store,Movie Theater,Gym,Park,Pet Store
4,"The Danforth West, Riverdale",Greek Restaurant,Ice Cream Shop,Italian Restaurant,Pub,Fruit & Vegetable Store,Juice Bar,Dessert Shop,Cosmetics Shop,Pizza Place,Yoga Studio


####  Cluster Neighbourhoods
#### Run K-Mean to cluster Neighbourhood in 5 clusters

In [171]:
# set number of clusters
kclusters = 5

eastToronto_grouped_clustering = eastToronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(eastToronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 4, 2, 1, 0])

In [172]:
# create a dataframe that includes clusters as well as top 10 venues

# add clustering labels
Neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

eastToronto_merged = eastToronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each Neighbourhood
eastToronto_merged = eastToronto_merged.join(Neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

eastToronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,2,Health Food Store,Park,Trail,Pub,Neighborhood,Other Great Outdoors,Garden,Fruit & Vegetable Store,Fish Market,Fish & Chips Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Pub,Fruit & Vegetable Store,Juice Bar,Dessert Shop,Cosmetics Shop,Pizza Place,Yoga Studio
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,1,Pizza Place,Fish & Chips Shop,Intersection,Ice Cream Shop,Light Rail Station,Liquor Store,Movie Theater,Gym,Park,Pet Store
3,M4M,East Toronto,Studio District,43.659526,-79.340923,4,Coffee Shop,Café,Bakery,Fish Market,Ice Cream Shop,Italian Restaurant,Middle Eastern Restaurant,Neighborhood,Comfort Food Restaurant,Chinese Restaurant
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,3,Yoga Studio,Pizza Place,Brewery,Burrito Place,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center,Light Rail Station


#### Visualize resulting clusters

In [181]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(eastToronto_merged['Latitude'], eastToronto_merged['Longitude'], eastToronto_merged['Neighbourhood'], eastToronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examin Clusters

### Cluster 1

In [182]:
eastToronto_merged.loc[eastToronto_merged['Cluster Labels'] == 0, eastToronto_merged.columns[[1] + list(range(5, eastToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,0,Greek Restaurant,Ice Cream Shop,Italian Restaurant,Pub,Fruit & Vegetable Store,Juice Bar,Dessert Shop,Cosmetics Shop,Pizza Place,Yoga Studio


### Cluster 2

In [175]:
eastToronto_merged.loc[eastToronto_merged['Cluster Labels'] == 1, eastToronto_merged.columns[[1] + list(range(5, eastToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,East Toronto,1,Pizza Place,Fish & Chips Shop,Intersection,Ice Cream Shop,Light Rail Station,Liquor Store,Movie Theater,Gym,Park,Pet Store


### Cluster 3

In [176]:
eastToronto_merged.loc[eastToronto_merged['Cluster Labels'] == 2, eastToronto_merged.columns[[1] + list(range(5, eastToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,2,Health Food Store,Park,Trail,Pub,Neighborhood,Other Great Outdoors,Garden,Fruit & Vegetable Store,Fish Market,Fish & Chips Shop


### Cluster 4

In [177]:
eastToronto_merged.loc[eastToronto_merged['Cluster Labels'] == 3, eastToronto_merged.columns[[1] + list(range(5, eastToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,3,Yoga Studio,Pizza Place,Brewery,Burrito Place,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center,Light Rail Station


### Cluster 5

In [179]:
eastToronto_merged.loc[eastToronto_merged['Cluster Labels'] == 4, eastToronto_merged.columns[[1] + list(range(5, eastToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,East Toronto,4,Coffee Shop,Café,Bakery,Fish Market,Ice Cream Shop,Italian Restaurant,Middle Eastern Restaurant,Neighborhood,Comfort Food Restaurant,Chinese Restaurant
