# Task 3

### We are going to explore and cluster the neighborhoods/boroughs in Toronto

### Also we will create *visualisation* of data and clusters




### _import all needed libraries.._

In [116]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Getting data from the Internet


In [117]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
response = requests.get(url, timeout=30)
response.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
Data_url = response.text
# The downloaded document is HTML, so parse it with an HTML parser rather than the XML one.
Data = BeautifulSoup(Data_url, 'html.parser')

### Creating of new DataFrame

In [118]:
# Locate the first <table> element of the parsed page.
table = Data.find('table')
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

columns_names = ['Postal Code', 'Borough', 'Neighborhood']
table_rows = table.find_all('tr')

# Keep the stripped cell text of every row that has exactly one <td> per
# expected column; rows with any other number of <td> cells are skipped.
records = []
for tr in table_rows:
    row_data = [td.text.strip() for td in tr.find_all('td')]
    if len(row_data) == len(columns_names):
        records.append(row_data)

# Build the frame in one go instead of appending to it row by row.
df_toronto = pd.DataFrame(records, columns=columns_names)

# Discard rows whose borough is 'Not assigned' and renumber the index from 0.
df_toronto = df_toronto[df_toronto['Borough'] != 'Not assigned'].reset_index(drop=True)

# Separate multiple neighborhood names with commas instead of slashes.
df_toronto['Neighborhood'] = df_toronto['Neighborhood'].str.replace('/', ',')

In [119]:
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [120]:
df_toronto.shape

(103, 3)

### Downloading of coordinates from cocl.us and creating new data frame with coordinates

In [121]:
coordinates=pd.read_csv("http://cocl.us/Geospatial_data")
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merging of Data frames 

In [122]:
df_merge = pd.merge(df_toronto,coordinates, on = 'Postal Code')

In [123]:

df_merge.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [124]:
df_merge.shape

(103, 5)

### Importing of additional libraries

In [125]:
import json
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim 

Solving environment: done

# All requested packages already installed.



In [126]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#### Use the geopy library to get the latitude and longitude values of Toronto

In [127]:
# Geocode the city name with Nominatim and report the coordinates it returns.
address = 'Toronto'
geolocator = Nominatim(user_agent="explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Creating Map of Toronto using latitude and longitude values

In [128]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

In [129]:
# Add one blue circle marker to the Toronto map for every row of df_merge;
# the popup text is "<neighborhood>, <borough>".
marker_rows = zip(
    df_merge['Latitude'],
    df_merge['Longitude'],
    df_merge['Borough'],
    df_merge['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

### Now we are going to use  Foursquare API for  exploring  the neighborhoods/boroughs
### Also we will create segmentation of results



#### *Let's analyse only one segment - East York - and create a data frame for it*

In [130]:
# Keep only the East York rows and renumber them from 0.
eastyork_data = df_merge[df_merge['Borough'] == 'East York'].reset_index(drop=True)
address_ey = 'East York, Toronto'

# Geocode the borough with the geocoder created for it in this cell.
geolocator_ey = Nominatim(user_agent="explorer")
location_ey = geolocator_ey.geocode(address_ey)
latitude_ey = location_ey.latitude
longitude_ey = location_ey.longitude
print('The geograpical coordinate of East York are {}, {}.'.format(latitude_ey, longitude_ey))

The geograpical coordinate of East York are 43.699971000000005, -79.33251996261595.


### Visualising...


In [131]:
# Map centred on the geocoded East York coordinates.
map_eastyork = folium.Map(location=[latitude_ey, longitude_ey], zoom_start=11)

# One blue circle marker per East York row, with the neighborhood name as popup.
eastyork_points = zip(
    eastyork_data['Latitude'],
    eastyork_data['Longitude'],
    eastyork_data['Neighborhood'],
)
for lat, lng, name in eastyork_points:
    popup = folium.Popup(name, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_eastyork)

map_eastyork

#### Let's explore one neighborhood in our dataframe (the row with index 1)

In [132]:
eastyork_data

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4B,East York,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937
1,M4C,East York,Woodbine Heights,43.695344,-79.318389
2,M4G,East York,Leaside,43.70906,-79.363452
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372
4,M4J,East York,East Toronto,43.685347,-79.338106


In [133]:
eastyork_data.loc[1, 'Neighborhood']

'Woodbine Heights'

In [134]:
# Read the name and coordinates of the East York row with index label 1.
row_label = 1
neighborhood_name = eastyork_data.loc[row_label, 'Neighborhood']
neighborhood_latitude = eastyork_data.loc[row_label, 'Latitude']
neighborhood_longitude = eastyork_data.loc[row_label, 'Longitude']

summary = 'Latitude and longitude values of {} are {}, {}.'
print(summary.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Woodbine Heights are 43.695343900000005, -79.3183887.


#### Define Foursquare credentials and version, then get the top 100 venues that are within a radius of 500 meters.

In [141]:
# The code was removed by Watson Studio for sharing.

In [142]:

VERSION = '20180605' # Foursquare API version
LIMIT = 100   # value sent as the `limit` query parameter
radius = 500  # value sent as the `radius` query parameter
# NOTE(review): the query is centred on the East York borough coordinates
# (latitude_ey, longitude_ey); the neighborhood_latitude / neighborhood_longitude
# values computed in the previous cell are not used - confirm which is intended.
# The query string must not contain a space after "limit=".
url = (
    'https://api.foursquare.com/v2/venues/explore'
    '?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'
).format(CLIENT_ID, CLIENT_SECRET, latitude_ey, longitude_ey, VERSION, radius, LIMIT)

##### Send the GET request and examine the results

In [144]:
results = requests.get(url).json()


In [145]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue record.

    The category list is read from ``row['categories']``; if that key is
    missing it is read from ``row['venue.categories']`` instead.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by key that raises ``KeyError`` for a missing key
        (for example a dict or a pandas Series).

    Returns
    -------
    str or None
        The ``'name'`` entry of the first category, or ``None`` when the
        category list is empty.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

Now we are ready to clean the json and structure it into a  dataframe.

In [146]:
# The venue records are the items of the first group in the response payload.
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only name, categories and coordinates.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the part after the last dot, e.g. 'venue.location.lat' -> 'lat'.
nearby_venues.columns = [name.rsplit('.', 1)[-1] for name in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Mon K Patisserie,Pastry Shop,43.696922,-79.32952
1,LCBO,Liquor Store,43.696728,-79.328875
2,Remarks Bar & Grill,Pub,43.696726,-79.329219
3,Pizza Hut,Pizza Place,43.696383,-79.328778
4,Taylor Creek Park - West,Park,43.70126,-79.331726
5,Cullen Bryant Park,Park,43.700417,-79.32823
6,Kouzina,Greek Restaurant,43.697407,-79.329175
7,Old East York Village,Plaza,43.696855,-79.329302


In [147]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

8 venues were returned by Foursquare.


## Exploring all Neighborhoods in East York

In [148]:
 
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Iterated in parallel with ``zip``; each triple describes one location.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' is None for a venue whose category list is empty.
        The frame is empty (but has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the module-level names CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT,
    and prints each location name as it is processed.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # Keep only the relevant fields of each nearby venue.
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may have no categories at all
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names here keeps an empty result well-formed.
    nearby_venues = pd.DataFrame(rows, columns=column_names)

    return(nearby_venues)

# Collect the nearby venues for every East York row (name, latitude, longitude).
eastyork_venues = getNearbyVenues(
    eastyork_data['Neighborhood'],
    eastyork_data['Latitude'],
    eastyork_data['Longitude'],
)


Parkview Hill , Woodbine Gardens
Woodbine Heights
Leaside
Thorncliffe Park
East Toronto


In [59]:
eastyork_venues.head(20)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,Jawny Bakers,43.705783,-79.312913,Gastropub
1,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,East York Gymnastics,43.710654,-79.309279,Gym / Fitness Center
2,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,Shoppers Drug Mart,43.705933,-79.312825,Pharmacy
3,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,TD Canada Trust,43.70574,-79.31227,Bank
4,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,Pizza Pizza,43.705159,-79.31313,Pizza Place
5,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,East York Animal Clinic,43.705921,-79.312196,Pet Store
6,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,St. Clair Ave E & O'Connor Dr,43.705233,-79.313274,Intersection
7,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,Venice Pizza,43.705921,-79.313957,Pizza Place
8,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,91 Woodbine Bus (south),43.707646,-79.313808,Bus Line
9,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,Harvey's,43.708136,-79.314105,Fast Food Restaurant


In [149]:
eastyork_venues.shape

(82, 7)

#### Let's check how many venues were returned for each neighborhood

In [150]:
eastyork_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
East Toronto,4,4,4,4,4,4
Leaside,34,34,34,34,34,34
"Parkview Hill , Woodbine Gardens",11,11,11,11,11,11
Thorncliffe Park,22,22,22,22,22,22
Woodbine Heights,11,11,11,11,11,11


In [151]:
print('There are {} uniques categories of venue.'.format(len(eastyork_venues['Venue Category'].unique())))

There are 48 uniques categories of venue.


## Analyze Each Neighborhood

In [152]:
# one hot encoding
eastyork_onehot = pd.get_dummies(eastyork_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
eastyork_onehot['Neighborhood'] = eastyork_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [eastyork_onehot.columns[-1]] + list(eastyork_onehot.columns[:-1])
eastyork_onehot = eastyork_onehot[fixed_columns]

eastyork_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Athletics & Sports,Bagel Shop,Bank,Beer Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Coffee Shop,Convenience Store,Cosmetics Shop,Curling Ice,Dance Studio,Department Store,Dessert Shop,Discount Store,Electronics Store,Fast Food Restaurant,Fish & Chips Shop,Furniture / Home Store,Gas Station,Gastropub,Grocery Store,Gym,Gym / Fitness Center,Housing Development,Indian Restaurant,Intersection,Juice Bar,Liquor Store,Mexican Restaurant,Park,Pet Store,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Shopping Mall,Skating Rink,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Video Store,Warehouse Store,Yoga Studio
0,"Parkview Hill , Woodbine Gardens",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Parkview Hill , Woodbine Gardens",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Parkview Hill , Woodbine Gardens",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,"Parkview Hill , Woodbine Gardens",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Parkview Hill , Woodbine Gardens",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [153]:
eastyork_onehot.shape

(82, 49)

### Grouping rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [154]:
eastyork_grouped = eastyork_onehot.groupby('Neighborhood').mean().reset_index()
eastyork_grouped

Unnamed: 0,Neighborhood,Asian Restaurant,Athletics & Sports,Bagel Shop,Bank,Beer Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Coffee Shop,Convenience Store,Cosmetics Shop,Curling Ice,Dance Studio,Department Store,Dessert Shop,Discount Store,Electronics Store,Fast Food Restaurant,Fish & Chips Shop,Furniture / Home Store,Gas Station,Gastropub,Grocery Store,Gym,Gym / Fitness Center,Housing Development,Indian Restaurant,Intersection,Juice Bar,Liquor Store,Mexican Restaurant,Park,Pet Store,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Shopping Mall,Skating Rink,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Video Store,Warehouse Store,Yoga Studio
0,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Leaside,0.0,0.0,0.029412,0.058824,0.029412,0.029412,0.029412,0.029412,0.058824,0.0,0.088235,0.0,0.0,0.0,0.0,0.029412,0.029412,0.0,0.029412,0.0,0.029412,0.058824,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.029412,0.029412,0.029412,0.0,0.029412,0.0,0.0,0.029412,0.029412,0.029412,0.0,0.088235,0.029412,0.029412,0.058824,0.0,0.0,0.0
2,"Parkview Hill , Woodbine Gardens",0.0,0.090909,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.090909,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Thorncliffe Park,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.045455,0.045455,0.090909,0.045455,0.0,0.045455,0.0,0.045455,0.0,0.045455,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.045455
4,Woodbine Heights,0.090909,0.090909,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.090909,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.090909,0.0,0.0


In [155]:
eastyork_grouped.shape

(5, 49)

#### Let's print each neighborhood along with the top 10 most common venues

In [156]:
num_top_venues = 10

for hood in eastyork_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Transpose the matching row so that every column becomes a row.
    temp = eastyork_grouped[eastyork_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # Skip the first row (the 'Neighborhood' entry). Copy the slice so the
    # assignment below writes to an independent frame, not a view of one.
    temp = temp.iloc[1:].copy()
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----East Toronto----
                 venue  freq
0                 Park  0.50
1          Coffee Shop  0.25
2    Convenience Store  0.25
3     Asian Restaurant  0.00
4          Pizza Place  0.00
5  Housing Development  0.00
6    Indian Restaurant  0.00
7         Intersection  0.00
8            Juice Bar  0.00
9         Liquor Store  0.00


----Leaside----
                    venue  freq
0             Coffee Shop  0.09
1     Sporting Goods Shop  0.09
2                    Bank  0.06
3            Burger Joint  0.06
4  Furniture / Home Store  0.06
5        Sushi Restaurant  0.06
6           Grocery Store  0.03
7      Mexican Restaurant  0.03
8               Juice Bar  0.03
9               Pet Store  0.03


----Parkview Hill , Woodbine Gardens----
                  venue  freq
0           Pizza Place  0.18
1  Gym / Fitness Center  0.09
2              Pharmacy  0.09
3                  Bank  0.09
4    Athletics & Sports  0.09
5             Gastropub  0.09
6          Intersection  0.09
7      

### Take it to DataFrame..

In [157]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, skipping its first entry.

    Parameters
    ----------
    row : pandas.Series
        The first entry is ignored; the remaining entries are ranked by value.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Index labels of the remaining entries in descending order of value,
        truncated to ``num_top_venues`` items.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

Let's display the top 10 venues for each neighborhood.

In [158]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # The first len(indicators) ranks take 'st'/'nd'/'rd'; every later rank takes 'th'.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = eastyork_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent venue categories
for ind in range(eastyork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(eastyork_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,Park,Convenience Store,Coffee Shop,Cosmetics Shop,Fish & Chips Shop,Fast Food Restaurant,Electronics Store,Discount Store,Dessert Shop,Department Store
1,Leaside,Sporting Goods Shop,Coffee Shop,Sushi Restaurant,Furniture / Home Store,Bank,Burger Joint,Liquor Store,Juice Bar,Dessert Shop,Electronics Store
2,"Parkview Hill , Woodbine Gardens",Pizza Place,Gastropub,Bus Line,Gym / Fitness Center,Intersection,Pharmacy,Pet Store,Fast Food Restaurant,Bank,Athletics & Sports
3,Thorncliffe Park,Indian Restaurant,Yoga Studio,Park,Discount Store,Fast Food Restaurant,Gas Station,Warehouse Store,Grocery Store,Gym,Gym / Fitness Center
4,Woodbine Heights,Skating Rink,Asian Restaurant,Video Store,Athletics & Sports,Beer Store,Dance Studio,Pharmacy,Curling Ice,Park,Cosmetics Shop


 ## Clustering Neighborhoods

### Run *k*-means to cluster the neighborhood into 3 clusters.

In [159]:
kclusters = 3

# Feature matrix: every column except the neighborhood name. The column is
# named by keyword because newer pandas rejects a positional `axis` argument.
eastyork_grouped_clustering = eastyork_grouped.drop(columns='Neighborhood')

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(eastyork_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 1, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [160]:
# Attach the cluster label to each neighborhood's top-venue row. A label column
# left over from a previous run of this cell is dropped first, because insert()
# refuses to add a column that already exists.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and top venues onto eastyork_data (postal code,
# borough, latitude/longitude), matching rows on the neighborhood name
eastyork_merged = eastyork_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

eastyork_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4B,East York,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937,1,Pizza Place,Gastropub,Bus Line,Gym / Fitness Center,Intersection,Pharmacy,Pet Store,Fast Food Restaurant,Bank,Athletics & Sports
1,M4C,East York,Woodbine Heights,43.695344,-79.318389,0,Skating Rink,Asian Restaurant,Video Store,Athletics & Sports,Beer Store,Dance Studio,Pharmacy,Curling Ice,Park,Cosmetics Shop
2,M4G,East York,Leaside,43.70906,-79.363452,1,Sporting Goods Shop,Coffee Shop,Sushi Restaurant,Furniture / Home Store,Bank,Burger Joint,Liquor Store,Juice Bar,Dessert Shop,Electronics Store
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372,1,Indian Restaurant,Yoga Studio,Park,Discount Store,Fast Food Restaurant,Gas Station,Warehouse Store,Grocery Store,Gym,Gym / Fitness Center
4,M4J,East York,East Toronto,43.685347,-79.338106,2,Park,Convenience Store,Coffee Shop,Cosmetics Shop,Fish & Chips Shop,Fast Food Restaurant,Electronics Store,Discount Store,Dessert Shop,Department Store


### And we are ready to visualize it..

In [161]:
# create map
map_clusters = folium.Map(location=[latitude_ey, longitude_ey], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(eastyork_merged['Latitude'], eastyork_merged['Longitude'], eastyork_merged['Neighborhood'], eastyork_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

 ## Examine Clusters
 
 ### determination of the discriminating venues categories for each cluster

### Cluster 1

In [163]:
eastyork_merged.loc[eastyork_merged['Cluster Labels'] == 0, eastyork_merged.columns[[1] + list(range(5, eastyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East York,0,Skating Rink,Asian Restaurant,Video Store,Athletics & Sports,Beer Store,Dance Studio,Pharmacy,Curling Ice,Park,Cosmetics Shop


### Cluster 2

In [165]:
eastyork_merged.loc[eastyork_merged['Cluster Labels'] == 1, eastyork_merged.columns[[1] + list(range(5, eastyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East York,1,Pizza Place,Gastropub,Bus Line,Gym / Fitness Center,Intersection,Pharmacy,Pet Store,Fast Food Restaurant,Bank,Athletics & Sports
2,East York,1,Sporting Goods Shop,Coffee Shop,Sushi Restaurant,Furniture / Home Store,Bank,Burger Joint,Liquor Store,Juice Bar,Dessert Shop,Electronics Store
3,East York,1,Indian Restaurant,Yoga Studio,Park,Discount Store,Fast Food Restaurant,Gas Station,Warehouse Store,Grocery Store,Gym,Gym / Fitness Center


### Cluster 3

In [166]:
eastyork_merged.loc[eastyork_merged['Cluster Labels'] == 2, eastyork_merged.columns[[1] + list(range(5, eastyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East York,2,Park,Convenience Store,Coffee Shop,Cosmetics Shop,Fish & Chips Shop,Fast Food Restaurant,Electronics Store,Discount Store,Dessert Shop,Department Store


### Thanks!