# IBM Data Science Specialization Capstone Project Notebook

In [80]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

## Part 1
### Get HTML data from wikipedia

In [2]:
Canada_M = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

### Scrape PostalCode, Borough, Neighborhood from HTML using BeautifulSoup

In [3]:
# soup = BeautifulSoup(open("Canada_M.htm"), "lxml")
soup = BeautifulSoup(Canada_M, "lxml")

In [4]:
PostalCode = []
Borough = []
Neighborhood = []

for data in soup.tbody.find_all('tr')[1:]:
    PostalCode.append(data.find('td').text)
    Borough.append(data.find_all('td')[1].text)
    Neighborhood.append(data.find_all('td')[2].text[:-1])

In [5]:
data = {'PostalCode': PostalCode,
        'Borough': Borough,
        'Neighborhood': Neighborhood}
df = pd.DataFrame(data)
# Drop rows where Borough is 'Not assigned', then reset index
df = df[df.Borough != 'Not assigned'].reset_index().drop('index', axis = 1)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


### Combine Neighborhood with same PostalCode

In [6]:
i = 1
while(i < len(df)):
    if df['PostalCode'].iloc[i] == df['PostalCode'].iloc[i - 1]:
        df.at[i - 1, 'Neighborhood'] = df.Neighborhood.iloc[i - 1] +', ' + df.Neighborhood.iloc[i]
        df.drop(index = i, inplace = True)
        df = df.reset_index().drop('index', axis = 1)
    else:
        i += 1

In [7]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Not assigned


### Get the number of rows in the datafrome which is 103

In [8]:
df.shape

(103, 3)

## Part 2

### Achieve Latitude and Longitude of each PostalCode

In [9]:
# import geocoder # import geocoder

# # initialize your variable to None
# lat_lng_coords = None

# # loop until you get the coordinates
# while(lat_lng_coords is None):
#     g = geocoder.google('{}, Toronto, Ontario'.format('PGQ7+MV'))
#     lat_lng_coords = g.latlng
#     print('1')

# latitude = lat_lng_coords[0]
# longitude = lat_lng_coords[1]


In [10]:
# Rename the 'Postal Code' column
Geo = pd.read_csv("Geospatial_Coordinates.csv") 
Geo.rename({'Postal Code': 'PostalCode'}, axis='columns', inplace=True)

In [11]:
# Merge Latitude and longitude to original dataframe
df = pd.merge(df, Geo, on='PostalCode')
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Not assigned,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


## Part 3

### Create a map of Toronto with neighborhoods superimposed on top.

In [31]:
import folium
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[43.706204, -79.398752], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#e182f2',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
# Show map Toronto with borough and neighborhood 
map_Toronto

#### Define Foursquare Credentials and Version

In [29]:
CLIENT_ID = 'PESQIFIZTNDBTMZPSPLZXDCQVGVQT50IE3K1RP3QCQALSDCV' # your Foursquare ID
CLIENT_SECRET = 'ODUYPM1K0ZG1Z0LZTM4GE5IRA0KSMBARQMVDJWAD0HC04UBW' # your Foursquare Secret
VERSION = '20181018' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PESQIFIZTNDBTMZPSPLZXDCQVGVQT50IE3K1RP3QCQALSDCV
CLIENT_SECRET:ODUYPM1K0ZG1Z0LZTM4GE5IRA0KSMBARQMVDJWAD0HC04UBW


In [116]:
df[df.Neighborhood == 'Upper Rouge']

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
95,M1X,Scarborough,Upper Rouge,43.836125,-79.205636


In [117]:
neighborhood_latitude = df.loc[95, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[95, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[95, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Upper Rouge are 43.836124700000006, -79.20563609999999.


### Now, let's get the top 100 venues that are in Parkwoods within a radius of 500 meters.

In [126]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 2000# define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=PESQIFIZTNDBTMZPSPLZXDCQVGVQT50IE3K1RP3QCQALSDCV&client_secret=ODUYPM1K0ZG1Z0LZTM4GE5IRA0KSMBARQMVDJWAD0HC04UBW&v=20181018&ll=43.836124700000006,-79.20563609999999&radius=2000&limit=100'

In [127]:
results = requests.get(url).json()

In [128]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [129]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Pumpkinland (Whittamore Farms),Farm,43.849236,-79.206647
1,Clownfish Village,Sculpture Garden,43.821,-79.205396
2,Taj supermarket,Grocery Store,43.818746,-79.210172
3,Cedar Brae Golf and Country Club,Golf Course,43.839154,-79.230089


And how many venues were returned by Foursquare?

In [66]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

13 venues were returned by Foursquare.


## 2. Explore Neighborhoods in Toronto

#### Let's create a function to repeat the same process to all the neighborhoods in Toronto

In [78]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    print("Finished!")
    
    return(nearby_venues)

#### Now write the code to run the above function on each neighborhood and create a new dataframe called *Toronto_venues*.

In [130]:
# type your answer here

Toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                 latitudes=df['Latitude'],
                                 longitudes=df['Longitude'],
                                 radius = 2000
                                )



Finished!


#### Let's check the size of the resulting dataframe

In [131]:
print(Toronto_venues.shape)
Toronto_venues.head()

(8511, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Donalda Golf & Country Club,43.752816,-79.342741,Golf Course
2,Parkwoods,43.753259,-79.329656,Darband Restaurant,43.755194,-79.348498,Middle Eastern Restaurant
3,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
4,Parkwoods,43.753259,-79.329656,LA Fitness,43.747665,-79.347077,Gym / Fitness Center


In [132]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,100,100,100,100,100,100
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",100,100,100,100,100,100
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",46,46,46,46,46,46
"Alderwood, Long Branch",100,100,100,100,100,100
"Bathurst Manor, Downsview North, Wilson Heights",55,55,55,55,55,55
Bayview Village,47,47,47,47,47,47
"Bedford Park, Lawrence Manor East",100,100,100,100,100,100
Berczy Park,100,100,100,100,100,100
"Birch Cliff, Cliffside West",44,44,44,44,44,44


#### Let's find out how many unique categories can be curated from all the returned venues

In [133]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 323 uniques categories.


<a id='item3'></a>

## 3. Analyze Each Neighborhood

In [134]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Zoo Exhibit,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [135]:
Toronto_onehot.shape

(8511, 323)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [136]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped.head()

Unnamed: 0,Neighborhood,Zoo Exhibit,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,...,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0


#### Let's confirm the new size

In [137]:
Toronto_grouped.shape

(103, 323)

#### Let's print each neighborhood along with the top 5 most common venues

In [138]:
num_top_venues = 5

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop  0.06
1        Hotel  0.04
2         Café  0.04
3      Theater  0.04
4    Gastropub  0.03


----Agincourt----
                venue  freq
0  Chinese Restaurant  0.12
1         Coffee Shop  0.07
2            Pharmacy  0.04
3              Bakery  0.04
4   Indian Restaurant  0.03


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                venue  freq
0  Chinese Restaurant  0.19
1         Coffee Shop  0.07
2     Bubble Tea Shop  0.04
3              Bakery  0.04
4                Park  0.04


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0           Coffee Shop  0.17
1  Fast Food Restaurant  0.11
2     Indian Restaurant  0.09
3           Pizza Place  0.09
4         Grocery Store  0.07


----Alderwood, Long Branch----
                    venue  freq
0             Coffee Shop  0.11
1    Fast Food Restau

#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [139]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [140]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Hotel,Café,Theater,Italian Restaurant,Pizza Place,Concert Hall,Thai Restaurant,Gastropub,Burrito Place
1,Agincourt,Chinese Restaurant,Coffee Shop,Bakery,Pharmacy,Japanese Restaurant,Indian Restaurant,Restaurant,Sandwich Place,Cantonese Restaurant,Caribbean Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Chinese Restaurant,Coffee Shop,Park,Pizza Place,Bubble Tea Shop,Bakery,Cantonese Restaurant,Vietnamese Restaurant,Dessert Shop,BBQ Joint
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Coffee Shop,Fast Food Restaurant,Pizza Place,Indian Restaurant,Grocery Store,Park,Italian Restaurant,Fried Chicken Joint,Beer Store,Sandwich Place
4,"Alderwood, Long Branch",Coffee Shop,Pizza Place,Fast Food Restaurant,Department Store,Seafood Restaurant,Café,Pharmacy,Clothing Store,Sandwich Place,Furniture / Home Store


<a id='item4'></a>

## 4. Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [145]:
# set number of clusters
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:] 

array([0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 4, 4, 1, 4, 4, 1, 1, 1, 0, 4, 4,
       0, 1, 1, 1, 1, 0, 4, 4, 4, 1, 0, 1, 1, 4, 1, 1, 1, 1, 4, 1, 1, 0,
       1, 4, 1, 1, 4, 0, 4, 4, 1, 1, 4, 4, 1, 4, 1, 1, 1, 1, 1, 4, 1, 4,
       1, 1, 4, 1, 4, 1, 1, 0, 4, 1, 4, 4, 3, 4, 0, 1, 1, 0, 0, 4, 4, 4,
       4, 4, 4, 4, 1, 2, 1, 1, 1, 0, 1, 1, 1, 4, 1], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [146]:
Toronto_merged = df

# add clustering labels
Toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Coffee Shop,Japanese Restaurant,Pharmacy,Pizza Place,Chinese Restaurant,Supermarket,Intersection,Sandwich Place,Gym,Café
1,M4A,North York,Victoria Village,43.725882,-79.315572,1,Coffee Shop,Fast Food Restaurant,Gym,Grocery Store,Clothing Store,Sandwich Place,Japanese Restaurant,Middle Eastern Restaurant,Beer Store,Mediterranean Restaurant
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,1,Coffee Shop,Café,Park,Italian Restaurant,Restaurant,Pub,Gastropub,Bakery,Japanese Restaurant,Farmers Market
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,1,Clothing Store,Coffee Shop,Vietnamese Restaurant,Fast Food Restaurant,Furniture / Home Store,Grocery Store,Cosmetics Shop,Fried Chicken Joint,Liquor Store,Sandwich Place
4,M7A,Queen's Park,Not assigned,43.662301,-79.389494,1,Café,Coffee Shop,Japanese Restaurant,Park,Mexican Restaurant,Vegetarian / Vegan Restaurant,Pizza Place,Concert Hall,Bookstore,Sushi Restaurant


Finally, let's visualize the resulting clusters

In [149]:
# create map
map_clusters = folium.Map(location=[43.706204, -79.398752], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<a id='item5'></a>

## 5. Examine Clusters

Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

#### Cluster 1

In [150]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Coffee Shop,Japanese Restaurant,Pharmacy,Pizza Place,Chinese Restaurant,Supermarket,Intersection,Sandwich Place,Gym,Café
8,East York,0,Pizza Place,Coffee Shop,Park,Sandwich Place,Pharmacy,Beer Store,Bakery,Athletics & Sports,Restaurant,Grocery Store
19,East Toronto,0,Coffee Shop,Pub,Café,Bar,Bakery,Breakfast Spot,Beach,Ice Cream Shop,Thai Restaurant,Sandwich Place
22,Scarborough,0,Coffee Shop,Fast Food Restaurant,Pharmacy,Sandwich Place,Furniture / Home Store,Supermarket,Chinese Restaurant,Discount Store,Pizza Place,Beer Store
27,North York,0,Coffee Shop,Japanese Restaurant,Bank,Chinese Restaurant,Sandwich Place,Pizza Place,Sushi Restaurant,Pharmacy,Grocery Store,Asian Restaurant
32,Scarborough,0,Fast Food Restaurant,Coffee Shop,Pizza Place,Sandwich Place,Discount Store,Pharmacy,Grocery Store,Chinese Restaurant,Beer Store,Ice Cream Shop
43,West Toronto,0,Café,Coffee Shop,Restaurant,Bar,Asian Restaurant,Vegetarian / Vegan Restaurant,Bakery,Mexican Restaurant,French Restaurant,Tea Room
49,North York,0,Coffee Shop,Vietnamese Restaurant,Fast Food Restaurant,Pizza Place,Furniture / Home Store,Sandwich Place,Supermarket,Bank,Grocery Store,Clothing Store
73,Central Toronto,0,Coffee Shop,Italian Restaurant,Café,Sushi Restaurant,Bakery,Fast Food Restaurant,Burger Joint,Japanese Restaurant,Indian Restaurant,Deli / Bodega
80,Downtown Toronto,0,Café,Coffee Shop,Park,Bar,Sandwich Place,Pizza Place,Vegetarian / Vegan Restaurant,Italian Restaurant,Mexican Restaurant,Ramen Restaurant


#### Cluster 2

In [151]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,1,Coffee Shop,Fast Food Restaurant,Gym,Grocery Store,Clothing Store,Sandwich Place,Japanese Restaurant,Middle Eastern Restaurant,Beer Store,Mediterranean Restaurant
2,Downtown Toronto,1,Coffee Shop,Café,Park,Italian Restaurant,Restaurant,Pub,Gastropub,Bakery,Japanese Restaurant,Farmers Market
3,North York,1,Clothing Store,Coffee Shop,Vietnamese Restaurant,Fast Food Restaurant,Furniture / Home Store,Grocery Store,Cosmetics Shop,Fried Chicken Joint,Liquor Store,Sandwich Place
4,Queen's Park,1,Café,Coffee Shop,Japanese Restaurant,Park,Mexican Restaurant,Vegetarian / Vegan Restaurant,Pizza Place,Concert Hall,Bookstore,Sushi Restaurant
5,Etobicoke,1,Grocery Store,Pharmacy,Coffee Shop,Bank,Liquor Store,Park,Café,Shopping Mall,Bus Line,Golf Course
6,Scarborough,1,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Athletics & Sports,Bus Station,Zoo,Mediterranean Restaurant,Grocery Store,Spa,Liquor Store
7,North York,1,Coffee Shop,Japanese Restaurant,Park,Supermarket,Restaurant,Bank,Pizza Place,Burger Joint,Italian Restaurant,Café
9,Downtown Toronto,1,Gastropub,Café,Coffee Shop,Thai Restaurant,Japanese Restaurant,Italian Restaurant,Pizza Place,Diner,Ramen Restaurant,Sushi Restaurant
10,North York,1,Clothing Store,Coffee Shop,Furniture / Home Store,Grocery Store,Bakery,Fried Chicken Joint,Cosmetics Shop,Discount Store,Japanese Restaurant,Bank
13,North York,1,Coffee Shop,Park,Japanese Restaurant,Sandwich Place,Middle Eastern Restaurant,Restaurant,Gym,Grocery Store,Turkish Restaurant,Indian Restaurant


#### Cluster 3

In [152]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
93,Etobicoke,2,Coffee Shop,Pizza Place,Fast Food Restaurant,Department Store,Seafood Restaurant,Café,Pharmacy,Clothing Store,Sandwich Place,Furniture / Home Store


#### Cluster 4

In [153]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
78,Scarborough,3,Chinese Restaurant,Coffee Shop,Bakery,Pharmacy,Japanese Restaurant,Indian Restaurant,Restaurant,Sandwich Place,Cantonese Restaurant,Caribbean Restaurant


#### Cluster 5

In [154]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,4,Coffee Shop,Convenience Store,Pizza Place,Bank,Grocery Store,Park,Fish & Chips Shop,Sandwich Place,Sushi Restaurant,Pharmacy
12,Scarborough,4,Coffee Shop,Sandwich Place,Breakfast Spot,Bank,Fish & Chips Shop,Fried Chicken Joint,Beer Store,Food & Drink Shop,Discount Store,Bar
14,East York,4,Café,Park,Coffee Shop,Pizza Place,Thai Restaurant,Gastropub,Ice Cream Shop,Skating Rink,Bakery,Sandwich Place
15,Downtown Toronto,4,Café,Coffee Shop,Hotel,Gastropub,Thai Restaurant,Concert Hall,Italian Restaurant,Steakhouse,Farmers Market,Japanese Restaurant
20,Downtown Toronto,4,Café,Italian Restaurant,Coffee Shop,Gastropub,Hotel,Farmers Market,American Restaurant,Japanese Restaurant,Restaurant,Steakhouse
21,York,4,Italian Restaurant,Coffee Shop,Bakery,Sandwich Place,Furniture / Home Store,Pizza Place,Bank,Mexican Restaurant,Breakfast Spot,BBQ Joint
28,North York,4,Coffee Shop,Pharmacy,Pizza Place,Sandwich Place,Park,Café,Deli / Bodega,Restaurant,Bank,Convenience Store
29,East York,4,Coffee Shop,Sandwich Place,Park,Bakery,Grocery Store,Restaurant,Pizza Place,Japanese Restaurant,Greek Restaurant,Pharmacy
30,Downtown Toronto,4,Coffee Shop,Hotel,Café,Theater,Italian Restaurant,Pizza Place,Concert Hall,Thai Restaurant,Gastropub,Burrito Place
35,East York,4,Greek Restaurant,Café,Bakery,Coffee Shop,Indian Restaurant,Burger Joint,Park,Pizza Place,Gastropub,Sandwich Place
