In [71]:
import pandas as pd # library for data analsysis
from bs4 import BeautifulSoup # library for pulling data out of HTML and XML files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Libraries imported.


In [72]:
# Upload the Wikipedia page
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source,'lxml')

In [73]:
# Find the Table on the Wikipedia page and iterate through tags for required information
table_can = soup.find('table')
col = table_can.find_all('td')

elem_cnt = len(col)

# create three lists to store table data
postcode = []
borough = []
neighborhood = []

for i in range(0, elem_cnt, 3):
    postcode.append(col[i].text.strip())
    borough.append(col[i+1].text.strip())
    neighborhood.append(col[i+2].text.strip())

In [74]:
# create a new DataFrame from our lists
toronto_df = pd.DataFrame({"PostalCode": postcode,
                           "Borough": borough,
                           "Neighborhood": neighborhood})

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [75]:
# Normalize data and transform per given requirements
toronto_df_dropna = toronto_df[toronto_df.Borough != "Not assigned"].reset_index(drop=True)
toronto_df.loc[toronto_df.Neighborhood == 'Not assigned', "Neighborhood"] = toronto_df.Borough

#Group data by Postcode & Borough
toronto_df_grouped = toronto_df_dropna.groupby(["PostalCode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))

# for Neighborhood="Not assigned", make the value the same as Borough
for index, row in toronto_df_grouped.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]

In [76]:
# create a new dataframe to check whether we met the assignement's requirements
column_names = ["PostalCode", "Borough", "Neighborhood"]
neighborhoods_df = pd.DataFrame(columns=column_names)

neighborhoods_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in neighborhoods_list:
    neighborhoods_df = neighborhoods_df.append(toronto_df_grouped[toronto_df_grouped["PostalCode"]==postcode], ignore_index=True)
    
neighborhoods_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Woodbine Gardens, Parkview Hill"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Maryvale, Wexford"
7,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo..."


In [77]:
# print the number of rows of the cleaned dataframe
toronto_df_grouped.shape

(103, 3)

In [78]:
# load coordinates from the csv file 
coordinates = pd.read_csv("http://cocl.us/Geospatial_data")
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [79]:
# rename the column "PostalCode"
coordinates.rename(columns={"Postal Code": "PostalCode"}, inplace=True)

# merge two table on the column "PostalCode" 
toronto_df_new = toronto_df_grouped.merge(coordinates, on="PostalCode", how="left")
toronto_df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [80]:
# create a new dataframe to check whether we met the assignement's requirements
column_names = ["PostalCode", "Borough", "Neighborhood", "Latitude", "Longitude"]
neighborhoods_coord_df = pd.DataFrame(columns=column_names)

neighborhoods_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in neighborhoods_list:
    neighborhoods_coord_df = neighborhoods_coord_df.append(toronto_df_new[toronto_df_new["PostalCode"]==postcode], ignore_index=True)
    
neighborhoods_coord_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452
5,M4M,East Toronto,Studio District,43.659526,-79.340923
6,M1R,Scarborough,"Maryvale, Wexford",43.750072,-79.295849
7,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
8,M9L,North York,Humber Summit,43.756303,-79.565963
9,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442


# Use geopy library to get the latitude and longitude values of Toronto

In [81]:
address = 'Toronto, Ca'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


# Create a map of Toronto with neighborhoods superimposed on top

In [82]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods_coord_df['Latitude'], neighborhoods_coord_df['Longitude'], neighborhoods_coord_df['Borough'], neighborhoods_coord_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

# Define Foursquare Credentials and Version

In [83]:
CLIENT_ID = 'BUOE05Z45WXQOSHK4RUW5GRA3Q35U5IQVU4SV1F22SRFGD5M' # your Foursquare ID
CLIENT_SECRET = 'Q0UDIIJKW3PIPRRD5ZDX0LL3HUVVXWVNORJPCIPWVMSE1WGB' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: BUOE05Z45WXQOSHK4RUW5GRA3Q35U5IQVU4SV1F22SRFGD5M
CLIENT_SECRET:Q0UDIIJKW3PIPRRD5ZDX0LL3HUVVXWVNORJPCIPWVMSE1WGB


# Let's explore the first neighborhood in our dataframe.

Get the neighborhood's name.

In [84]:
neighborhoods_coord_df.loc[0, 'Neighborhood']

'Central Bay Street'

Get the neighborhood's latitude and longitude values.

In [85]:

neighborhood_latitude = neighborhoods_coord_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods_coord_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods_coord_df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Central Bay Street are 43.6579524, -79.3873826.


# Explore Neighborhoods in Toronto

### Let's explore all the neighborhoods in Toronto (let's get the top 50 venues that are in each Neighborhood within a radius of 500 meters)

In [86]:
# Set the limit of venues
LIMIT = 50 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# A function to explore the neighborhoods in Toronto
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


### Now let's write the code to run the above function on each neighborhood and create a new dataframe called toronto_venues.

In [87]:
toronto_venues = getNearbyVenues(names=neighborhoods_coord_df['Neighborhood'],
                                   latitudes=neighborhoods_coord_df['Latitude'],
                                   longitudes=neighborhoods_coord_df['Longitude']
                                  )

Central Bay Street
Hillcrest Village
Woodbine Gardens, Parkview Hill
Scarborough Village
Leaside
Studio District
Maryvale, Wexford
Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown
Humber Summit
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rouge, Malvern
Harbourfront, Regent Park


### Let's check the size of the resulting dataframe

In [88]:
print(toronto_venues.shape)
toronto_venues.head()

(222, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Central Bay Street,43.657952,-79.387383,Jimmy's Coffee,43.658421,-79.385613,Coffee Shop
1,Central Bay Street,43.657952,-79.387383,Tim Hortons,43.65857,-79.385123,Coffee Shop
2,Central Bay Street,43.657952,-79.387383,Hailed Coffee,43.658833,-79.383684,Coffee Shop
3,Central Bay Street,43.657952,-79.387383,Mercatto,43.660391,-79.387664,Italian Restaurant
4,Central Bay Street,43.657952,-79.387383,The Queen and Beaver Public House,43.657472,-79.383524,Gastropub


Let's check how many venues were returned for each neighborhood

In [89]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",8,8,8,8,8,8
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",15,15,15,15,15,15
Central Bay Street,50,50,50,50,50,50
"Harbourfront, Regent Park",49,49,49,49,49,49
Hillcrest Village,4,4,4,4,4,4
Humber Summit,2,2,2,2,2,2
Leaside,34,34,34,34,34,34
"Maryvale, Wexford",7,7,7,7,7,7
"Rouge, Malvern",1,1,1,1,1,1
Scarborough Village,2,2,2,2,2,2


### Let's find out how many unique categories can be curated from all the returned venues

In [90]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 108 uniques categories.


# Analyze Each Neighborhood

In [91]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,...,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Video Store,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [92]:
toronto_onehot.shape

(222, 108)

### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [93]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Video Store,Wine Bar
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0
1,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.02,0.0,0.02,0.02,0.02,0.0,0.02,0.0,0.02
3,"Harbourfront, Regent Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,...,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0
4,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Leaside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.029412,0.058824,0.0,0.0,0.0,0.0,0.0,0.0
7,"Maryvale, Wexford",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Rouge, Malvern",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Scarborough Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Let's confirm the new size

In [94]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0         Grocery Store  0.12
1  Fast Food Restaurant  0.12
2        Sandwich Place  0.12
3            Beer Store  0.12
4           Pizza Place  0.12


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                venue  freq
0      Airport Lounge  0.13
1     Airport Service  0.13
2    Airport Terminal  0.13
3             Airport  0.07
4  Airport Food Court  0.07


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.14
1      Ice Cream Shop  0.06
2  Italian Restaurant  0.06
3  Chinese Restaurant  0.04
4                Café  0.04


----Harbourfront, Regent Park----
                venue  freq
0         Coffee Shop  0.16
1                 Pub  0.06
2              Bakery  0.06
3                Park  0.06
4  Mexican Restaurant  0.04


----Hillcrest 

### Let's put that into a pandas dataframe
First, let's write a function to sort the venues in descending order.

In [95]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [96]:
import numpy as np # library to handle data in a vectorized manner

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Pharmacy,Sandwich Place,Video Store,Fast Food Restaurant,Grocery Store,Pizza Place,Beer Store,Fried Chicken Joint,American Restaurant,Antique Shop
1,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Lounge,Airport Service,Airport Terminal,Airport,Airport Food Court,Airport Gate,Coffee Shop,Bar,Boat or Ferry,Sculpture Garden
2,Central Bay Street,Coffee Shop,Italian Restaurant,Ice Cream Shop,Spa,Bubble Tea Shop,Café,Chinese Restaurant,French Restaurant,Office,Modern European Restaurant
3,"Harbourfront, Regent Park",Coffee Shop,Pub,Bakery,Park,Theater,Restaurant,Café,Mexican Restaurant,Breakfast Spot,Italian Restaurant
4,Hillcrest Village,Golf Course,Pool,Dog Run,Mediterranean Restaurant,Wine Bar,Coworking Space,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store


# Cluster Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters.

In [97]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 0, 2], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [98]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Label', kmeans.labels_)

toronto_merged = neighborhoods_coord_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,1,Coffee Shop,Italian Restaurant,Ice Cream Shop,Spa,Bubble Tea Shop,Café,Chinese Restaurant,French Restaurant,Office,Modern European Restaurant
1,M2H,North York,Hillcrest Village,43.803762,-79.363452,1,Golf Course,Pool,Dog Run,Mediterranean Restaurant,Wine Bar,Coworking Space,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store
2,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937,1,Fast Food Restaurant,Pizza Place,Gastropub,Athletics & Sports,Gym / Fitness Center,Intersection,Café,Bus Line,Pet Store,Bank
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2,Playground,Cosmetics Shop,Department Store,Café,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant
4,M4G,East York,Leaside,43.70906,-79.363452,1,Coffee Shop,Sporting Goods Shop,Burger Joint,Sushi Restaurant,Furniture / Home Store,Record Shop,Brewery,Beer Store,Bank,Clothing Store


Finally, let's visualize the resulting clusters

In [99]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Label']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters
Now, let's examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, let's then assign a name to each cluster.

### Cluster 1

In [68]:
toronto_merged.loc[toronto_merged['Cluster Label'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Scarborough,0,Fast Food Restaurant,Wine Bar,Department Store,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop


### Cluster 2

In [69]:
toronto_merged.loc[toronto_merged['Cluster Label'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,Coffee Shop,Italian Restaurant,Ice Cream Shop,Spa,Bubble Tea Shop,Café,Chinese Restaurant,French Restaurant,Office,Modern European Restaurant
1,North York,1,Golf Course,Pool,Dog Run,Mediterranean Restaurant,Wine Bar,Coworking Space,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store
2,East York,1,Fast Food Restaurant,Pizza Place,Gastropub,Athletics & Sports,Gym / Fitness Center,Intersection,Café,Bus Line,Pet Store,Bank
4,East York,1,Coffee Shop,Sporting Goods Shop,Burger Joint,Sushi Restaurant,Furniture / Home Store,Record Shop,Brewery,Beer Store,Bank,Clothing Store
5,East Toronto,1,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant,Convenience Store,Latin American Restaurant,Ice Cream Shop,Gym / Fitness Center,Gay Bar
6,Scarborough,1,Middle Eastern Restaurant,Bakery,Breakfast Spot,Sandwich Place,Auto Garage,Shopping Mall,Wine Bar,Chinese Restaurant,Chocolate Shop,Clothing Store
7,Etobicoke,1,Pharmacy,Sandwich Place,Video Store,Fast Food Restaurant,Grocery Store,Pizza Place,Beer Store,Fried Chicken Joint,American Restaurant,Antique Shop
8,North York,1,Shopping Mall,Empanada Restaurant,Wine Bar,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop
9,Downtown Toronto,1,Airport Lounge,Airport Service,Airport Terminal,Airport,Airport Food Court,Airport Gate,Coffee Shop,Bar,Boat or Ferry,Sculpture Garden
11,Downtown Toronto,1,Coffee Shop,Pub,Bakery,Park,Theater,Restaurant,Café,Mexican Restaurant,Breakfast Spot,Italian Restaurant


### Cluster 3

In [70]:
toronto_merged.loc[toronto_merged['Cluster Label'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,2,Playground,Cosmetics Shop,Department Store,Café,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant
