## 1. Install and import libraries

In [85]:
!pip install folium



In [225]:
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from sklearn.cluster import KMeans

## 2. Import the data from Wikipedia

In [200]:
# Download the raw HTML of the Wikipedia page that lists the "M" postal codes.
# raise_for_status() stops the notebook here on an HTTP error instead of letting
# an error page be parsed as if it were the postal-code table; the timeout keeps
# the cell from hanging forever on a stalled connection.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
data = response.text

In [201]:
# Parse the downloaded markup with the 'html.parser' backend
soup = BeautifulSoup(data, features='html.parser')

In [202]:
# Three independent, initially empty accumulators: one per scraped table column
postal_code, borough, neighborhood = [], [], []

In [203]:
# Walk the rows of the first <table> on the page and copy the text of the
# first three <td> cells of each row into the accumulators.
for table_row in soup.find('table').find_all('tr'):
    cells = table_row.find_all('td')
    if not cells:
        # a row without any <td> cell (e.g. a header row) carries no data
        continue
    postal_code.append(cells[0].text)
    borough.append(cells[1].text)
    neighborhood.append(cells[2].text)

In [204]:
# Assemble the three scraped lists into one table, one list per column
Toronto = pd.DataFrame(dict(PostalCode=postal_code,
                            borough=borough,
                            neighborhood=neighborhood))
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


## 3. Strip the trailing newline character (`\n`) from the PostalCode, borough and neighborhood columns

In [205]:
# Remove the whitespace surrounding each scraped cell (the raw values end with
# a newline). str.strip() leaves already-clean values untouched, so re-running
# this cell is harmless, whereas slicing off the last character would eat real
# data on a second run.
for column in ['PostalCode', 'borough', 'neighborhood']:
    Toronto[column] = Toronto[column].astype(str).str.strip()
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## 4. Drop the rows whose borough is "Not assigned"

In [206]:
# Remove, in place, every row whose borough is the placeholder 'Not assigned'
is_unassigned = Toronto['borough'] == 'Not assigned'
unassigned_rows = Toronto.loc[is_unassigned].index
Toronto.drop(index=unassigned_rows, inplace=True)
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## 5. Combine the neighborhoods that share the same postal code and borough

In [207]:
# One row per (postal code, borough): the neighborhood names of each group are
# concatenated into a single comma-separated string.
grouping_keys = ['PostalCode', 'borough']
joined_names = Toronto.groupby(grouping_keys)['neighborhood'].agg(', '.join)
Toronto = joined_names.reset_index()
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## 6. If a neighborhood is not assigned, use the borough name as the neighborhood

In [208]:
# Replace a missing neighborhood by the name of its borough.
# This is done with a boolean mask and .loc because the rows yielded by
# iterrows() are copies (writing to them never reaches the DataFrame), and
# because a comparison (`==`) in place of an assignment changes nothing.
# A neighborhood counts as missing when it is the literal 'Not assigned' or is
# empty (an empty table cell is left as '' once its newline is removed).
neighborhood_missing = Toronto['neighborhood'].isin(['Not assigned', ''])
Toronto.loc[neighborhood_missing, 'neighborhood'] = Toronto.loc[neighborhood_missing, 'borough']
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## 7. Shape

In [209]:
# Size of the cleaned postal-code table as (rows, columns)
Toronto.shape

(103, 3)

## 8. Load the coordinates from CSV file, provided by Coursera

In [210]:
# Location of the CSV holding one latitude/longitude pair per postal code
GEOSPATIAL_CSV_URL = 'http://cocl.us/Geospatial_data'
coordinates = pd.read_csv(GEOSPATIAL_CSV_URL)
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## 9. Rename postal code column

In [214]:
# Give the key column the same name as in the Toronto table so the two can be merged on it
coordinates = coordinates.rename(columns={'Postal Code': 'PostalCode'})
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## 10. Merge the 2 tables 

In [215]:
# Lift pandas' display limits so that every row and column is rendered
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
# Left join: keep every row of Toronto and attach the coordinates of its postal code
Toronto_coord = Toronto.merge(coordinates, how='left', on='PostalCode')
Toronto_coord

Unnamed: 0,PostalCode,borough,neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## 11. Use geopy library to get the latitude and longitude of Toronto

In [216]:
# Look up the coordinates of Toronto; they are used to center the maps below.
address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


## 12. Create a map of Toronto with neighborhoods superimposed on top

In [101]:
# create map of Toronto using latitude and longitude values
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)
# add markers to map
for lat, lng, borough, neighborhood in zip(Toronto_coord['Latitude'], Toronto_coord['Longitude'], Toronto_coord['borough'], Toronto_coord['neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], radius=5, popup=label, color='blue', fill=True, fill_color='#3186cc', fill_opacity=0.7, parse_html=False).add_to(Toronto_map)
Toronto_map

## 13. Use the Foursquare API to explore the neighborhoods 

In [102]:
# define Foursquare Credentials and Version
CLIENT_ID = '2K1DXCDEC1U310OW5J4W13Z5EZVG05FCTFK0JV1YOTZXCRUP' # your Foursquare ID
CLIENT_SECRET = 'PQ0P1SSWQJZFJKSMXMNHCPGP35GLKQYMDK5EHSKMLY2JPZTW' # your Foursquare Secret
VERSION = '20200511' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2K1DXCDEC1U310OW5J4W13Z5EZVG05FCTFK0JV1YOTZXCRUP
CLIENT_SECRET:PQ0P1SSWQJZFJKSMXMNHCPGP35GLKQYMDK5EHSKMLY2JPZTW


## 14. Get the top 100 venues within a radius of 500 meters of each postal code

In [103]:
radius = 500   # search radius around each postal code's coordinates, in meters
LIMIT = 100    # maximum number of venues requested per postal code
EXPLORE_URL = 'https://api.foursquare.com/v2/venues/explore'

# one tuple per venue: (postal code, borough, neighborhood, area latitude,
# area longitude, venue name, venue latitude, venue longitude, venue category)
venues = []

# The postal-code column of Toronto_coord is named 'PostalCode'.
# The loop variables are deliberately not named `borough`/`neighborhood`, so the
# scraped lists of the same names are not overwritten by this cell.
for lat, lng, post, bor, hood in zip(Toronto_coord['Latitude'], Toronto_coord['Longitude'], Toronto_coord['PostalCode'], Toronto_coord['borough'], Toronto_coord['neighborhood']):
    # let requests build and escape the query string instead of formatting it by hand
    response = requests.get(
        EXPLORE_URL,
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30)
    # stop on an HTTP error rather than failing later on a missing JSON key
    response.raise_for_status()
    results = response.json()["response"]['groups'][0]['items']

    for item in results:
        venue = item['venue']
        # a venue may come back with an empty category list; record None for it
        # instead of raising IndexError
        categories = venue['categories']
        category = categories[0]['name'] if categories else None
        venues.append((
            post,
            bor,
            hood,
            lat,
            lng,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            category))

In [170]:
# convert the venues list into a new DataFrame
Toronto_venues = pd.DataFrame(venues)

# define the column names
Toronto_venues.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
Toronto_venues.head()

(2115, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


## 15. Number of venues for each neighborhood

In [168]:
# Count the non-null entries of every remaining column per (postal code, borough,
# neighborhood) group, i.e. the number of venues returned for each area
Toronto_venues.groupby(['PostalCode','Borough','Neighborhood']).count().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M1B,Scarborough,"Malvern, Rouge",1,1,1,1,1,1
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",2,2,2,2,2,2
M1E,Scarborough,"Guildwood, Morningside, West Hill",7,7,7,7,7,7
M1G,Scarborough,Woburn,4,4,4,4,4,4
M1H,Scarborough,Cedarbrae,8,8,8,8,8,8


## 16. Number and type of unique categories from all the returned venues

In [128]:
# Number of distinct venue categories across all returned venues
category_count = len(Toronto_venues['VenueCategory'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 266 uniques categories.


In [135]:
# Preview the first 50 distinct venue categories, in order of first appearance
Toronto_venues['VenueCategory'].unique()[:50]

array(['Fast Food Restaurant', 'Bar', 'History Museum', 'Bank',
       'Electronics Store', 'Mexican Restaurant', 'Rental Car Location',
       'Medical Center', 'Intersection', 'Breakfast Spot', 'Coffee Shop',
       'Korean Restaurant', 'Soccer Field', 'Hakka Restaurant',
       'Caribbean Restaurant', 'Thai Restaurant', 'Athletics & Sports',
       'Bakery', 'Gas Station', 'Fried Chicken Joint', 'Playground',
       'Convenience Store', 'Department Store', 'Discount Store',
       'Chinese Restaurant', 'Train Station', 'Bus Station',
       'Ice Cream Shop', 'Bus Line', 'Metro Station', 'Park', 'Motel',
       'American Restaurant', 'Café', 'General Entertainment',
       'Skating Rink', 'College Stadium', 'Indian Restaurant',
       'Vietnamese Restaurant', 'Pet Store', 'Sandwich Place',
       'Middle Eastern Restaurant', 'Shopping Mall', 'Auto Garage',
       'Latin American Restaurant', 'Lounge', 'Clothing Store',
       'Italian Restaurant', 'Noodle House', 'Pizza Place'], dtyp

## 17. Analyze Each Neighborhood

In [136]:
# one hot encoding
toronto_onehot = pd.get_dummies(Toronto_venues[['VenueCategory']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe
toronto_onehot['PostalCode'] = Toronto_venues['PostalCode'] 
toronto_onehot['Borough'] = Toronto_venues['Borough'] 
toronto_onehot['Neighborhoods'] = Toronto_venues['Neighborhood'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-3:]) + list(toronto_onehot.columns[:-3])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(2115, 269)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Malvern, Rouge",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1E,Scarborough,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## 18. Group rows by neighborhood, taking the mean of the frequency of occurrence of each category

In [137]:
# Average the indicator columns within each area: every value becomes the
# share of that area's venues that belong to the category
area_keys = ["PostalCode", "Borough", "Neighborhoods"]
category_means = toronto_onehot.groupby(area_keys).mean()
toronto_grouped = category_means.reset_index()

print(toronto_grouped.shape)
toronto_grouped.head()

(100, 269)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Malvern, Rouge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,Scarborough,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,Scarborough,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 19. Top 10 most common venues for each postal code

In [160]:
num_top_venues = 10

# ordinal suffixes for ranks 1, 2 and 3; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    # explicit bounds check instead of a bare `except:`, which would also
    # swallow unrelated errors (including KeyboardInterrupt)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    freqColumns.append('{}{} Most Common Venue'.format(ind+1, suffix))
columns = areaColumns+freqColumns

# create a new dataframe holding the area identifiers of toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

# for every area, rank the category columns (everything after the three
# identifier columns) by mean frequency and keep the names of the top ones
for ind in np.arange(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(100, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",Fast Food Restaurant,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",Bar,History Museum,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",Rental Car Location,Electronics Store,Breakfast Spot,Medical Center,Intersection,Bank,Mexican Restaurant,Distribution Center,Diner,Discount Store
3,M1G,Scarborough,Woburn,Coffee Shop,Soccer Field,Korean Restaurant,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Drugstore
4,M1H,Scarborough,Cedarbrae,Bakery,Hakka Restaurant,Bank,Athletics & Sports,Caribbean Restaurant,Thai Restaurant,Gas Station,Fried Chicken Joint,Discount Store,Dim Sum Restaurant
5,M1J,Scarborough,Scarborough Village,Convenience Store,Playground,Yoga Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",Discount Store,Train Station,Convenience Store,Chinese Restaurant,Department Store,Bus Station,Coffee Shop,Drugstore,Eastern European Restaurant,Donut Shop
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",Bus Line,Bakery,Ice Cream Shop,Intersection,Bus Station,Metro Station,Soccer Field,Park,Drugstore,Donut Shop
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",American Restaurant,Motel,Yoga Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
9,M1N,Scarborough,"Birch Cliff, Cliffside West",College Stadium,Skating Rink,Café,General Entertainment,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant,Deli / Bodega


## 20. Cluster Neighborhoods

In [151]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop(["PostalCode", "Borough", "Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0], dtype=int32)

In [217]:
# drop the rows of Toronto.coord that aren't present in the neighborhoods_venues_sorted
Toronto_coord1 = Toronto_coord[Toronto_coord['PostalCode']!='M1X']
Toronto_coord2 = Toronto_coord1[Toronto_coord['PostalCode']!='M2M']
Toronto_coord3 = Toronto_coord2[Toronto_coord['PostalCode']!='M9A']
Toronto_coord3.shape

  app.launch_new_instance()


(100, 5)

In [218]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
toronto_merged = Toronto_coord3.copy()

# add clustering labels
toronto_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(["Borough", "Neighborhoods"], 1).set_index('PostalCode'), on='PostalCode')

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!

(100, 16)


Unnamed: 0,PostalCode,borough,neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,2,Fast Food Restaurant,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,4,Bar,History Museum,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0,Rental Car Location,Electronics Store,Breakfast Spot,Medical Center,Intersection,Bank,Mexican Restaurant,Distribution Center,Diner,Discount Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Soccer Field,Korean Restaurant,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Bakery,Hakka Restaurant,Bank,Athletics & Sports,Caribbean Restaurant,Thai Restaurant,Gas Station,Fried Chicken Joint,Discount Store,Dim Sum Restaurant


In [231]:
# sort the results by Cluster Labels
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Labels"], inplace=True)
toronto_merged.head()

(100, 16)


Unnamed: 0,PostalCode,borough,neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
51,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,0,Coffee Shop,Bakery,Park,Italian Restaurant,Pharmacy,Pizza Place,Café,Restaurant,Pub,Liquor Store
10,M1P,Scarborough,"Dorset Park, Wexford Heights, Scarborough Town...",43.75741,-79.273304,0,Indian Restaurant,Chinese Restaurant,Pet Store,Vietnamese Restaurant,Yoga Studio,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
11,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849,0,Middle Eastern Restaurant,Auto Garage,Shopping Mall,Breakfast Spot,Bakery,Sandwich Place,Dog Run,Diner,Discount Store,Distribution Center
12,M1S,Scarborough,Agincourt,43.7942,-79.262029,0,Lounge,Latin American Restaurant,Breakfast Spot,Clothing Store,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
13,M1T,Scarborough,"Clarks Corners, Tam O'Shanter, Sullivan",43.781638,-79.304302,0,Pizza Place,Italian Restaurant,Fast Food Restaurant,Thai Restaurant,Chinese Restaurant,Bank,Convenience Store,Gas Station,Fried Chicken Joint,Pharmacy


## 21. Visualize the resulting clusters

In [236]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['borough'], toronto_merged['neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 22.  Examine Clusters

### Cluster 1

In [229]:
# Rows with cluster label 0, restricted to column 1 and columns 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 0
shown_columns = toronto_merged.columns[[1, *range(5, toronto_merged.shape[1])]]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
51,Downtown Toronto,0,Coffee Shop,Bakery,Park,Italian Restaurant,Pharmacy,Pizza Place,Café,Restaurant,Pub,Liquor Store
11,Scarborough,0,Middle Eastern Restaurant,Auto Garage,Shopping Mall,Breakfast Spot,Bakery,Sandwich Place,Dog Run,Diner,Discount Store,Distribution Center
12,Scarborough,0,Lounge,Latin American Restaurant,Breakfast Spot,Clothing Store,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
13,Scarborough,0,Pizza Place,Italian Restaurant,Fast Food Restaurant,Thai Restaurant,Chinese Restaurant,Bank,Convenience Store,Gas Station,Fried Chicken Joint,Pharmacy
15,Scarborough,0,Fast Food Restaurant,Chinese Restaurant,Pharmacy,Burger Joint,Sandwich Place,Nail Salon,Pizza Place,Camera Store,Coffee Shop,Grocery Store
17,North York,0,Fast Food Restaurant,Golf Course,Athletics & Sports,Mediterranean Restaurant,Pool,Dog Run,Yoga Studio,Distribution Center,Dessert Shop,Dim Sum Restaurant
18,North York,0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Restaurant,Bakery,Toy / Game Store,Shoe Store,Bank,Burger Joint
7,Scarborough,0,Bus Line,Bakery,Ice Cream Shop,Intersection,Bus Station,Metro Station,Soccer Field,Park,Drugstore,Donut Shop
28,North York,0,Coffee Shop,Bank,Pizza Place,Pharmacy,Middle Eastern Restaurant,Sushi Restaurant,Ice Cream Shop,Shopping Mall,Restaurant,Fried Chicken Joint
10,Scarborough,0,Indian Restaurant,Chinese Restaurant,Pet Store,Vietnamese Restaurant,Yoga Studio,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


### Cluster 2

In [232]:
# Rows with cluster label 1, restricted to column 1 and columns 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 1
shown_columns = toronto_merged.columns[[1, *range(5, toronto_merged.shape[1])]]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,Downtown Toronto,1,Park,Trail,Playground,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
14,Scarborough,1,Park,Playground,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
44,Central Toronto,1,Park,Bus Line,Swim School,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant,Department Store
30,North York,1,Park,Airport,Snack Place,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Department Store
25,North York,1,Park,Fast Food Restaurant,Food & Drink Shop,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
98,York,1,Park,Convenience Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Deli / Bodega,Doner Restaurant
23,North York,1,Park,Bank,Convenience Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Department Store,Doner Restaurant
20,North York,1,Park,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run
90,Etobicoke,1,River,Park,College Stadium,Colombian Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop
74,York,1,Park,Pool,Women's Store,Colombian Restaurant,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore


### Cluster 3

In [233]:
# Rows with cluster label 2, restricted to column 1 and columns 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 2
shown_columns = toronto_merged.columns[[1, *range(5, toronto_merged.shape[1])]]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2,Fast Food Restaurant,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant


### Cluster 4

In [234]:
# Rows with cluster label 3, restricted to column 1 and columns 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 3
shown_columns = toronto_merged.columns[[1, *range(5, toronto_merged.shape[1])]]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
97,North York,3,Baseball Field,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Fast Food Restaurant


### Cluster 5

In [235]:
# Rows with cluster label 4, restricted to column 1 and columns 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 4
shown_columns = toronto_merged.columns[[1, *range(5, toronto_merged.shape[1])]]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,4,Bar,History Museum,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop


## 23. Observations

Most of the neighborhoods fall into Cluster 1, which consists mostly of business areas with cafés, restaurants, clothing stores and similar venues. Cluster 2 is dominated by parks. Each of the remaining clusters contains a single neighborhood: Cluster 3 is characterized by a fast food restaurant and a department store, Cluster 4 by a baseball field and a yoga studio, and Cluster 5 by a bar and a history museum.