### Import necessary libraries

In [73]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.cm as cm
import matplotlib.colors as colors

# import Beautiful soup for web scraping
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Import Folium
!conda install -c conda-forge folium=0.5.0 --yes
import folium

import json
import requests 
from pandas.io.json import json_normalize

### Web scraping using Beautiful Soup library

In [2]:
# Defining url and passing it to urlopen
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html = urlopen(url)

In [3]:
# Create a beautiful soup object from html
soup = BeautifulSoup(html, 'lxml')
type(soup)

bs4.BeautifulSoup

In [4]:
rows = soup.find_all('tr')
print(rows[:10])

[<tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighborhood
</th></tr>, <tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>
</td></tr>, <tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>
</td></tr>, <tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>, <tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>, <tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>, <tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>, <tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>, <tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>
</td></tr>, <tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>]


In [6]:
list_rows=[]
list_rows

[]

In [7]:
for row in rows:
    str_cells = str(row)
    cleantext = BeautifulSoup(str_cells, "lxml").get_text()
    print(cleantext)
    list_rows.append(cleantext)


Postal Code

Borough

Neighborhood


M1A

Not assigned




M2A

Not assigned




M3A

North York

Parkwoods


M4A

North York

Victoria Village


M5A

Downtown Toronto

Regent Park, Harbourfront


M6A

North York

Lawrence Manor, Lawrence Heights


M7A

Downtown Toronto

Queen's Park, Ontario Provincial Government


M8A

Not assigned




M9A

Etobicoke

Islington Avenue, Humber Valley Village


M1B

Scarborough

Malvern, Rouge


M2B

Not assigned




M3B

North York

Don Mills


M4B

East York

Parkview Hill, Woodbine Gardens


M5B

Downtown Toronto

Garden District, Ryerson


M6B

North York

Glencairn


M7B

Not assigned




M8B

Not assigned




M9B

Etobicoke

West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale


M1C

Scarborough

Rouge Hill, Port Union, Highland Creek


M2C

Not assigned




M3C

North York

Don Mills


M4C

East York

Woodbine Heights


M5C

Downtown Toronto

St. James Town


M6C

York

Humewood-Cedarvale


M7C

Not assigned




M8C

Not assig

In [8]:
type(cleantext)

str

### create a dataframe and clean it using Pandas

In [9]:
df = pd.DataFrame(list_rows)

In [10]:
df.head()

Unnamed: 0,0
0,\nPostal Code\n\nBorough\n\nNeighborhood\n
1,\nM1A\n\nNot assigned\n\n\n
2,\nM2A\n\nNot assigned\n\n\n
3,\nM3A\n\nNorth York\n\nParkwoods\n
4,\nM4A\n\nNorth York\n\nVictoria Village\n


In [11]:
df = df[0].str.split('\n', expand=True)

In [12]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,56,57,58,59,60,61,62,63,64,65
0,,Postal Code,,Borough,,Neighborhood,,,,,...,,,,,,,,,,
1,,M1A,,Not assigned,,,,,,,...,,,,,,,,,,
2,,M2A,,Not assigned,,,,,,,...,,,,,,,,,,
3,,M3A,,North York,,Parkwoods,,,,,...,,,,,,,,,,
4,,M4A,,North York,,Victoria Village,,,,,...,,,,,,,,,,


In [13]:
# Drop the unnecessary columns
df = df.drop(df.iloc[:,6:],axis=1)

In [14]:
# assign the first row as the column header
header = df.iloc[0]
df = df.iloc[1:]
df.columns = header
df.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood
1,,M1A,,Not assigned,,
2,,M2A,,Not assigned,,
3,,M3A,,North York,,Parkwoods
4,,M4A,,North York,,Victoria Village
5,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront"


In [15]:
# Dropping the rows with Borough as Not assigned
df = df[df["Borough"] != "Not assigned"]
df.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood
3,,M3A,,North York,,Parkwoods
4,,M4A,,North York,,Victoria Village
5,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront"
6,,M6A,,North York,,"Lawrence Manor, Lawrence Heights"
7,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government"


In [16]:
# Resetting the index
df.set_index(np.arange(0,len(df)), inplace = True)
df.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood
0,,M3A,,North York,,Parkwoods
1,,M4A,,North York,,Victoria Village
2,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront"
3,,M6A,,North York,,"Lawrence Manor, Lawrence Heights"
4,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government"


In [17]:
df.tail()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood
102,,M8Z,,Etobicoke,,"Mimico NW, The Queensway West, South of Bloor,..."
103,,,,Canadian postal codes,,
104,,,,,NL,
105,,NL,,NS,,PE
106,,A,,B,,C


In [18]:
# Dropping the unnecessary rows
df.drop(df.tail(4).index, inplace=True)
df.tail()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood
98,,M8X,,Etobicoke,,"The Kingsway, Montgomery Road, Old Mill North"
99,,M4Y,,Downtown Toronto,,Church and Wellesley
100,,M7Y,,East Toronto,,"Business reply mail Processing Centre, South C..."
101,,M8Y,,Etobicoke,,"Old Mill South, King's Mill Park, Sunnylea, Hu..."
102,,M8Z,,Etobicoke,,"Mimico NW, The Queensway West, South of Bloor,..."


In [19]:
df.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood
0,,M3A,,North York,,Parkwoods
1,,M4A,,North York,,Victoria Village
2,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront"
3,,M6A,,North York,,"Lawrence Manor, Lawrence Heights"
4,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government"


In [20]:
df.shape

(103, 6)

### Adding the coordinates and creating a new data frame

In [21]:
coordinates = pd.read_csv("https://cocl.us/Geospatial_data")

In [22]:
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [23]:
df_coordinates = pd.merge(df,coordinates, on="Postal Code")

In [24]:
df_coordinates.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Latitude,Longitude
0,,M3A,,North York,,Parkwoods,43.753259,-79.329656
1,,M4A,,North York,,Victoria Village,43.725882,-79.315572
2,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront",43.65426,-79.360636
3,,M6A,,North York,,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### create a map of Toronto with neighborhoods

In [25]:
# Create map of Toronto 
latitude = 43.6532
longitude = -79.3832
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add neighborhoods to map
for lat, long, borough, neighborhood in zip(df_coordinates["Latitude"], df_coordinates["Longitude"], df_coordinates["Borough"], df_coordinates["Neighborhood"]):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat,long],
                        radius=5,
                        popup=label,
                        color="blue",
                        fill=True,
                        fill_color='#3186cc',
                        fill_opacity=0.7,
                        parse_html=False).add_to(map_toronto)
map_toronto

### Exploring only the boroughs that has Toronto in it

In [26]:
# creating a new dataframe with boroughs has Toronto
toronto_df = df_coordinates[df_coordinates["Borough"].str.contains("Toronto")].reset_index(drop=True)
toronto_df.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Latitude,Longitude
0,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront",43.65426,-79.360636
1,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,,M5B,,Downtown Toronto,,"Garden District, Ryerson",43.657162,-79.378937
3,,M5C,,Downtown Toronto,,St. James Town,43.651494,-79.375418
4,,M4E,,East Toronto,,The Beaches,43.676357,-79.293031


In [27]:
# create a map with only the Toronto_df
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat,long,borough,neighborhood in zip(toronto_df["Latitude"], toronto_df["Longitude"], toronto_df["Borough"], toronto_df["Neighborhood"]):
    label='{}, {}'.format(neighborhood,borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, long],
                        radius=5,
                        popup=label,
                        color='blue',
                        fill=True,
                        fill_color="#3186cc",
                        fill_opacity=0.7,
                        parse_html=False).add_to(toronto_map)
toronto_map

### Define Forursquare credentials

In [28]:
CLIENT_ID = 'U3VIHO2TO5QJCKVIC2VHXKWASY4SF1JK1S5NTKVZTTLVQNFL' 
CLIENT_SECRET = 'ZC121BB5ZDKQXFF5VUXDD3HDDDRO5IPIA3V5PVKHIB4WIM0K' 
VERSION = '20180605' 

### Create a function to list the top 100 venues within the radius of 500

In [29]:
LIMIT=100
def get_nearby_venues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    for name, lat, long in zip(names, latitudes, longitudes):
        url = "https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            long,
            radius,
            LIMIT)
        

        results = requests.get(url).json()["response"]["groups"][0]["items"]
        
        venues_list.append([(
            name,
            lat,
            long,
            v["venue"]["name"],
            v["venue"]["location"]["lat"],
            v["venue"]["location"]["lng"],
            v["venue"]["categories"][0]["name"]) for v in results])
    
    nearby_venues = pd.DataFrame(item for venue_list in venues_list for item in venue_list)
    nearby_venues.columns = ["Neighborhood",
                             "Neighborhood Latitude",
                             "Neighborhood Longitude",
                             "Venue",
                             "Venue Latitude",
                             "Venue Longitude",
                             "Venue Category"]
    return(nearby_venues)

In [32]:
toronto_venues = get_nearby_venues(toronto_df["Neighborhood"], toronto_df["Latitude"], toronto_df["Longitude"])

In [33]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [34]:
toronto_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,56,56,56,56,56,56
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",18,18,18,18,18,18
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15
Central Bay Street,63,63,63,63,63,63
Christie,16,16,16,16,16,16
Church and Wellesley,79,79,79,79,79,79
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,36,36,36,36,36,36
Davisville North,9,9,9,9,9,9


### Analyze Each Neighborhood

In [35]:
# Create a new dataframe with all unique venue categories
toronto_onehot = pd.get_dummies(toronto_venues["Venue Category"])

toronto_onehot["Neighborhood"] = toronto_venues["Neighborhood"]

# fixed_columns = [toronto_onehot.columns[col == "Neighborhood" for col in toronto_onehot.columns]] + list(toronto_onehot.columns[col != "Neighborhood" for col in toronto_onehot.columns])
# toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
toronto_onehot.columns.get_loc("Neighborhood")

165

In [49]:
toronto_grouped = toronto_onehot.groupby("Neighborhood").mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.066667,0.066667,0.066667,0.066667,0.133333,0.066667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.015873,0.0,0.015873,0.0,0.015873,0.0,0.0,0.015873
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.025316
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
toronto_grouped.shape

(39, 239)

### Create a new dataframe with each neighborhood having top 10 common venues

In [56]:
# function to sort the values 
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:10]

In [58]:
#Create a new dataframe
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

columns = ["Neighborhood"]
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1,indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted["Neighborhood"] = toronto_grouped["Neighborhood"]

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind,1:] = return_most_common_venues(toronto_grouped.iloc[ind,:], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Pub,Café,Bakery,Restaurant,Cheese Shop,Beer Bar,Seafood Restaurant,Bistro
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Grocery Store,Bakery,Performing Arts Venue,Pet Store,Nightclub,Climbing Gym,Restaurant
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Auto Workshop,Park,Pizza Place,Recording Studio,Restaurant,Butcher,Burrito Place,Brewery,Skate Park
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Harbor / Marina,Bar,Plane,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry,Boutique,Airport Lounge
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Japanese Restaurant,Department Store,Salad Place,Bubble Tea Shop,Ice Cream Shop


### Cluster Neighborhoods using K means clustering

In [59]:
#import kmeans 
from sklearn.cluster import KMeans

#set number of clusters
k = 5

toronto_grouped_clustering = toronto_grouped.drop("Neighborhood", axis = 1)

kmeans = KMeans(n_clusters=k, random_state=0).fit(toronto_grouped_clustering)

kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [62]:
toronto_df.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Latitude,Longitude
0,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront",43.65426,-79.360636
1,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,,M5B,,Downtown Toronto,,"Garden District, Ryerson",43.657162,-79.378937
3,,M5C,,Downtown Toronto,,St. James Town,43.651494,-79.375418
4,,M4E,,East Toronto,,The Beaches,43.676357,-79.293031


In [63]:
neighborhoods_venues_sorted.insert(0, "Cluster Labels", kmeans.labels_)
toronto_merged = toronto_df

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on="Neighborhood")

toronto_merged.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Pub,Theater,Breakfast Spot,Café,Yoga Studio,Mexican Restaurant,French Restaurant
1,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Yoga Studio,Creperie,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
2,,M5B,,Downtown Toronto,,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Cosmetics Shop,Café,Bubble Tea Shop,Bookstore,Tea Room
3,,M5C,,Downtown Toronto,,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Cocktail Bar,Gastropub,American Restaurant,Restaurant,Beer Bar,Cosmetics Shop,Moroccan Restaurant,Department Store
4,,M4E,,East Toronto,,The Beaches,43.676357,-79.293031,0,Pub,Health Food Store,Coffee Shop,Trail,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop


### Visualize the clusters

In [74]:
# create map
map_cluster = folium.Map(location=[latitude, longitude], zoom_start=11)

#color scheme for clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0,1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#add markers to map
markers_map = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + 'Cluster' + str(cluster), parse_html = True)
    folium.CircleMarker(
        [lat,lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_cluster)
map_cluster

### Examine Clusters

### Cluster1 - familiar with cafe and coffee shop

In [76]:
toronto_merged.loc[toronto_merged["Cluster Labels"] == 0, toronto_merged.columns[[2] + list(range(5,toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,,,,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Pub,Theater,Breakfast Spot,Café,Yoga Studio,Mexican Restaurant,French Restaurant
1,,,,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Yoga Studio,Creperie,Beer Bar,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Café
2,,,,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Cosmetics Shop,Café,Bubble Tea Shop,Bookstore,Tea Room
3,,,,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Cocktail Bar,Gastropub,American Restaurant,Restaurant,Beer Bar,Cosmetics Shop,Moroccan Restaurant,Department Store
4,,,,The Beaches,43.676357,-79.293031,0,Pub,Health Food Store,Coffee Shop,Trail,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
5,,,,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Pub,Café,Bakery,Restaurant,Cheese Shop,Beer Bar,Seafood Restaurant,Bistro
6,,,,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Japanese Restaurant,Department Store,Salad Place,Bubble Tea Shop,Ice Cream Shop
7,,,,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Restaurant,Italian Restaurant,Baby Store,Diner,Coffee Shop,Candy Store,Nightclub
8,,,,"Richmond, Adelaide, King",43.650571,-79.384568,0,Coffee Shop,Café,Restaurant,Clothing Store,Hotel,Gym,Deli / Bodega,Thai Restaurant,Pizza Place,Cosmetics Shop
9,,,,"Dufferin, Dovercourt Village",43.669005,-79.442259,0,Pharmacy,Bakery,Grocery Store,Music Venue,Pool,Middle Eastern Restaurant,Café,Brewery,Supermarket,Bar


### Cluster 2 - familiar with relaxation centers such as Park,yoga studio, Playground

In [77]:
toronto_merged.loc[toronto_merged["Cluster Labels"] == 1, toronto_merged.columns[[2] + list(range(5,toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,,,,Lawrence Park,43.72802,-79.38879,1,Park,Swim School,Bus Line,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
33,,,,Rosedale,43.679563,-79.377529,1,Park,Playground,Trail,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant


### Cluster3 - familiar with fitness centers such as Gym, tennis court 

In [78]:
toronto_merged.loc[toronto_merged["Cluster Labels"] == 2, toronto_merged.columns[[2] + list(range(5,toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,,,,"Moore Park, Summerhill East",43.689574,-79.38316,2,Gym,Trail,Tennis Court,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


### Cluster4 - familiar with home appliances such as home service, garden, electronics stor

In [79]:
toronto_merged.loc[toronto_merged["Cluster Labels"] == 3, toronto_merged.columns[[2] + list(range(5,toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,,,,Roselawn,43.711695,-79.416936,3,Garden,Home Service,Music Venue,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


### Cluster 5 - Danger zone(Jewelry store)

In [80]:
toronto_merged.loc[toronto_merged["Cluster Labels"] == 4, toronto_merged.columns[[2] + list(range(5,toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,,,,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,4,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Discount Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
