# IBM Data Science CapStone Project Final project
  
In this notebook, I will cluster Toronto neighbourhoods to find the most suitable location to open a pizza place in Toronto, Canada.


In [41]:
import pandas as pd
import numpy as np
import os
from sklearn.cluster import KMeans
import folium 
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests

In [6]:
# Scrape every HTML table on the Wikipedia page; the first table holds the postal codes.
data = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

df = pd.DataFrame(data[0])
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [7]:
# Keep only the postal codes that have been assigned to a borough.
df1 = df[df.Borough != 'Not assigned']

# Collapse to one row per (Postal Code, Borough), joining neighbourhood names with ', '.
df2 = (
    df1.groupby(['Postal Code', 'Borough'], sort=False)
    .agg(', '.join)
    .reset_index()
)

# A neighbourhood that is still 'Not assigned' falls back to its borough name.
unassigned = df2['Neighbourhood'] == 'Not assigned'
df2['Neighbourhood'] = np.where(unassigned, df2['Borough'], df2['Neighbourhood'])

df2

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [8]:
# Sanity check: (rows, columns) of the cleaned postal-code table.
df2.shape

(103, 3)

In [9]:
# Load the latitude/longitude lookup table keyed by postal code.
geo_df=pd.read_csv('https://cocl.us/Geospatial_data')
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Attach coordinates to each postal code (pd.merge defaults to an inner join on the key).
geo_merged = pd.merge(geo_df, df2, on='Postal Code')

In [12]:
# Reorder the columns so the descriptive fields come before the coordinates.
geo_data=geo_merged[['Postal Code','Borough','Neighbourhood','Latitude','Longitude']]
geo_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [13]:
# Keep only the boroughs whose name contains the literal text 'Toronto'
# (regex matching is disabled, so this is a plain substring test).
df4 = geo_data[geo_data['Borough'].str.contains('Toronto',regex=False)]
df4

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049


In [15]:
# Count the neighbourhood rows in each borough
neighbourhoods_per_borough = df4.groupby('Borough')['Neighbourhood'].count()
print(neighbourhoods_per_borough)

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
West Toronto         6
Name: Neighbourhood, dtype: int64


In [17]:
# Distinct borough names in order of first appearance (to be used later)
boroughs = list(df4['Borough'].unique())

In [18]:
# Use the mean latitude/longitude of the selected rows as the map centre.
# Note: this is the centroid of the rows in df4, not an official city coordinate.
lat_toronto = df4['Latitude'].mean()
lon_toronto = df4['Longitude'].mean()
print('The geographical coordinates of Toronto are {}, {}'.format(lat_toronto, lon_toronto))

The geographical coordinates of Toronto are 43.66713498717948, -79.38987324871795


In [19]:
# Assign each borough a random hex colour for the overview map.
# A dedicated, seeded generator keeps the colours identical on every
# Restart & Run All instead of changing with each execution.
color_rng = np.random.RandomState(42)
borough_color = {}
for borough in boroughs:
    # three channel values in 0..255 (upper bound of randint is exclusive)
    borough_color[borough] = '#%02X%02X%02X' % tuple(color_rng.randint(0, 256, size=3))

In [66]:
# Base map centred on the mean coordinates computed earlier.
map_toronto = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=12)

# One circle marker per row of df4, coloured by its borough.
marker_rows = df4[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']].itertuples(index=False, name=None)
for lat, lng, borough, neighbourhood in marker_rows:
    borough_hex = borough_color[borough]
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color=borough_hex,
        fill_color=borough_hex,
        fill_opacity=0.7)
    marker.add_to(map_toronto)

map_toronto

Getting Venues Data using Foursquare

In [36]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded here has been exposed
# and should be revoked/rotated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will fail.')
VERSION = '20200806'  # value sent as the API `v` (version) parameter
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # search radius around each location

In [43]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences describing each location; they are zipped together,
        so iteration stops at the shortest one.
    radius : int, default 500
        Search radius forwarded to the API.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but still has these columns) when nothing is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string; a timeout stops a
        # stalled connection from hanging the notebook indefinitely.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30)
        # Fail with a clear HTTP error instead of a KeyError on the payload.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue without any category would otherwise raise IndexError
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names up front keeps the schema stable even when no
    # venue was returned (assigning 7 names to an empty frame raises ValueError).
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [44]:
# Get venues for all neighbourhoods in our dataset: one API request per row of
# df4; the function prints each neighbourhood name as it is queried.
toronto_venues = getNearbyVenues(names=df4['Neighbourhood'],
                                latitudes=df4['Latitude'],
                                longitudes=df4['Longitude'])

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West, Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High 

In [45]:
# Check the size of the resulting dataframe: (venue rows, columns)
toronto_venues.shape

(1627, 7)

In [46]:
# Preview the first venue rows.
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop


In [47]:
# Number of venues per neighbourhood (count of non-null values in every column)
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,57,57,57,57,57,57
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15
Central Bay Street,66,66,66,66,66,66
Christie,17,17,17,17,17,17
Church and Wellesley,75,75,75,75,75,75
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,32,32,32,32,32,32
Davisville North,10,10,10,10,10,10


In [48]:
# Number of distinct venue categories across all neighbourhoods
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 234 uniques categories.


In [49]:
# Show the first 100 distinct venue categories (in order of first appearance)
toronto_venues['Venue Category'].unique()[:100]

array(['Trail', 'Health Food Store', 'Pub', 'Neighborhood',
       'Cosmetics Shop', 'Ice Cream Shop', 'Greek Restaurant',
       'Italian Restaurant', 'Brewery', 'Yoga Studio', 'Juice Bar',
       'Fruit & Vegetable Store', 'Dessert Shop', 'Pizza Place',
       'Restaurant', 'Bookstore', 'Furniture / Home Store', 'Café',
       'Bubble Tea Shop', 'Spa', 'Grocery Store', 'Coffee Shop', 'Bakery',
       'Caribbean Restaurant', 'Indian Restaurant', 'American Restaurant',
       'Lounge', 'Frozen Yogurt Shop', 'Liquor Store', 'Gym',
       'Fast Food Restaurant', 'Fish & Chips Shop', 'Sushi Restaurant',
       'Park', 'Burrito Place', 'Pet Store', 'Steakhouse',
       'Movie Theater', 'Sandwich Place', 'Light Rail Station',
       'Food & Drink Shop', 'Fish Market', 'Seafood Restaurant',
       'Gay Bar', 'Cheese Shop', 'Middle Eastern Restaurant',
       'Stationery Store', 'Comfort Food Restaurant', 'Thai Restaurant',
       'Coworking Space', 'Wine Bar', 'Latin American Restaurant',
  

In [50]:
# Check whether "Pizza Place" occurs among the venue categories
"Pizza Place" in toronto_venues['Venue Category'].unique()

True

Analyze Each Neighborhood

In [51]:
# One indicator column per venue category (column names are the bare category names).
to_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the neighbourhood each venue row belongs to. The column is named
# 'Neighbourhoods' so it stays distinct from the category columns.
to_onehot['Neighbourhoods'] = toronto_venues['Neighbourhood']

# The new column was appended last; move it to the front.
leading_column = to_onehot.columns[-1]
to_onehot = to_onehot[[leading_column, *to_onehot.columns[:-1]]]

print(to_onehot.shape)
to_onehot.head()

(1627, 235)


Unnamed: 0,Neighbourhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
# Average the one-hot rows per neighbourhood: each value becomes the share of
# that neighbourhood's venues that fall in the category.
to_grouped = to_onehot.groupby(["Neighbourhoods"]).mean().reset_index()

print(to_grouped.shape)
to_grouped

(39, 235)


Unnamed: 0,Neighbourhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.066667,0.066667,0.066667,0.066667,0.133333,0.133333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.015152,0.0,0.015152
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,...,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026667
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
# Number of neighbourhoods that have at least one pizza place among their venues.
int((to_grouped["Pizza Place"] > 0).sum())


19

Create a new dataframe to find Pizza Place only

In [55]:
# Keep only the neighbourhood name and its pizza-place frequency.
to_pizza_place = to_grouped[["Neighbourhoods","Pizza Place"]]

In [56]:
# Preview the pizza-place frequency per neighbourhood.
to_pizza_place.head()

Unnamed: 0,Neighbourhoods,Pizza Place
0,Berczy Park,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0
2,"Business reply mail Processing Centre, South C...",0.0625
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0
4,Central Bay Street,0.0


# Cluster Neighborhoods

In [57]:
# set number of clusters
toclusters = 3

# Feature matrix: only the pizza-place frequency of each neighbourhood.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# was deprecated in pandas 1.3 and removed in pandas 2.0.
to_clustering = to_pizza_place.drop(columns=["Neighbourhoods"])

# run k-means clustering; random_state and n_init are pinned so the labels are
# reproducible and do not depend on the scikit-learn version's n_init default
kmeans = KMeans(n_clusters=toclusters, random_state=0, n_init=10).fit(to_clustering)

# check cluster labels generated for the first ten rows in the dataframe
kmeans.labels_[0:10]

array([0, 0, 1, 0, 0, 0, 2, 0, 1, 1])

In [58]:
# Build a new dataframe from the per-neighbourhood pizza-place frequencies and
# attach the k-means cluster label of each neighbourhood.
to_merged = to_pizza_place.assign(**{"Cluster Labels": kmeans.labels_})

In [59]:
# Use the same key column name as toronto_venues ('Neighbourhood').
to_merged = to_merged.rename(columns={"Neighbourhoods": "Neighbourhood"})
to_merged.head()

Unnamed: 0,Neighbourhood,Pizza Place,Cluster Labels
0,Berczy Park,0.0,0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0
2,"Business reply mail Processing Centre, South C...",0.0625,1
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0
4,Central Bay Street,0.0,0


In [60]:
# Join the clustered neighbourhoods with toronto_venues to bring in the
# neighbourhood coordinates and the venue details.
# NOTE: the result has one row per venue, not one row per neighbourhood.
# NOTE(review): this reassigns to_merged, so re-running this cell on its own
# will likely fail on overlapping column names - re-run from the cell that
# creates to_merged.
to_merged = to_merged.join(toronto_venues.set_index("Neighbourhood"), on="Neighbourhood")

print(to_merged.shape)
to_merged.head()

(1627, 9)


Unnamed: 0,Neighbourhood,Pizza Place,Cluster Labels,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.0,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
0,Berczy Park,0.0,0,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
0,Berczy Park,0.0,0,43.644771,-79.373306,Goose Island Brewhouse,43.647329,-79.373541,Beer Bar


In [61]:
# Order the rows by cluster label
print(to_merged.shape)
to_merged = to_merged.sort_values("Cluster Labels")
to_merged

(1627, 9)


Unnamed: 0,Neighbourhood,Pizza Place,Cluster Labels,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.000000,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
29,St. James Town,0.000000,0,43.651494,-79.375418,The Poké Box,43.650469,-79.376317,Poke Place
29,St. James Town,0.000000,0,43.651494,-79.375418,Richmond Station,43.651569,-79.379266,American Restaurant
29,St. James Town,0.000000,0,43.651494,-79.375418,Dineen Coffee,43.650497,-79.378765,Café
29,St. James Town,0.000000,0,43.651494,-79.375418,Elgin And Winter Garden Theatres,43.653394,-79.378507,Theater
...,...,...,...,...,...,...,...,...,...
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,Lost & Found,43.649378,-79.424149,Men's Store
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,The Lucky Penny,43.647020,-79.417003,Deli / Bodega
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,Pilot Coffee Roasters,43.646610,-79.419606,Coffee Shop
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,Bazara,43.648535,-79.420521,Japanese Restaurant


In [67]:
# create map
map_clusters = folium.Map(location=[lat_toronto, lon_toronto], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per label
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, toclusters))]

# to_merged holds one row per venue, so plotting it directly stacks many
# identical markers on every neighbourhood. Keep one row per distinct
# neighbourhood location instead.
cluster_points = to_merged.drop_duplicates(
    subset=['Neighbourhood', 'Neighbourhood Latitude', 'Neighbourhood Longitude'])

# add markers to the map
for lat, lon, poi, cluster in zip(cluster_points['Neighbourhood Latitude'],
                                  cluster_points['Neighbourhood Longitude'],
                                  cluster_points['Neighbourhood'],
                                  cluster_points['Cluster Labels']):
    label = '{}, {}'.format(str(poi), str(cluster))
    label = folium.Popup(label, parse_html=True)
    # index the palette directly by label rather than relying on
    # `cluster-1` wrapping around to the last colour for cluster 0
    cluster_hex = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_hex,
        fill_color=cluster_hex,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# Examine Clusters


In [68]:
# Cluster 0: venue rows of the neighbourhoods labelled 0
to_merged[to_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighbourhood,Pizza Place,Cluster Labels,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,0.0,0,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
29,St. James Town,0.0,0,43.651494,-79.375418,The Poké Box,43.650469,-79.376317,Poke Place
29,St. James Town,0.0,0,43.651494,-79.375418,Richmond Station,43.651569,-79.379266,American Restaurant
29,St. James Town,0.0,0,43.651494,-79.375418,Dineen Coffee,43.650497,-79.378765,Café
29,St. James Town,0.0,0,43.651494,-79.375418,Elgin And Winter Garden Theatres,43.653394,-79.378507,Theater
...,...,...,...,...,...,...,...,...,...
0,Berczy Park,0.0,0,43.644771,-79.373306,Starbucks,43.648738,-79.372519,Coffee Shop
0,Berczy Park,0.0,0,43.644771,-79.373306,Oyshi Sushi,43.642340,-79.375853,Sushi Restaurant
0,Berczy Park,0.0,0,43.644771,-79.373306,Shoppers Drug Mart,43.647141,-79.370898,Pharmacy
0,Berczy Park,0.0,0,43.644771,-79.373306,St. Urbain Bagel,43.648611,-79.371497,Bagel Shop


In [69]:
# Cluster 1: venue rows of the neighbourhoods labelled 1
to_merged[to_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighbourhood,Pizza Place,Cluster Labels,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
30,"St. James Town, Cabbagetown",0.063830,1,43.667967,-79.367675,Pizza Pizza,43.667179,-79.369669,Pizza Place
30,"St. James Town, Cabbagetown",0.063830,1,43.667967,-79.367675,Tim Hortons,43.667169,-79.368849,Coffee Shop
30,"St. James Town, Cabbagetown",0.063830,1,43.667967,-79.367675,Daniel et Daniel Event Creation & Catering,43.664384,-79.368328,Bakery
30,"St. James Town, Cabbagetown",0.063830,1,43.667967,-79.367675,Matt's No Frills,43.663515,-79.367166,Grocery Store
30,"St. James Town, Cabbagetown",0.063830,1,43.667967,-79.367675,Wellesley Parliament Square,43.668589,-79.370169,Plaza
...,...,...,...,...,...,...,...,...,...
16,"India Bazaar, The Beaches West",0.047619,1,43.668999,-79.315572,LCBO,43.666732,-79.314966,Liquor Store
16,"India Bazaar, The Beaches West",0.047619,1,43.668999,-79.315572,Pet Valu,43.666979,-79.314665,Pet Store
16,"India Bazaar, The Beaches West",0.047619,1,43.668999,-79.315572,The Tulip Steakhouse,43.666348,-79.316854,Steakhouse
33,"Summerhill West, Rathnelly, South Hill, Forest...",0.062500,1,43.686412,-79.400049,Starbucks,43.686756,-79.398292,Coffee Shop


In [70]:
# Cluster 2: venue rows of the neighbourhoods labelled 2
to_merged[to_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighbourhood,Pizza Place,Cluster Labels,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
11,"First Canadian Place, Underground city",0.020000,2,43.648429,-79.382280,Canadian Opera Company,43.650660,-79.386242,Opera House
6,Church and Wellesley,0.013333,2,43.665860,-79.383160,Asahi Sushi,43.669874,-79.382943,Sushi Restaurant
36,"The Danforth West, Riverdale",0.024390,2,43.679557,-79.352188,Messini Authentic Gyros,43.677704,-79.350480,Greek Restaurant
36,"The Danforth West, Riverdale",0.024390,2,43.679557,-79.352188,7 Numbers,43.677062,-79.353934,Italian Restaurant
36,"The Danforth West, Riverdale",0.024390,2,43.679557,-79.352188,Valley Farm Produce,43.677999,-79.349969,Fruit & Vegetable Store
...,...,...,...,...,...,...,...,...,...
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,Lost & Found,43.649378,-79.424149,Men's Store
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,The Lucky Penny,43.647020,-79.417003,Deli / Bodega
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,Pilot Coffee Roasters,43.646610,-79.419606,Coffee Shop
19,"Little Portugal, Trinity",0.021277,2,43.647927,-79.419750,Bazara,43.648535,-79.420521,Japanese Restaurant


Observations
Pizza places are most concentrated in Cluster 1, which includes the St. James Town / Cabbagetown area, and least concentrated (close to zero) in Cluster 0, which includes Berczy Park and St. James Town. There also appear to be good opportunities to open near India Bazaar, The Beaches West, Runnymede and Swansea, as the competition seems to be low. Looking at nearby venues, Cluster 2 might be a good location, as there are not many pizza places in these areas. Therefore, this project recommends that the entrepreneur open a pizza place in these locations with little to no competition. Nonetheless, if the food is authentic, affordable and tastes good, I am confident that it will have a great following everywhere =)