In [1]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

Waiting for a Spark session to start...
Spark Initialization Done! ApplicationId = app-20181001144907-0025


In [2]:
# Download the Wikipedia page that lists Toronto ("M") postal codes and parse it.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source,'lxml')

In [3]:
# Convert the first <table> of the parsed page into a pandas DataFrame,
# using the first table row as the header.
if soup.table is None:
    # Without this guard str(None) == 'None' would be handed to read_html.
    raise ValueError('No <table> element found in the downloaded page')

# Wrap the markup in StringIO: passing literal HTML strings to read_html is deprecated.
df = pd.read_html(StringIO(str(soup.table)), header=0)[0]

In [4]:
# Keep only the rows whose 'Borough' is something other than 'Not assigned';
# the copy makes df_clean independent of df for the edits that follow.
df_clean = df.loc[df['Borough'].ne('Not assigned')].copy()
df_clean.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [5]:
# Index labels of every row whose 'Neighbourhood' is still 'Not assigned'.
index_list = df_clean.index[df_clean['Neighbourhood'].eq('Not assigned').values]

In [6]:
# For the rows collected in index_list, use the 'Borough' value as the
# 'Neighbourhood'. A single label-aligned .loc assignment replaces the
# per-row Python loop.
df_clean.loc[index_list, 'Neighbourhood'] = df_clean.loc[index_list, 'Borough']

df_clean.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [7]:
# Collapse to one row per ('Postcode', 'Borough') pair, joining the
# 'Neighbourhood' values of each group into a single comma-separated string.
df_clean = (
    df_clean
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)
df_clean.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [8]:
# Show the (rows, columns) shape of df_clean as a quick sanity check.
df_clean.shape

(103, 3)

In [9]:
import geocoder

In [10]:
#lng = []
#lat = []

#for postal_code in df['Postcode']:
    # initialize your variable to None
#    lat_lng_coords = None
    
    # loop until you get the coordinates
#    while(lat_lng_coords is None):
#      g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#      lat_lng_coords = g.latlng

#    latitude = lat_lng_coords[0]
#    longitude = lat_lng_coords[1]
    
#    lng.append(longitude)
#    lat.append(latitude)

#print(postal_code[:5],lng[:5], lat[:5])

In [11]:
# Load latitude and longitude per postal code from an alternative CSV file
# (used instead of the geocoder lookup that is commented out above).
df_latlng = pd.read_csv('https://cocl.us/Geospatial_data')
df_latlng.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [12]:
# Show the shape of df_latlng; its row count should match df_clean before the merge.
df_latlng.shape

(103, 3)

In [13]:
# Join the cleaned postcode table with the coordinates table on the postal
# code, then drop the redundant right-hand key column.
# NOTE(review): with how='outer', a postal code present only in df_latlng
# would yield a row with a missing 'Postcode' -- confirm this is intended.
df_canada = (
    df_clean
    .merge(df_latlng, how='outer', left_on='Postcode', right_on='Postal Code')
    .drop(columns=['Postal Code'])
)
df_canada.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [14]:
from geopy.geocoders import Nominatim
import folium
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [15]:
# Keep only the rows whose 'Borough' contains the word 'Toronto'.
# na=False treats a missing borough as "no match" (replacing the `== True`
# comparison), and chaining reset_index avoids mutating a filtered frame
# in place.
df_toronto = (
    df_canada[df_canada['Borough'].str.contains('Toronto', na=False)]
    .reset_index(drop=True)
)
df_toronto.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [16]:
# Show the (rows, columns) shape of the Toronto-only frame.
df_toronto.shape

(38, 5)

In [17]:
# Distinct borough names present in df_toronto, in order of first appearance.
ar_toronto = list(df_toronto['Borough'].drop_duplicates())
ar_toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [18]:
# Geocode the city itself to obtain a centre point for the maps below.
address = 'Toronto, CAN'

# Nominatim requires an explicit user_agent that identifies the application.
geolocator = Nominatim(user_agent='toronto_neighbourhood_clustering')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

# The address geocoded above is Toronto as a whole, so the message says so.
print('The geograpical coordinate of Toronto are {0:0.6f}, {1:0.6f}.'.format(latitude, longitude))

The geograpical coordinate of East Toronto are 43.660700, -79.385089.


In [19]:
# Base map centred on the geocoded latitude/longitude.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# One blue circle marker per row of df_toronto; the popup shows the
# postcode, borough and neighbourhood text of that row.
for postcode, borough, neighbourhood, lat, lng in df_toronto[
        ['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
].itertuples(index=False, name=None):
    label = 'Postcode: {0}<br>Borough: {1}<br>Neighbourhood: {2}'.format(postcode, borough, neighbourhood)
    popup = folium.Popup(
        folium.IFrame(html=label, width=300, height=100),
        parse_html=True,
    )
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

map_toronto

In [20]:
# The code was removed by Watson Studio for sharing.

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping-like
        Object supporting ``row[key]``. The categories are read from the
        'categories' key, falling back to 'venue.categories' when the
        first key is absent.

    Returns
    -------
    The 'name' of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a
        # real problem and is allowed to propagate.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

#function to get data from Foursquare for each neighbourhood
def getNearbyVenues(names, latitudes, longitudes):
    """Query the Foursquare 'explore' endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; they are zipped, so one request is made per
        (name, latitude, longitude) triple.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood_Latitude', 'Neighbourhood_Longitude', 'Venue',
        'Venue_Category', 'Venue_Latitude' and 'Venue_Longitude'.
        'Venue_Category' is None for a venue with no categories. The frame
        is empty, but still has these columns, when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an HTTP error status.

    Notes
    -----
    Reads the module-level names Client_Id, Client_Secret, Version, Radius
    and Limit, which are not defined in this cell and must exist before the
    function is called with non-empty input.
    """
    columns = ['Neighbourhood',
               'Neighbourhood_Latitude',
               'Neighbourhood_Longitude',
               'Venue',
               'Venue_Category',
               'Venue_Latitude',
               'Venue_Longitude'
               ]
    records = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string.
        params = {
            'client_id': Client_Id,
            'client_secret': Client_Secret,
            'v': Version,
            'll': '{},{}'.format(lat, lng),
            'radius': Radius,
            'limit': Limit,
        }

        # make the GET request; fail loudly on HTTP errors and never hang forever
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        response.raise_for_status()

        # An area without recommendations may come back with no groups.
        groups = response.json()['response'].get('groups', [])
        results = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # Some venues carry an empty category list; record None rather
            # than raising IndexError.
            categories = venue.get('categories') or []
            records.append((name,
                            lat,
                            lng,
                            venue['name'],
                            categories[0]['name'] if categories else None,
                            venue['location']['lat'],
                            venue['location']['lng']
                            ))

    # Passing the column names at construction keeps an empty result well-formed.
    return pd.DataFrame(records, columns=columns)

In [22]:
# Fetch the nearby venues for every row of df_toronto, passing its
# neighbourhood names, latitudes and longitudes in that order.
df_toronto_venues = getNearbyVenues(
    df_toronto['Neighbourhood'],
    df_toronto['Latitude'],
    df_toronto['Longitude'],
)

In [23]:
# Discard every venue row that has at least one missing value and renumber
# the remaining rows from zero.
df_toronto_venues = (
    df_toronto_venues
    .dropna(axis=0, how='any')
    .reset_index(drop=True)
)
df_toronto_venues.head(10)

Unnamed: 0,Neighbourhood,Neighbourhood_Latitude,Neighbourhood_Longitude,Venue,Venue_Category,Venue_Latitude,Venue_Longitude
0,The Beaches,43.676357,-79.293031,Starbucks,Coffee Shop,43.678798,-79.298045
1,The Beaches,43.676357,-79.293031,Grover Pub and Grub,Pub,43.679181,-79.297215
2,The Beaches,43.676357,-79.293031,Upper Beaches,Neighborhood,43.680563,-79.292869
3,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,Greek Restaurant,43.677621,-79.351434
4,"The Danforth West, Riverdale",43.679557,-79.352188,Dolce Gelato,Ice Cream Shop,43.677773,-79.351187
5,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,Cosmetics Shop,43.67782,-79.351265
6,"The Danforth West, Riverdale",43.679557,-79.352188,Messini Authentic Gyros,Greek Restaurant,43.677827,-79.350569
7,"The Danforth West, Riverdale",43.679557,-79.352188,Cafe Fiorentina,Italian Restaurant,43.677743,-79.350115
8,"The Danforth West, Riverdale",43.679557,-79.352188,Mezes,Greek Restaurant,43.677962,-79.350196
9,"The Danforth West, Riverdale",43.679557,-79.352188,Christina's On The Danforth,Greek Restaurant,43.67824,-79.349185


In [24]:
# Count the non-null entries of every column per neighbourhood,
# working on a copy of the venue table.
df_toronto_neigh = df_toronto_venues.copy()
df_toronto_neigh.groupby('Neighbourhood').agg('count')

Unnamed: 0_level_0,Neighbourhood_Latitude,Neighbourhood_Longitude,Venue,Venue_Category,Venue_Latitude,Venue_Longitude
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,54,54,54,54,54,54
"Brockton, Exhibition Place, Parkdale Village",22,22,22,22,22,22
Business reply mail Processing Centre969 Eastern,19,19,19,19,19,19
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,84,84,84,84,84,84
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,84,84,84,84,84,84


In [25]:
# One-hot encode the venue categories and attach the neighbourhood of each venue.
df_toronto_venues_onecode = pd.get_dummies(df_toronto_venues['Venue_Category']).assign(
    Neighbourhood=df_toronto_venues['Neighbourhood']
)

# Rotate the column order so that the last column becomes the first.
fixed_columns = df_toronto_venues_onecode.columns.tolist()
fixed_columns = fixed_columns[-1:] + fixed_columns[:-1]
df_toronto_venues_onecode = df_toronto_venues_onecode[fixed_columns]

df_toronto_venues_onecode.head(10)

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# Show the (rows, columns) shape of the one-hot encoded venue table.
df_toronto_venues_onecode.shape

(1692, 231)

In [27]:
# Average the one-hot columns within each neighbourhood, i.e. the share of
# that neighbourhood's venues falling into each category.
df_toronto_venues_neigh_mean = (
    df_toronto_venues_onecode
    .groupby('Neighbourhood', as_index=False)
    .mean()
)
df_toronto_venues_neigh_mean.head(10)

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business reply mail Processing Centre969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.011905
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.06,0.0,0.04,0.01,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.0,0.011905,0.0,0.011905


In [28]:
# Feature matrix for clustering: the per-neighbourhood category means
# without the 'Neighbourhood' label column.
df_toronto_venues_clustering = df_toronto_venues_neigh_mean.drop(columns=['Neighbourhood'])
df_toronto_venues_clustering.head(10)

Unnamed: 0,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,0.0,0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,...,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.011905
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.06,0.0,0.04,0.01,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,...,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.0,0.011905,0.0,0.011905


In [29]:
# number of clusters to fit
kclusters = 5

# Build the estimator with a fixed random_state, then fit it on the
# clustering frame.
# NOTE(review): n_init is left at the library default, which may differ
# between scikit-learn versions -- confirm before comparing results.
kmeans = KMeans(n_clusters=kclusters, random_state=1)
kmeans = kmeans.fit(df_toronto_venues_clustering)

# one cluster label per row of the fitted frame
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1,
       3, 1, 2, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 0, 1, 1], dtype=int32)

In [30]:
# Show how many cluster labels were produced (one per row passed to fit).
len(kmeans.labels_)

38

In [31]:
# Label each neighbourhood with its cluster.
# kmeans.labels_ follows the row order of df_toronto_venues_neigh_mean (the
# frame the features were derived from), so pair each label with that frame's
# 'Neighbourhood' value and join on the name. Assigning the labels
# positionally after an independent sort is only right when both frames
# contain exactly the same, unique neighbourhoods.
neighbourhood_labels = pd.DataFrame({
    'Neighbourhood': df_toronto_venues_neigh_mean['Neighbourhood'],
    'Cluster': kmeans.labels_,
})

# The inner join keeps 'Cluster' an integer column; a neighbourhood for which
# no venues were found has no label and is left out.
df_toronto_cluster = (
    df_toronto
    .merge(neighbourhood_labels, on='Neighbourhood', how='inner')
    .sort_values('Neighbourhood')
    .reset_index(drop=True)
)
df_toronto_cluster.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster
0,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,1
1,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1
2,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191,1
3,M7Y,East Toronto,Business reply mail Processing Centre969 Eastern,43.662744,-79.321558,1
4,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442,1
5,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,1
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,1
7,M5T,Downtown Toronto,"Chinatown, Grange Park, Kensington Market",43.653206,-79.400049,1
8,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1
9,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,1


In [32]:
# Attach the cluster label of its neighbourhood to every venue row by
# outer-joining on 'Neighbourhood'; the postcode, borough and coordinate
# columns of the cluster table are dropped before the join.
df_toronto_venues_cluster = df_toronto_venues.merge(
    df_toronto_cluster.drop(columns=['Postcode', 'Borough', 'Latitude', 'Longitude']),
    on='Neighbourhood',
    how='outer',
)
df_toronto_venues_cluster.head(10)

Unnamed: 0,Neighbourhood,Neighbourhood_Latitude,Neighbourhood_Longitude,Venue,Venue_Category,Venue_Latitude,Venue_Longitude,Cluster
0,The Beaches,43.676357,-79.293031,Starbucks,Coffee Shop,43.678798,-79.298045,0
1,The Beaches,43.676357,-79.293031,Grover Pub and Grub,Pub,43.679181,-79.297215,0
2,The Beaches,43.676357,-79.293031,Upper Beaches,Neighborhood,43.680563,-79.292869,0
3,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,Greek Restaurant,43.677621,-79.351434,1
4,"The Danforth West, Riverdale",43.679557,-79.352188,Dolce Gelato,Ice Cream Shop,43.677773,-79.351187,1
5,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,Cosmetics Shop,43.67782,-79.351265,1
6,"The Danforth West, Riverdale",43.679557,-79.352188,Messini Authentic Gyros,Greek Restaurant,43.677827,-79.350569,1
7,"The Danforth West, Riverdale",43.679557,-79.352188,Cafe Fiorentina,Italian Restaurant,43.677743,-79.350115,1
8,"The Danforth West, Riverdale",43.679557,-79.352188,Mezes,Greek Restaurant,43.677962,-79.350196,1
9,"The Danforth West, Riverdale",43.679557,-79.352188,Christina's On The Danforth,Greek Restaurant,43.67824,-79.349185,1


In [33]:
# Cluster 1 (k-means label 0): its venues, without the neighbourhood
# coordinate columns, renumbered from zero.
df_toronto_cluster0 = (
    df_toronto_venues_cluster
    .loc[df_toronto_venues_cluster['Cluster'].eq(0)]
    .drop(columns=['Neighbourhood_Latitude', 'Neighbourhood_Longitude'])
    .reset_index(drop=True)
)
df_toronto_cluster0.head(10)

Unnamed: 0,Neighbourhood,Venue,Venue_Category,Venue_Latitude,Venue_Longitude,Cluster
0,The Beaches,Starbucks,Coffee Shop,43.678798,-79.298045,0
1,The Beaches,Grover Pub and Grub,Pub,43.679181,-79.297215,0
2,The Beaches,Upper Beaches,Neighborhood,43.680563,-79.292869,0
3,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",LCBO,Convenience Store,43.686991,-79.399238,0
4,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",The Market By Longo’s,Supermarket,43.686711,-79.399536,0
5,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Starbucks,Coffee Shop,43.687101,-79.398612,0
6,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Daeco Sushi,Sushi Restaurant,43.687838,-79.395652,0
7,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Union Social Eatery,American Restaurant,43.687895,-79.394916,0
8,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Tim Hortons,Coffee Shop,43.687682,-79.39684,0
9,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Raiders E-Sports Centre,Sports Bar,43.687683,-79.395944,0


In [34]:
# Cluster 2 (k-means label 1): its venues, without the neighbourhood
# coordinate columns, renumbered from zero.
df_toronto_cluster1 = (
    df_toronto_venues_cluster
    .loc[df_toronto_venues_cluster['Cluster'].eq(1)]
    .drop(columns=['Neighbourhood_Latitude', 'Neighbourhood_Longitude'])
    .reset_index(drop=True)
)
df_toronto_cluster1.head(10)

Unnamed: 0,Neighbourhood,Venue,Venue_Category,Venue_Latitude,Venue_Longitude,Cluster
0,"The Danforth West, Riverdale",Pantheon,Greek Restaurant,43.677621,-79.351434,1
1,"The Danforth West, Riverdale",Dolce Gelato,Ice Cream Shop,43.677773,-79.351187,1
2,"The Danforth West, Riverdale",MenEssentials,Cosmetics Shop,43.67782,-79.351265,1
3,"The Danforth West, Riverdale",Messini Authentic Gyros,Greek Restaurant,43.677827,-79.350569,1
4,"The Danforth West, Riverdale",Cafe Fiorentina,Italian Restaurant,43.677743,-79.350115,1
5,"The Danforth West, Riverdale",Mezes,Greek Restaurant,43.677962,-79.350196,1
6,"The Danforth West, Riverdale",Christina's On The Danforth,Greek Restaurant,43.67824,-79.349185,1
7,"The Danforth West, Riverdale",La Diperie,Ice Cream Shop,43.67753,-79.352295,1
8,"The Danforth West, Riverdale",The Big Carrot Natural Food Market,Health Food Store,43.677631,-79.353076,1
9,"The Danforth West, Riverdale",The Auld Spot Pub,Pub,43.677335,-79.35313,1


In [35]:
# Cluster 3 (k-means label 2): its venues, without the neighbourhood
# coordinate columns, renumbered from zero.
df_toronto_cluster2 = (
    df_toronto_venues_cluster
    .loc[df_toronto_venues_cluster['Cluster'].eq(2)]
    .drop(columns=['Neighbourhood_Latitude', 'Neighbourhood_Longitude'])
    .reset_index(drop=True)
)
df_toronto_cluster2.head(10)

Unnamed: 0,Neighbourhood,Venue,Venue_Category,Venue_Latitude,Venue_Longitude,Cluster
0,"Moore Park, Summerhill East",Loring-Wyle Parkette,Park,43.69027,-79.383438,2
1,"Moore Park, Summerhill East",Ravine,Trail,43.690356,-79.386841,2
2,"Moore Park, Summerhill East",Totum Life Science St. Clair,Gym,43.686525,-79.383449,2
3,"Moore Park, Summerhill East",Moorevale Park,Playground,43.69361,-79.383465,2
4,"Moore Park, Summerhill East",Mount Pleasant Road And Moore,Intersection,43.69356,-79.3846,2
5,Rosedale,Rosedale Park,Playground,43.682328,-79.378934,2
6,Rosedale,Whitney Park,Park,43.682036,-79.373788,2
7,Rosedale,Alex Murray Parkette,Park,43.6783,-79.382773,2
8,Rosedale,Milkman's Lane,Trail,43.676352,-79.373842,2
9,"Forest Hill North, Forest Hill West",Kay Gardner Beltline Trail,Trail,43.700726,-79.410101,2


In [36]:
# Cluster 4 (k-means label 3): its venues, without the neighbourhood
# coordinate columns, renumbered from zero.
df_toronto_cluster3 = (
    df_toronto_venues_cluster
    .loc[df_toronto_venues_cluster['Cluster'].eq(3)]
    .drop(columns=['Neighbourhood_Latitude', 'Neighbourhood_Longitude'])
    .reset_index(drop=True)
)
df_toronto_cluster3.head(10)

Unnamed: 0,Neighbourhood,Venue,Venue_Category,Venue_Latitude,Venue_Longitude,Cluster
0,Lawrence Park,Lawrence Park Ravine,Park,43.726963,-79.394382,3
1,Lawrence Park,Dim Sum Deluxe,Dim Sum Restaurant,43.726953,-79.39426,3
2,Lawrence Park,Zodiac Swim School,Swim School,43.728532,-79.38286,3
3,Lawrence Park,TTC Bus #162 - Lawrence-Donway,Bus Line,43.728026,-79.382805,3


In [37]:
# Cluster 5 (k-means label 4): its venues, without the neighbourhood
# coordinate columns, renumbered from zero.
df_toronto_cluster4 = (
    df_toronto_venues_cluster
    .loc[df_toronto_venues_cluster['Cluster'].eq(4)]
    .drop(columns=['Neighbourhood_Latitude', 'Neighbourhood_Longitude'])
    .reset_index(drop=True)
)
df_toronto_cluster4.head(10)

Unnamed: 0,Neighbourhood,Venue,Venue_Category,Venue_Latitude,Venue_Longitude,Cluster
0,Roselawn,Rosalind's Garden Oasis,Garden,43.712189,-79.411978,4


In [38]:
# Fresh base map centred on the geocoded latitude/longitude
# (this rebinds map_toronto).
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# One blue circle marker per venue in df_toronto_cluster3; the popup shows
# the venue name and its category.
for lat, lng, name, category in df_toronto_cluster3[
        ['Venue_Latitude', 'Venue_Longitude', 'Venue', 'Venue_Category']
].itertuples(index=False, name=None):
    label = 'Name: {0}<br>Category: {1}'.format(name, category)
    popup = folium.Popup(
        folium.IFrame(html=label, width=300, height=100),
        parse_html=True,
    )
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

map_toronto