# Ontario Cities Capstone Project

Goal of project is to examine cities in Ontario and evaluate common popular venues by clustering each city into different categories

In [141]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests

## Data Collection

Grab the table of cities in Ontario from wikipedia

In [142]:
url = 'https://en.wikipedia.org/wiki/List_of_cities_in_Ontario'

Clean the table and place it in the df DataFrame

In [143]:
source = requests.get(url).text
soup = BeautifulSoup(source,'lxml')
table = soup.find('table').text
table = soup.find_all('table')
df = pd.read_html(str(table).strip())[0]
df.columns = ['City','Status','Division','Population (2016)','Population (2011)','Change','Area','Density']
df['City'] = df['City'].map(lambda x: (x.split("["))[0])

In [144]:
# Remove the total cities row
df.drop(df.tail(1).index,inplace=True)

Drop Clarence-Rockland as Foursquares does not register any venues

In [145]:
df = df[df.City != 'Sarnia']
df = df[df.City != 'Clarence-Rockland']
df = df[df.City != 'Quinte West']

In [146]:
df.head()

Unnamed: 0,City,Status,Division,Population (2016),Population (2011),Change,Area,Density
0,Barrie,Single-tier,Simcoe,141434,136063,3.9,99.04,1428.0
1,Belleville,Single-tier,Hastings,50716,49454,2.6,247.25,205.1
2,Brampton,Lower-tier,Peel,593638,523911,13.3,266.36,2228.7
3,Brant,Single-tier,Brant,36707,35638,3.0,843.25,43.5
4,Brantford,Single-tier,Brant,97496,93650,4.1,72.44,1345.9


Add the longitute and latitude value from the mapquest api

In [147]:
# Map quest credientials
mapquest_key = 'GM4fJLkCuA5Ea1xoU4r14ANevZ76RGcq'
mapquest_secret = 'OP7bhP07rJQUo1qn'

In [148]:
for index, row in df.iterrows():
    location = row['City'] + ", ON"
    url = 'https://www.mapquestapi.com/geocoding/v1/address?key={}&inFormat=kvp&outFormat=json&location={}&thumbMaps=false'.format(
    mapquest_key, 
    location)
    response = requests.get(url).json()
    df.at[index,'Latitude'] = response['results'][0]['locations'][0]['latLng']['lat']
    df.at[index,'Longitude'] = response['results'][0]['locations'][0]['latLng']['lng']
    #df.to_csv('OntarioCities.csv', sep=',',index=False)

In [149]:
df.head()

Unnamed: 0,City,Status,Division,Population (2016),Population (2011),Change,Area,Density,Latitude,Longitude
0,Barrie,Single-tier,Simcoe,141434,136063,3.9,99.04,1428.0,44.38934,-79.685418
1,Belleville,Single-tier,Hastings,50716,49454,2.6,247.25,205.1,44.164013,-77.382466
2,Brampton,Lower-tier,Peel,593638,523911,13.3,266.36,2228.7,43.68402,-79.759046
3,Brant,Single-tier,Brant,36707,35638,3.0,843.25,43.5,43.101486,-80.428403
4,Brantford,Single-tier,Brant,97496,93650,4.1,72.44,1345.9,43.139405,-80.263646


Create a subset DataFrame that only contains the city and the latitude and longitude

In [150]:
df_ontario = df[['City','Latitude','Longitude']]

In [151]:
df_ontario.head()

Unnamed: 0,City,Latitude,Longitude
0,Barrie,44.38934,-79.685418
1,Belleville,44.164013,-77.382466
2,Brampton,43.68402,-79.759046
3,Brant,43.101486,-80.428403
4,Brantford,43.139405,-80.263646


## Visualizing Data

In [152]:
import folium
latitude = 48.7
longitude = -80.3

In [153]:
map_ontario = folium.Map(location=[latitude, longitude], zoom_start=4)

# add markers to map
for lat, lng, name in zip(df['Latitude'], df['Longitude'], df['City']):
    label = '{}, Ontario'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='white',
        fill_opacity=0.7,
        parse_html=False).add_to(map_ontario)  

In [154]:
map_ontario

## Using FourSquare API

In [155]:
foursquare_id = 'OXWFKNWJSZGHZ3RSYHICFVVCVX3ZHK0Y1QIFY5VFZ0WQCXBO' # your Foursquare ID
foursquare_secret = 'KDVR1PB4ZBHOCPIAPPAVID05YD5KCINHO3BJDTDPNTSLVLLL' # your Foursquare Secret
version = '20180605' # Foursquare API version

LIMIT = 100
#radius = 500 # was previously 500

In [156]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            foursquare_id, 
            foursquare_secret, 
            version, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 
                  'City Latitude', 
                  'City Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [157]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [158]:
ontario_venues = getNearbyVenues(names=df['City'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Barrie
Belleville
Brampton
Brant
Brantford
Brockville
Burlington
Cambridge
Cornwall
Dryden
Elliot Lake
Greater Sudbury
Guelph
Haldimand County
Hamilton
Kawartha Lakes
Kenora
Kingston
Kitchener
London
Markham
Mississauga
Niagara Falls
Norfolk County
North Bay
Orillia
Oshawa
Ottawa
Owen Sound
Pembroke
Peterborough
Pickering
Port Colborne
Prince Edward County
Richmond Hill
Sault Ste. Marie
St. Catharines
St. Thomas
Stratford
Temiskaming Shores
Thorold
Thunder Bay
Timmins
Toronto
Vaughan
Waterloo
Welland
Windsor
Woodstock


In [159]:
print(ontario_venues.shape)
print(ontario_venues.head())
count = ontario_venues.groupby('City').count()


(1152, 7)
     City  City Latitude  City Longitude                           Venue  \
0  Barrie       44.38934      -79.685418    Flying Monkeys Craft Brewery   
1  Barrie       44.38934      -79.685418                   Heritage Park   
2  Barrie       44.38934      -79.685418  Donaleigh's Irish Public House   
3  Barrie       44.38934      -79.685418          Swirleez Frozen Yogurt   
4  Barrie       44.38934      -79.685418           Kenzington Burger Bar   

   Venue Latitude  Venue Longitude      Venue Category  
0       44.389279       -79.686682             Brewery  
1       44.388158       -79.687047                Park  
2       44.389344       -79.689112                 Pub  
3       44.388950       -79.687343  Frozen Yogurt Shop  
4       44.389292       -79.688787                 Bar  


In [160]:
print(count.shape)
count

(48, 6)


Unnamed: 0_level_0,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Barrie,30,30,30,30,30,30
Belleville,13,13,13,13,13,13
Brampton,21,21,21,21,21,21
Brant,4,4,4,4,4,4
Brantford,28,28,28,28,28,28
Brockville,13,13,13,13,13,13
Burlington,59,59,59,59,59,59
Cambridge,7,7,7,7,7,7
Cornwall,16,16,16,16,16,16
Dryden,6,6,6,6,6,6


Remove cities that have less than 5 venues

In [161]:
df_subset_cities = count.loc[count['Venue'] >= 5]

In [162]:
df_subset_cities

Unnamed: 0_level_0,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Barrie,30,30,30,30,30,30
Belleville,13,13,13,13,13,13
Brampton,21,21,21,21,21,21
Brantford,28,28,28,28,28,28
Brockville,13,13,13,13,13,13
Burlington,59,59,59,59,59,59
Cambridge,7,7,7,7,7,7
Cornwall,16,16,16,16,16,16
Dryden,6,6,6,6,6,6
Greater Sudbury,19,19,19,19,19,19


In [163]:
df_subset_cities.shape

(42, 6)

In [164]:
ontario_venues = pd.merge(df_subset_cities,ontario_venues,on='City')

In [165]:
ontario_venues

Unnamed: 0,City,City Latitude_x,City Longitude_x,Venue_x,Venue Latitude_x,Venue Longitude_x,Venue Category_x,City Latitude_y,City Longitude_y,Venue_y,Venue Latitude_y,Venue Longitude_y,Venue Category_y
0,Barrie,30,30,30,30,30,30,44.389340,-79.685418,Flying Monkeys Craft Brewery,44.389279,-79.686682,Brewery
1,Barrie,30,30,30,30,30,30,44.389340,-79.685418,Heritage Park,44.388158,-79.687047,Park
2,Barrie,30,30,30,30,30,30,44.389340,-79.685418,Donaleigh's Irish Public House,44.389344,-79.689112,Pub
3,Barrie,30,30,30,30,30,30,44.389340,-79.685418,Swirleez Frozen Yogurt,44.388950,-79.687343,Frozen Yogurt Shop
4,Barrie,30,30,30,30,30,30,44.389340,-79.685418,Kenzington Burger Bar,44.389292,-79.688787,Bar
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1127,Woodstock,19,19,19,19,19,19,43.130387,-80.754677,Fritzie's,43.128463,-80.759202,Hot Dog Joint
1128,Woodstock,19,19,19,19,19,19,43.130387,-80.754677,Foodland - Woodstock,43.132491,-80.749727,Grocery Store
1129,Woodstock,19,19,19,19,19,19,43.130387,-80.754677,Zellers,43.132231,-80.749408,Department Store
1130,Woodstock,19,19,19,19,19,19,43.130387,-80.754677,VIA Rail Woodstock,43.126503,-80.752256,Train Station


Now clean the merged dataframe by taking the necessary columns and renaming

In [166]:
ontario_venues = ontario_venues[['City','City Latitude_y','City Longitude_y','Venue_y','Venue Latitude_y','Venue Longitude_y','Venue Category_y']]
ontario_venues.rename(columns={'City Latitude_y':'City Latitude','City Longitude_y':'City Longitude','Venue_y':'Venue','Venue Latitude_y':'Venue Latitude','Venue Longitude_y':'Venue Longitude','Venue Category_y':'Venue Category'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [167]:
# one hot encoding
ontario_onehot = pd.get_dummies(ontario_venues[['Venue Category']], prefix="", prefix_sep="")
print(ontario_onehot)
# add city column back to dataframe
ontario_onehot['City'] = ontario_venues['City'] 

# move city column to the first column
fixed_columns = [ontario_onehot.columns[-1]] + list(ontario_onehot.columns[:-1])
ontario_onehot = ontario_onehot[fixed_columns]

ontario_onehot.head()

      ATM  Accessories Store  American Restaurant  Arcade  Art Gallery  \
0       0                  0                    0       0            0   
1       0                  0                    0       0            0   
2       0                  0                    0       0            0   
3       0                  0                    0       0            0   
4       0                  0                    0       0            0   
...   ...                ...                  ...     ...          ...   
1127    0                  0                    0       0            0   
1128    0                  0                    0       0            0   
1129    0                  0                    0       0            0   
1130    0                  0                    0       0            0   
1131    0                  0                    0       0            0   

      Art Museum  Arts & Crafts Store  Asian Restaurant  BBQ Joint  Bakery  \
0              0                 

Unnamed: 0,City,ATM,Accessories Store,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,BBQ Joint,...,Tunnel,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Barrie,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Barrie,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Barrie,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Barrie,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Barrie,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group each rows by the city and tally mean of the frequency for each venue category

In [168]:
ontario_grouped = ontario_onehot.groupby('City').mean().reset_index()
ontario_grouped

Unnamed: 0,City,ATM,Accessories Store,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,BBQ Joint,...,Tunnel,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Barrie,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Belleville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Brampton,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,...,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0
3,Brantford,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Brockville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Burlington,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,...,0.0,0.0,0.033898,0.0,0.0,0.016949,0.0,0.0,0.0,0.0
6,Cambridge,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Cornwall,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0
8,Dryden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Greater Sudbury,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0


Print the top ten categories for each city

In [169]:
num_top_venues = 5

for hood in ontario_grouped['City']:
    print("----"+hood+"----")
    temp = ontario_grouped[ontario_grouped['City'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Barrie----
             venue  freq
0              Pub  0.10
1      Coffee Shop  0.07
2             Café  0.07
3  Thai Restaurant  0.07
4              Bar  0.07


----Belleville----
                           venue  freq
0                     Steakhouse  0.08
1  Vegetarian / Vegan Restaurant  0.08
2              French Restaurant  0.08
3                   Concert Hall  0.08
4                     Beer Store  0.08


----Brampton----
                venue  freq
0  Italian Restaurant  0.10
1                Bank  0.10
2         Coffee Shop  0.10
3      Sandwich Place  0.10
4      Farmers Market  0.05


----Brantford----
                  venue  freq
0           Coffee Shop  0.11
1            Restaurant  0.07
2  Fast Food Restaurant  0.07
3                  Café  0.07
4        Sandwich Place  0.07


----Brockville----
                venue  freq
0                 Pub  0.15
1            Pharmacy  0.15
2     Harbor / Marina  0.15
3  Italian Restaurant  0.08
4              Bakery  0.08


--

Turning this into a DataFrame

In [170]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [171]:

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] = ontario_grouped['City']

for ind in np.arange(ontario_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ontario_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Barrie,Pub,Bar,Coffee Shop,Vegetarian / Vegan Restaurant,Café
1,Belleville,Ice Cream Shop,Record Shop,Beer Store,Thai Restaurant,Café
2,Brampton,Italian Restaurant,Bank,Coffee Shop,Sandwich Place,Pub
3,Brantford,Coffee Shop,Restaurant,Pizza Place,Café,Sandwich Place
4,Brockville,Harbor / Marina,Pharmacy,Pub,BBQ Joint,Steakhouse
5,Burlington,Coffee Shop,Pub,Restaurant,Bank,Mediterranean Restaurant
6,Cambridge,Insurance Office,Locksmith,General Entertainment,Pharmacy,Rental Car Location
7,Cornwall,Coffee Shop,Shopping Mall,Juice Bar,Eastern European Restaurant,Fast Food Restaurant
8,Dryden,Sandwich Place,Beer Store,Pharmacy,Supermarket,Bank
9,Greater Sudbury,Coffee Shop,Hotel,Café,Breakfast Spot,Rock Club


In [172]:
from sklearn.cluster import KMeans

In [173]:
# set number of clusters
kclusters = 6

ontario_grouped_clustering = ontario_grouped.drop('City', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ontario_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:50] 

array([4, 4, 3, 4, 5, 3, 3, 3, 5, 3, 4, 4, 4, 2, 3, 4, 3, 1, 3, 3, 5, 3,
       5, 3, 4, 4, 4, 5, 4, 3, 4, 4, 0, 3, 0, 1, 3, 2, 4, 5, 4, 3])

In [174]:
# add clustering labels
city_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

ontario_merged = df_ontario

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
ontario_merged = ontario_merged.join(city_venues_sorted.set_index('City'), on='City')
ontario_merged = ontario_merged.dropna()


In [175]:
ontario_merged # check the last columns!

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Barrie,44.38934,-79.685418,4.0,Pub,Bar,Coffee Shop,Vegetarian / Vegan Restaurant,Café
1,Belleville,44.164013,-77.382466,4.0,Ice Cream Shop,Record Shop,Beer Store,Thai Restaurant,Café
2,Brampton,43.68402,-79.759046,3.0,Italian Restaurant,Bank,Coffee Shop,Sandwich Place,Pub
4,Brantford,43.139405,-80.263646,4.0,Coffee Shop,Restaurant,Pizza Place,Café,Sandwich Place
5,Brockville,44.58958,-75.684303,5.0,Harbor / Marina,Pharmacy,Pub,BBQ Joint,Steakhouse
6,Burlington,43.325991,-79.798296,3.0,Coffee Shop,Pub,Restaurant,Bank,Mediterranean Restaurant
7,Cambridge,43.397792,-80.307223,3.0,Insurance Office,Locksmith,General Entertainment,Pharmacy,Rental Car Location
9,Cornwall,45.018258,-74.728577,3.0,Coffee Shop,Shopping Mall,Juice Bar,Eastern European Restaurant,Fast Food Restaurant
10,Dryden,49.782056,-92.834388,5.0,Sandwich Place,Beer Store,Pharmacy,Supermarket,Bank
12,Greater Sudbury,46.48952,-80.989155,3.0,Coffee Shop,Hotel,Café,Breakfast Spot,Rock Club


In [176]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib import cm

In [177]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.Set1(np.linspace(0, 1, len(ys)))
tab10 = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ontario_merged['Latitude'], ontario_merged['Longitude'], ontario_merged['City'], ontario_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=tab10[int(cluster-1)],
        fill=True,
        fill_color=tab10[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [178]:
map_clusters

Cluster 0 - Pizza Cities 

In [179]:
cluster0 = ontario_merged.loc[ontario_merged['Cluster Labels'] == 0, ontario_merged.columns[[0] + [1] + list(range(2, ontario_merged.shape[1]))]]
cluster0

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
40,St. Thomas,42.775142,-81.185978,0.0,Pizza Place,Museum,Coffee Shop,American Restaurant,Convenience Store
44,Thunder Bay,48.38251,-89.245486,0.0,Pizza Place,Pharmacy,Discount Store,Convenience Store,Burger Joint


In [191]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.Set1(np.linspace(0, 1, len(ys)))
tab10 = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cluster0['Latitude'], cluster0['Longitude'], cluster0['City'], cluster0['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=tab10[int(cluster-1)],
        fill=True,
        fill_color=tab10[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters


Cluster 1 - Restaurant Cities

In [181]:
cluster1 = ontario_merged.loc[ontario_merged['Cluster Labels'] == 1, ontario_merged.columns[[0] + [1] + list(range(2, ontario_merged.shape[1]))]]
cluster1

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
21,Markham,43.873987,-79.259867,1.0,Italian Restaurant,Coffee Shop,Ice Cream Shop,Bakery,Café
45,Timmins,48.476115,-81.328332,1.0,Italian Restaurant,Wings Joint,Ice Cream Shop,Supermarket,Bank


In [192]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.Set1(np.linspace(0, 1, len(ys)))
tab10 = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cluster1['Latitude'], cluster1['Longitude'], cluster1['City'], cluster1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=tab10[int(cluster-1)],
        fill=True,
        fill_color=tab10[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Cluster 2 - Outlier City

In [187]:
cluster2 = ontario_merged.loc[ontario_merged['Cluster Labels'] == 2, ontario_merged.columns[[0] + [1] + list(range(2, ontario_merged.shape[1]))]]
cluster2

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
17,Kenora,49.765258,-94.477687,2.0,Hotel,Fast Food Restaurant,Grocery Store,Sandwich Place,Yoga Studio
47,Vaughan,43.850443,-79.511368,2.0,Fast Food Restaurant,American Restaurant,Pizza Place,Music Venue,Park


In [193]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.Set1(np.linspace(0, 1, len(ys)))
tab10 = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cluster2['Latitude'], cluster2['Longitude'], cluster2['City'], cluster2['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=tab10[int(cluster-1)],
        fill=True,
        fill_color=tab10[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Cluster 3 - Coffee Shop Cities

In [189]:
cluster3 = ontario_merged.loc[ontario_merged['Cluster Labels'] == 3, ontario_merged.columns[[0] + [1] + list(range(2, ontario_merged.shape[1]))]]
cluster3

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Brampton,43.68402,-79.759046,3.0,Italian Restaurant,Bank,Coffee Shop,Sandwich Place,Pub
6,Burlington,43.325991,-79.798296,3.0,Coffee Shop,Pub,Restaurant,Bank,Mediterranean Restaurant
7,Cambridge,43.397792,-80.307223,3.0,Insurance Office,Locksmith,General Entertainment,Pharmacy,Rental Car Location
9,Cornwall,45.018258,-74.728577,3.0,Coffee Shop,Shopping Mall,Juice Bar,Eastern European Restaurant,Fast Food Restaurant
12,Greater Sudbury,46.48952,-80.989155,3.0,Coffee Shop,Hotel,Café,Breakfast Spot,Rock Club
18,Kingston,44.230468,-76.481187,3.0,Pub,Coffee Shop,Italian Restaurant,Hotel,Café
20,London,42.984267,-81.247534,3.0,Indian Restaurant,Italian Restaurant,Hotel,Bookstore,Coffee Shop
22,Mississauga,43.586162,-79.646215,3.0,Performing Arts Venue,Café,Yoga Studio,Ramen Restaurant,Bank
23,Niagara Falls,43.106641,-79.065209,3.0,Bus Station,Train Station,Pharmacy,General Entertainment,Beer Store
26,Orillia,44.611477,-79.415624,3.0,Café,Bookstore,Pharmacy,Pizza Place,Pub


In [194]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.Set1(np.linspace(0, 1, len(ys)))
tab10 = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cluster3['Latitude'], cluster3['Longitude'], cluster3['City'], cluster3['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=tab10[int(cluster-1)],
        fill=True,
        fill_color=tab10[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Cluster 4 - Cafe and Bar

In [195]:
cluster4 = ontario_merged.loc[ontario_merged['Cluster Labels'] == 4, ontario_merged.columns[[0] + [1] + list(range(2, ontario_merged.shape[1]))]]
cluster4

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Barrie,44.38934,-79.685418,4.0,Pub,Bar,Coffee Shop,Vegetarian / Vegan Restaurant,Café
1,Belleville,44.164013,-77.382466,4.0,Ice Cream Shop,Record Shop,Beer Store,Thai Restaurant,Café
4,Brantford,43.139405,-80.263646,4.0,Coffee Shop,Restaurant,Pizza Place,Café,Sandwich Place
13,Guelph,43.544774,-80.248091,4.0,Café,Diner,Restaurant,Pub,Indian Restaurant
15,Hamilton,43.256503,-79.874423,4.0,Restaurant,Coffee Shop,Sandwich Place,Café,Bar
16,Kawartha Lakes,44.355002,-78.742927,4.0,Coffee Shop,Café,Pizza Place,American Restaurant,Pharmacy
19,Kitchener,43.449787,-80.489089,4.0,Café,Sandwich Place,Vietnamese Restaurant,Restaurant,Coffee Shop
29,Owen Sound,44.573961,-80.923275,4.0,Restaurant,Fast Food Restaurant,Gas Station,Café,Pharmacy
31,Peterborough,44.305913,-78.320087,4.0,Café,Mexican Restaurant,Restaurant,Pub,Bank
32,Pickering,43.834365,-79.082022,4.0,Restaurant,Clothing Store,Movie Theater,Fast Food Restaurant,Department Store


In [196]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.Set1(np.linspace(0, 1, len(ys)))
tab10 = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cluster4['Latitude'], cluster4['Longitude'], cluster4['City'], cluster4['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=tab10[int(cluster-1)],
        fill=True,
        fill_color=tab10[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Cluster 5: Sandwich Place

In [197]:
cluster5 = ontario_merged.loc[ontario_merged['Cluster Labels'] == 5, ontario_merged.columns[[0] + [1] + list(range(2, ontario_merged.shape[1]))]]
cluster5

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
5,Brockville,44.58958,-75.684303,5.0,Harbor / Marina,Pharmacy,Pub,BBQ Joint,Steakhouse
10,Dryden,49.782056,-92.834388,5.0,Sandwich Place,Beer Store,Pharmacy,Supermarket,Bank
25,North Bay,46.309464,-79.46163,5.0,Restaurant,Sandwich Place,Gym,Indian Restaurant,Bank
27,Oshawa,43.896085,-78.865128,5.0,Sandwich Place,Italian Restaurant,Theater,Indian Restaurant,Fast Food Restaurant
33,Port Colborne,42.886251,-79.251371,5.0,Gas Station,Thai Restaurant,Supermarket,Seafood Restaurant,Sandwich Place
49,Welland,42.993507,-79.228177,5.0,Sandwich Place,Furniture / Home Store,Chinese Restaurant,Tunnel,Coffee Shop


In [198]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.Set1(np.linspace(0, 1, len(ys)))
tab10 = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cluster5['Latitude'], cluster5['Longitude'], cluster5['City'], cluster5['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=tab10[int(cluster-1)],
        fill=True,
        fill_color=tab10[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters