In [57]:
import pandas as pd
import numpy as np
import json
import math
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

In [3]:
# Load the Toronto postal-code table. `read_html` returns a list of
# DataFrames; the first one is the table used throughout this notebook.
dfs = pd.read_html('toronto.txt')

# Build the cleaned frame in one chain instead of mutating `dfs[0]` in place,
# so the raw table stays available and the lineage of `df` is explicit.
df = (
    dfs[0]
    # 'Not assigned' is the source's placeholder for a missing value.
    .replace('Not assigned', math.nan)
    # Rows without a borough are dropped.
    .dropna(subset=['Borough'])
    # Neighborhoods are separated by ' / ' in the source; use commas instead.
    # regex=False makes this a literal replacement on every pandas version.
    .assign(
        Neighborhood=lambda frame: frame['Neighborhood'].str.replace(' / ', ', ', regex=False)
    )
)
df.shape


(103, 3)

In [4]:
# Latitude/longitude per postal code.
geodf = pd.read_csv('geodata.csv')

# Add empty Latitude/Longitude columns to the main frame (reindex fills the
# new columns with NaN and drops any column not listed).
headers_list = ['Postal code','Borough',"Neighborhood","Latitude","Longitude"]
df = df.reindex(columns=headers_list)

# Look coordinates up by postal code instead of comparing every row of `df`
# against every row of `geodf`. The previous nested loop iterated over
# len(geodf) while indexing `df`, so it only covered all rows when both frames
# happened to have the same length, and it wrote through chained indexing.
# keep='last' mirrors the old loop, where a later duplicate overwrote an
# earlier one; postal codes with no match are left as NaN.
coords_by_code = (
    geodf
    .drop_duplicates(subset='Postal Code', keep='last')
    .set_index('Postal Code')
)
df['Latitude'] = df['Postal code'].map(coords_by_code['Latitude'])
df['Longitude'] = df['Postal code'].map(coords_by_code['Longitude'])
df.head(1)

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656


In [5]:
# Base map centred on Toronto (coordinates given as numbers, not strings).
# NOTE(review): the name `map` shadows Python's built-in map(); it is kept
# because other cells may reference it -- consider renaming notebook-wide.
map = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# Rows whose postal code had no coordinate match carry NaN; folium rejects NaN
# locations, so only rows with both coordinates are plotted.
plottable = df.dropna(subset=['Latitude', 'Longitude'])

# Add one circle marker per postal-code row, labelled "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood in zip(
    plottable['Latitude'],
    plottable['Longitude'],
    plottable['Borough'],
    plottable['Neighborhood'],
):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True escapes the label text before it is embedded in the popup.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map)

In [7]:
import os

import foursquare

# Credentials are read from the environment instead of being hard-coded in the
# notebook. A missing variable raises KeyError naming the variable.
# NOTE(review): keys that were previously committed in this cell should be
# treated as compromised and rotated.
client = foursquare.Foursquare(
    client_id=os.environ['FOURSQUARE_CLIENT_ID'],
    client_secret=os.environ['FOURSQUARE_CLIENT_SECRET'],
)
headers_list = ['Postal code','Borough',"Neighborhood","Latitude","Longitude","Average Price"]
df = df.reindex(columns=headers_list)

# For each row, ask Foursquare for up to 4 recommended venues around the row's
# coordinates and average the price tier of those that report one.
# NOTE: `latitude` / `longitude` remain defined at notebook scope after the
# loop (holding the last row's values as strings).
average_prices = []
for x in range(len(df)):
    priceSum = 0
    integerCount = 0
    latitude = str(df['Latitude'].iloc[x])
    longitude = str(df['Longitude'].iloc[x])
    print((latitude+','+longitude))
    test = client.venues.explore(params={'ll':(latitude+','+longitude),'limit':4})
    for i in test['groups'][0]['items']:
        try:
            venueid = i['venue']['id']
            # The venue details carry the price tier; venues without a
            # 'price' entry raise KeyError and are skipped below.
            price = client.venues(VENUE_ID=venueid)['venue']['price']['tier']
            print(price)
            priceSum += price
            integerCount += 1
        except Exception:
            # Best effort: a venue that cannot be priced is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print("No price value available")
    # Rows with no priced venue get 0 (kept so downstream clustering receives
    # a numeric value for every row).
    avgPrice = priceSum/integerCount if integerCount > 0 else 0
    average_prices.append(avgPrice)

# Assign the whole column at once; writing through df[col].iloc[x] is chained
# indexing and is not guaranteed to update `df`.
df['Average Price'] = pd.Series(average_prices, index=df.index, dtype='float64')



43.7532586,-79.3296565
2
No price value available
2
No price value available
43.725882299999995,-79.31557159999998
2
1
No price value available
2
43.6542599,-79.3606359
1
1
2
No price value available
43.718517999999996,-79.46476329999999
No price value available
2
2
No price value available
43.6623015,-79.3894938
No price value available
1
2
3
43.6678556,-79.53224240000002
1
No price value available
3
No price value available
43.806686299999996,-79.19435340000001
No price value available
No price value available
No price value available
No price value available
43.745905799999996,-79.352188
2
1
2
No price value available
43.7063972,-79.309937
No price value available
2
No price value available
1
43.6571618,-79.37893709999999
No price value available
2
No price value available
No price value available
43.709577,-79.44507259999999
No price value available
2
No price value available
2
43.6509432,-79.55472440000001
No price value available
1
No price value available
1
43.7845351,-79.160497

In [49]:
from sklearn.cluster import KMeans

kclusters = 4

# Cluster on the numeric columns only. Selecting them by name (rather than
# dropping the text columns) keeps a previously added 'Cluster Labels' column
# out of the features when this cell is re-run.
dfData = df[['Latitude', 'Longitude', 'Average Price']]

# n_init is pinned because scikit-learn's default changed between releases;
# random_state fixes the initialisation so labels are reproducible.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(dfData)

# Put the labels in the first column. df.insert raises if the column already
# exists, so overwrite it on a re-run instead of inserting again.
if 'Cluster Labels' in df.columns:
    df['Cluster Labels'] = kmeans.labels_
else:
    df.insert(0, 'Cluster Labels', kmeans.labels_)

# Display the label assigned to each row.
kmeans.labels_

array([0, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 2, 2, 2, 2, 3, 0, 2, 2, 3, 3, 0,
       2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 1, 3, 2, 2, 1, 0, 0, 0,
       0, 0, 3, 0, 0, 3, 3, 2, 2, 3, 0, 2, 0, 2, 2, 3, 1, 2, 0, 0, 2, 2,
       3, 0, 0, 3, 2, 2, 0, 2, 0, 2, 2, 3, 2, 2, 0, 0, 0, 2, 2, 2, 0, 1,
       0, 0, 2, 2, 3, 0, 0, 1, 0, 0, 3, 2, 2, 2, 2])

In [73]:
# Create the map, centred on the mean of the plotted coordinates rather than
# on loop variables left over from an earlier cell.
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]
map_clusters = folium.Map(location=map_center, zoom_start=11)

# One evenly spaced colour per cluster, as hex strings.
colors_array = cm.gist_rainbow_r(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per row, coloured by its cluster label.
for lat, lon, poi, cluster in zip(df['Latitude'], df['Longitude'], df['Neighborhood'], df['Cluster Labels']):
    # Labels in this notebook start at 0, so they index the colour list
    # directly (the earlier `cluster-1` sent label 0 to the last colour via
    # negative indexing).
    marker_color = rainbow[int(cluster)]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=1,
    ).add_to(map_clusters)

map_clusters

<img src="clusterMap.png">