In [1]:
import requests
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium

In [2]:
data = pd.read_csv("/content/Toronto_Coordinates_data_with_ZipCode.csv")
print("Toronto data has {} boroughs and {} neighborhoods".format(len(data['Borough'].unique()), data.shape[0]))

Toronto data has 15 boroughs and 103 neighborhoods


In [3]:
address = "Toronto"
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [4]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, borough, neighborhood in zip(data.Latitude, data.Longitude, data.Borough, data.Neighborhood):
  label = "{}, {}".format(neighborhood, borough)
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker([lat, lng], radius=10, color="blue", popup=label, fill=True, fill_color="red", fill_opacity=0.6).add_to(map_toronto)
map_toronto

In [5]:
CLIENT_ID = "5BGRRQUUPOFG3NAJLCH2NHRG4E4RJAJHAWT5V4TAFSLAHNBN"
CLIENT_SECRET = "QYESNXMCCMEWWTXPHOC2CDUKJIWC1U0HSYCWCRX2Z1VO43UM"
ACCESS_TOKEN = "G5OECRZFCQBHPYXW2VDEHZDIK4VR0TWK5PWDWZLCAXZMTFZE"
VERSION = "20180604"
LIMIT = 30

In [6]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [7]:
toronto_venues = getNearbyVenues(data.Neighborhood, data.Latitude, data.Longitude)

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East
The Danforth

In [8]:
toronto_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Downsview North",23,23,23,23,23,23
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",26,26,26,26,26,26
...,...,...,...,...,...,...
Willowdale South,30,30,30,30,30,30
Willowdale West,5,5,5,5,5,5
Woburn,3,3,3,3,3,3
Woodbine Heights,8,8,8,8,8,8


In [9]:
print("There are {} unique categories".format(len(toronto_venues['Venue Category'].unique())))

There are 235 unique categories


In [10]:
toronto_one_hot = pd.get_dummies(toronto_venues[["Venue Category"]], prefix="", prefix_sep="")
toronto_one_hot["Neighborhood"] = toronto_venues["Neighborhood"]
fixed_columns = [toronto_one_hot.columns[-1]] + list(toronto_one_hot.columns[:-1])
toronto_one_hot = toronto_one_hot[fixed_columns]
toronto_one_hot.head()
toronto_one_hot.shape

(1340, 235)

In [11]:
toronto_grouped = toronto_one_hot.groupby("Neighborhood").mean().reset_index()
toronto_grouped
toronto_grouped.shape

(98, 235)

In [12]:
top_venues_number = 5
for hood in toronto_grouped["Neighborhood"]:
  print("----------{}----------".format(hood))
  temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
  temp.columns = ['venue','freq']
  temp = temp.iloc[1:]
  temp['freq'] = temp['freq'].astype(float)
  temp = temp.round({'freq': 2})
  print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(top_venues_number))
  print('\n')

----------Agincourt----------
                       venue  freq
0                     Lounge  0.25
1  Latin American Restaurant  0.25
2             Breakfast Spot  0.25
3               Skating Rink  0.25
4                Yoga Studio  0.00


----------Alderwood, Long Branch----------
          venue  freq
0   Pizza Place  0.22
1   Coffee Shop  0.11
2  Skating Rink  0.11
3      Pharmacy  0.11
4          Pool  0.11


----------Bathurst Manor, Wilson Heights, Downsview North----------
           venue  freq
0    Coffee Shop  0.09
1           Bank  0.09
2           Park  0.04
3   Intersection  0.04
4  Shopping Mall  0.04


----------Bayview Village----------
                 venue  freq
0   Chinese Restaurant  0.25
1                 Bank  0.25
2                 Café  0.25
3  Japanese Restaurant  0.25
4          Yoga Studio  0.00


----------Bedford Park, Lawrence Manor East----------
                venue  freq
0  Italian Restaurant  0.12
1         Pizza Place  0.08
2         Coffee Shop  

In [13]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [14]:
top_venues = 10
indicators = ['st', 'nd', 'rd']
columns = ['Neighborhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Coffee Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
1,"Alderwood, Long Branch",Pizza Place,Pool,Coffee Shop,Skating Rink,Pharmacy,Gym,Pub,Sandwich Place,Women's Store,Creperie
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Health Food Store,Shopping Mall,Diner,Bridal Shop,Deli / Bodega,Restaurant,Intersection,Ice Cream Shop
3,Bayview Village,Chinese Restaurant,Bank,Japanese Restaurant,Café,Women's Store,Curling Ice,Donut Shop,Dog Run,Distribution Center,Discount Store
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Pizza Place,Restaurant,Sandwich Place,Sushi Restaurant,Thai Restaurant,Liquor Store,Juice Bar,Indian Restaurant


In [15]:
from sklearn.cluster import KMeans
clusters_number = 5
toronto_grouped_clustering = toronto_grouped.drop("Neighborhood", 1)
kmeans = KMeans(n_clusters=clusters_number, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 2], dtype=int32)

In [16]:
neighborhoods_venues_sorted.insert(0, "Cluster labels", kmeans.labels_)
toronto_merged = data
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index("Neighborhood"), on="Neighborhood")
toronto_merged.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,0,M3A,North York,Parkwoods,43.753259,-79.329656,2.0,Park,Fast Food Restaurant,Food & Drink Shop,Women's Store,Cuban Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
1,1,1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Portuguese Restaurant,Coffee Shop,French Restaurant,Hockey Arena,Dessert Shop,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Women's Store
2,2,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0.0,Coffee Shop,Bakery,Park,Breakfast Spot,Theater,Restaurant,Pub,Café,Chocolate Shop,Yoga Studio
3,3,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0.0,Clothing Store,Accessories Store,Furniture / Home Store,Women's Store,Boutique,Miscellaneous Shop,Arts & Crafts Store,Coffee Shop,Vietnamese Restaurant,College Stadium
4,4,4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,0.0,Coffee Shop,Sushi Restaurant,Fried Chicken Joint,Gym,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Salad Place,Burger Joint


In [17]:
toronto_merged.dropna(inplace=True)

In [18]:
toronto_merged["Cluster labels"] = toronto_merged["Cluster labels"].astype("int")

In [19]:
import matplotlib.cm as cm
import matplotlib.colors as colors
x = np.arange(clusters_number)
ys = [i + x + (i*x)**2 for i in range(clusters_number)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
markers_colors = []
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### ***Exploring Clusters***

## ***Cluster-1***

In [20]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 0.1,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,1,43.725882,-79.315572,0,Portuguese Restaurant,Coffee Shop,French Restaurant,Hockey Arena,Dessert Shop,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Women's Store
2,2,43.654260,-79.360636,0,Coffee Shop,Bakery,Park,Breakfast Spot,Theater,Restaurant,Pub,Café,Chocolate Shop,Yoga Studio
3,3,43.718518,-79.464763,0,Clothing Store,Accessories Store,Furniture / Home Store,Women's Store,Boutique,Miscellaneous Shop,Arts & Crafts Store,Coffee Shop,Vietnamese Restaurant,College Stadium
4,4,43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Fried Chicken Joint,Gym,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Salad Place,Burger Joint
6,6,43.806686,-79.194353,0,Fast Food Restaurant,Women's Store,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,98,43.653654,-79.506944,0,Smoke Shop,River,Women's Store,Cocktail Bar,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
99,99,43.665860,-79.383160,0,Sushi Restaurant,Bubble Tea Shop,Beer Bar,Indian Restaurant,Ice Cream Shop,Ramen Restaurant,Dance Studio,Italian Restaurant,Escape Room,Ethiopian Restaurant
100,100,43.662744,-79.321558,0,Light Rail Station,Yoga Studio,Comic Shop,Spa,Brewery,Burrito Place,Restaurant,Farmers Market,Fast Food Restaurant,Skate Park
101,101,43.636258,-79.498509,0,Baseball Field,Deli / Bodega,Women's Store,Electronics Store,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner


## ***Cluster-2***

In [21]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 0.1,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,11,43.650943,-79.554724,1,Bakery,Women's Store,Electronics Store,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant


## ***Cluster-3***

In [22]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 0.1,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,43.753259,-79.329656,2,Park,Fast Food Restaurant,Food & Drink Shop,Women's Store,Cuban Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
21,21,43.689026,-79.453512,2,Park,Pool,Women's Store,General Entertainment,Gay Bar,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
35,35,43.685347,-79.338106,2,Park,Metro Station,Convenience Store,Women's Store,Cuban Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
61,61,43.72802,-79.38879,2,Park,Swim School,Bus Line,Drugstore,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
66,66,43.752758,-79.400049,2,Park,Convenience Store,Women's Store,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
77,77,43.688905,-79.554724,2,Park,Sandwich Place,Mobile Phone Shop,College Auditorium,College Gym,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop


## ***Cluster-4***

In [23]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 0.1,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,32,43.744734,-79.239476,3,Playground,Women's Store,Coworking Space,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
40,40,43.737473,-79.464763,3,Park,Airport,Construction & Landscaping,Women's Store,Cuban Restaurant,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
49,49,43.713756,-79.490074,3,Bakery,Park,Basketball Court,Construction & Landscaping,Trail,Women's Store,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant
68,68,43.696948,-79.411307,3,Park,Jewelry Store,Trail,Sushi Restaurant,Women's Store,Cuban Restaurant,Dog Run,Distribution Center,Discount Store,Diner
83,83,43.689574,-79.38316,3,Park,Playground,Restaurant,College Arts Building,Coworking Space,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
85,85,43.815252,-79.284577,3,Park,Playground,Intersection,Creperie,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
91,91,43.679563,-79.377529,3,Park,Playground,Trail,Cosmetics Shop,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store


## ***Cluster-5***

In [24]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Unnamed: 0.1,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,12,43.784535,-79.160497,4,Bar,Women's Store,Electronics Store,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
