<h1 align='center'>Segmenting and Clustering Neighborhoods in Toronto - Part 3</h1>

## Import Libraries

In [1]:
import numpy as np # handle data in vectorized
import pandas as pd # for data analysis
import requests
import json # handle json file
import matplotlib.cm as cm # handle plotting
import matplotlib.colors as colors # handle plotting
#! pip install folium
import folium # map rendering
#! pip install geopy
from geopy.geocoders import Nominatim # use to get the coordinates from a defined address
from sklearn.cluster import KMeans # algorithm use to grouping the data

## Preparing Data

#### getting toronto dataframe from previous part

In [2]:
toronto_ll = pd.read_csv('Toronto_Coordinates.csv') # importing data
print('datafame contains {} rows'.format(toronto_ll.shape[0])) # length of original dataframe
toronto_ll.head() # display the original dataframe

datafame contains 103 rows


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### filtering data - get only borough that include the word 'Toronto'

In [3]:
toronto = toronto_ll[toronto_ll['Borough'].str.contains('Toronto')] # ignore the row without word 'Toronto' on Borough columns
print('datafame contains {} rows'.format(toronto.shape[0])) # length of dataframe after filtering
toronto.reset_index(drop=True, inplace=True)
toronto.head() # display the resulting dataframe

datafame contains 39 rows


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


#### the row without 'Toronto' in dataframe are filtered out and now have only 39 rows

## Getting Venue Data

#### define credentials

In [4]:
CLIENT_ID = 'RANTRTMBVOQOIHLZLEXAH3CXS5WJWVEWZCO0OL0U5EA5NTAX' # foursquare id
CLIENT_SECRET = '53TB4P3VBOZTCQFMKSJ2NAXHVYI5G5KDR5OONTEMTANKW2Y3' # foursquare secret
VERSION = '20180605' # foursquare api version
RADIUS = 500 # radius from a defined coordinates
LIMIT = 100 # limiting result to less than 100

#### getting venue name and their categories via foursquare api

In [5]:
venue_list = []

# getting the information of each venue
for name, lat, lng in zip(toronto['Neighborhood'], toronto['Latitude'], toronto['Longitude']):
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, RADIUS, LIMIT) # define an url
    result = requests.get(url).json()['response']['groups'][0]['items'] # sent get requests

    # getting venues name and their categories from whole items
    for vn in result:
        venue_list.append([name, lat, lng, vn['venue']['name'], vn['venue']['categories'][0]['name']]) # put venue data into list

col = ['Neighborhood', 'Neighborhood Lat', 'Neighborhood Lon', 'Venue Name', 'Venue Categories'] # define columns name
toronto_venue = pd.DataFrame(venue_list, columns = col) # convert all list into dataframe

print('dataframe contain {} neighborhood rows, {} venues, and {} unique categories'.format(
    toronto_venue['Neighborhood'].unique().shape[0], toronto_venue.shape[0], toronto_venue['Venue Categories'].unique().shape[0])) # dataframe information

toronto_venue.head() # display the resulting dataframe

dataframe contain 39 neighborhood rows, 1620 venues, and 237 unique categories


Unnamed: 0,Neighborhood,Neighborhood Lat,Neighborhood Lon,Venue Name,Venue Categories
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Dominion Pub and Kitchen,Pub


#### transform venue categories with one hot encoding method

In [6]:
# creating one hot encoding table from categories column
toronto_venue_cat = pd.get_dummies(toronto_venue['Venue Categories'])

# insert neighborhood column to the table
toronto_venue_cat.insert(0, 'Toronto Neighborhood', toronto_venue['Neighborhood'])

print('dataframe contain {} venues and {} unique categories'.format(toronto_venue_cat.shape[0], toronto_venue_cat.shape[1]-1))
#toronto_venue_cat.head()

dataframe contain 1620 venues and 237 unique categories


#### processing data to get the top ten venue categories in each borough that contain the word 'Toronto'

In [7]:
toronto_grouped = toronto_venue_cat.groupby(toronto_venue_cat['Toronto Neighborhood']).mean().reset_index() # frequency table

num_top_venues = 10 # get top ten venues

# define columns name
col = []
ind = ['st', 'nd', 'rd'] # indices for first three rank '1st', '2nd', '3rd'
for n in range(num_top_venues):
    if n < 3: col.append('{}{} Most Common Venue'.format(n+1, ind[n]))
    else: col.append('{}th Most Common Venue'.format(n+1))

# sorting top ten values, then get their index name (per row) and append it into a list of rows
toronto_sorted = []
for neighbor in range(len(toronto_grouped)):
    toronto_sorted.append(toronto_grouped.iloc[neighbor, 1:].sort_values(ascending=False).index[0:num_top_venues])

toronto_cluster = pd.DataFrame(toronto_sorted, columns=col) # convert a list of row into a dataframe

toronto_cluster.insert(0, 'Neighborhood', toronto['Neighborhood']) # insert neighborhood column to the table

toronto_cluster.head() # display the resulting dataframe

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Cheese Shop,Seafood Restaurant,Café,Restaurant,Greek Restaurant,Irish Pub
1,"Queen's Park, Ontario Provincial Government",Café,Bakery,Coffee Shop,Breakfast Spot,Yoga Studio,Convenience Store,Performing Arts Venue,Pet Store,Climbing Gym,Restaurant
2,"Garden District, Ryerson",Garden,Brewery,Burrito Place,Spa,Light Rail Station,Pizza Place,Butcher,Fast Food Restaurant,Auto Workshop,Farmers Market
3,St. James Town,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Boutique,Boat or Ferry,Rental Car Location
4,The Beaches,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Japanese Restaurant,Salad Place,Bubble Tea Shop,Burger Joint,Department Store,Thai Restaurant


## Clustering

#### clustering data with k-means clustering algorithm

In [8]:
# use frequency table (toronto_grouped) as a feature and drop unnecessary columns
toronto_x = toronto_grouped.drop(['Toronto Neighborhood'], axis=1)

# clustering
cluster = 5 # number of clusters
k_means = KMeans(n_clusters=cluster, random_state=4).fit(toronto_x) # fit the model

toronto_cluster.insert(1, 'Cluster', k_means.labels_) # insert the result label into the table
toronto_cluster.head() # display the resulting dataframe

Unnamed: 0,Neighborhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",0,Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Cheese Shop,Seafood Restaurant,Café,Restaurant,Greek Restaurant,Irish Pub
1,"Queen's Park, Ontario Provincial Government",0,Café,Bakery,Coffee Shop,Breakfast Spot,Yoga Studio,Convenience Store,Performing Arts Venue,Pet Store,Climbing Gym,Restaurant
2,"Garden District, Ryerson",0,Garden,Brewery,Burrito Place,Spa,Light Rail Station,Pizza Place,Butcher,Fast Food Restaurant,Auto Workshop,Farmers Market
3,St. James Town,0,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Boutique,Boat or Ferry,Rental Car Location
4,The Beaches,0,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Japanese Restaurant,Salad Place,Bubble Tea Shop,Burger Joint,Department Store,Thai Restaurant


## Visualizing Result

#### getting Toronto latitude and longitude

In [9]:
address = 'Toronto, Ontario' # define an address
geolocator = Nominatim(user_agent='ny_explorer')
location = geolocator.geocode(address) # getting coordinates
latitude = location.latitude # 43.6534817
longitude = location.longitude # -79.3839347

#### adding the map with neighborhoods and their cluster

In [10]:
# create the map center around 'Toronto, Ontario'
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12) # creating map

# define color array
x = cm.rainbow(np.linspace(0, 1, cluster))
color = [colors.rgb2hex(i) for i in x] # array of colors for each cluster

for neigh, lat, lon, clus in zip(
toronto['Neighborhood'], toronto['Latitude'], toronto['Longitude'], toronto_cluster['Cluster']):
    labels=folium.Popup('{} Cluster {}'.format(neigh, clus), parse_html=True) # define labels
    folium.CircleMarker(
        location=[lat,lon], # location of each neighborhood
        popup=labels, # put label into the popup
        radius=5, # size of marker
        color=color[clus], 
        fill='blue', 
        fill_color=color[clus],
        fill_opacity=0.7,
        ).add_to(map_toronto) # add the marker into the map

map_toronto # display the map

## Explore Clusters

#### Cluster 0 - Include most of the cafe and coffee shops

In [11]:
toronto_cluster[toronto_cluster['Cluster'] == 0]

Unnamed: 0,Neighborhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",0,Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Cheese Shop,Seafood Restaurant,Café,Restaurant,Greek Restaurant,Irish Pub
1,"Queen's Park, Ontario Provincial Government",0,Café,Bakery,Coffee Shop,Breakfast Spot,Yoga Studio,Convenience Store,Performing Arts Venue,Pet Store,Climbing Gym,Restaurant
2,"Garden District, Ryerson",0,Garden,Brewery,Burrito Place,Spa,Light Rail Station,Pizza Place,Butcher,Fast Food Restaurant,Auto Workshop,Farmers Market
3,St. James Town,0,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Boutique,Boat or Ferry,Rental Car Location
4,The Beaches,0,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Japanese Restaurant,Salad Place,Bubble Tea Shop,Burger Joint,Department Store,Thai Restaurant
5,Berczy Park,0,Grocery Store,Café,Park,Diner,Candy Store,Baby Store,Restaurant,Athletics & Sports,Italian Restaurant,Nightclub
6,Central Bay Street,0,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Gay Bar,Yoga Studio,Bubble Tea Shop,Burger Joint,Mediterranean Restaurant,Pub
7,Christie,0,Coffee Shop,Café,Restaurant,Hotel,American Restaurant,Gym,Italian Restaurant,Japanese Restaurant,Seafood Restaurant,Vegetarian / Vegan Restaurant
8,"Richmond, Adelaide, King",0,Pizza Place,Dessert Shop,Sandwich Place,Coffee Shop,Italian Restaurant,Café,Sushi Restaurant,Gym,Diner,Indian Restaurant
9,"Dufferin, Dovercourt Village",0,Park,Pizza Place,Breakfast Spot,Sandwich Place,Department Store,Food & Drink Shop,Convenience Store,Hotel,Gym,Concert Hall


#### Cluster 1 - Include park and some outdoor places

In [12]:
toronto_cluster[toronto_cluster['Cluster'] == 1]

Unnamed: 0,Neighborhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,"The Danforth West, Riverdale",1,Trail,Park,Bus Line,Jewelry Store,Sushi Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
20,Davisville North,1,Park,Restaurant,Playground,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
26,Davisville,1,Park,Playground,Trail,Cupcake Shop,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store


#### Cluster 2 - Include some outdoor specific places

In [13]:
toronto_cluster[toronto_cluster['Cluster'] == 2]

Unnamed: 0,Neighborhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
35,"St. James Town, Cabbagetown",2,Health Food Store,Neighborhood,Asian Restaurant,Pub,Trail,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Yoga Studio


#### Cluster 3 - Include some specific places

In [14]:
toronto_cluster[toronto_cluster['Cluster'] == 3]

Unnamed: 0,Neighborhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,"University of Toronto, Harbord",3,Garden,Yoga Studio,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store


#### Cluster 4 - Include some outdoor places

In [15]:
toronto_cluster[toronto_cluster['Cluster'] == 4]

Unnamed: 0,Neighborhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Lawrence Park,4,Park,Bus Line,Swim School,Lawyer,Colombian Restaurant,College Gym,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
