### Import the libraries required for exploring and clustering neighborhoods in Toronto

In [1]:
import pandas as pd
from pandas.io.json import json_normalize
import requests
import numpy as np

import json

!pip install geopy 
from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!pip install folium
import folium



I saved the dataframe from the previous notebook as **canada_neighborhood_locations.csv** so that it can be imported here

In [2]:
# Load the Canada neighborhood DataFrame from the CSV file saved by the previous notebook.
# `DataFrame.from_csv` was removed in pandas 1.0; `pd.read_csv` with `index_col=0`
# uses the first CSV column as the index, which is what `from_csv` did by default.
canada_neighborhood_locations = pd.read_csv("canada_neighborhood_locations.csv", index_col=0)
canada_neighborhood_locations.head()

  


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054
1,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
2,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9N,York,Weston,43.706876,-79.518188


### Then I select the boroughs whose names contain "Toronto" to work with

In [3]:
# Boolean mask that is True for every row whose Borough contains the text "Toronto".
# `regex=False` matches the text literally, and `na=False` treats a missing Borough
# as "not Toronto" instead of raising.
toronto_checker = canada_neighborhood_locations['Borough'].str.contains('Toronto', regex=False, na=False)

# Keep only the Toronto rows and renumber them from 0 (the old index is discarded)
toronto_df = canada_neighborhood_locations[toronto_checker].reset_index(drop=True)
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M6S,West Toronto,"Runnymede, Swansea",43.651571,-79.48445
3,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325
4,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763


## Exploring Toronto Neighborhoods

In [4]:
# Foursquare app credentials and API version.
# The credentials are read from the FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET
# environment variables so they never have to be typed into (and later deleted from)
# the notebook. When a variable is not set, the value falls back to an empty string.
import os

CLIENT_ID = os.environ.get("FOURSQUARE_CLIENT_ID", "")  # your Foursquare ID
CLIENT_SECRET = os.environ.get("FOURSQUARE_CLIENT_SECRET", "")  # your Foursquare Secret
VERSION = '20180605'  # Foursquare API version

# Define a function that builds a data frame of the venues near each location
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    """Query the Foursquare "explore" endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel iterables walked together with ``zip``; each triple describes
        one location to query.
    radius : optional
        Value sent as the ``radius`` query parameter (default 500).
    LIMIT : optional
        Value sent as the ``limit`` query parameter (default 100).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        When no venue is returned at all, the frame is empty but still
        carries these columns.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers a request with an error status code.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    url = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # let requests build and URL-encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; the timeout keeps a stalled connection from
        # hanging the notebook, and raise_for_status reports HTTP errors clearly
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue without any category gets None instead of raising IndexError
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # passing the column names to the constructor also works for zero rows
    nearby_venues = pd.DataFrame(venue_rows, columns=columns)

    return nearby_venues

## Get Toronto Venues

In [5]:
# Collect the nearby venues of every Toronto neighborhood (default radius and limit)
toronto_venues = getNearbyVenues(
    toronto_df['Neighborhood'],
    toronto_df['Latitude'],
    toronto_df['Longitude'],
)

In [6]:
# Print the (rows, columns) size of the venue table, then preview its first ten rows
print(toronto_venues.shape)
toronto_venues.head(n=10)

(1637, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Rorschach Brewing Co.,43.663483,-79.319824,Brewery
1,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Leslieville Farmers Market,43.664901,-79.319784,Farmers Market
2,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,The Sidekick,43.664484,-79.325162,Comic Shop
3,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Chino Locos,43.664653,-79.325584,Burrito Place
4,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Queen Margherita Pizza,43.664685,-79.324164,Pizza Place
5,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Ashbridges Bay Skatepark,43.662548,-79.315631,Skate Park
6,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Chick-n-Joy,43.665181,-79.321403,Fast Food Restaurant
7,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,The Green Wood,43.664728,-79.324117,Restaurant
8,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,East End Garden Centre & Hardware,43.664564,-79.324471,Garden Center
9,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Amin Car Repair Garage,43.663544,-79.32013,Auto Workshop


## Prepare to get the Top 5 Venues of each neighborhood (a different approach from the one used in the Lab)

In [7]:
# Distinct neighborhood names found in the Toronto dataframe
toronto_unique_neighborhoods = toronto_df.loc[:, "Neighborhood"].unique()

# Venue rows bucketed by the neighborhood they were fetched for
grouped_toronto_neighborhoods = toronto_venues.groupby(by="Neighborhood")

In [35]:
# Define a function to get the top venue categories for a given neighborhood
def get_top_venues(neighborhood, topnum=5):
    """Return the most frequent venue categories of a neighborhood as one string.

    Parameters
    ----------
    neighborhood :
        Group key to look up in ``grouped_toronto_neighborhoods``.
    topnum : int, optional
        Maximum number of categories to include (default 5).

    Returns
    -------
    str
        The category names joined with ", ", in ``value_counts`` order.
        An empty string is returned when the neighborhood has no venues
        (no group of that name), instead of raising ``KeyError``.
    """
    # neighborhoods come from toronto_df, the groups from the fetched venues,
    # so a neighborhood for which nothing was returned has no group
    if neighborhood not in grouped_toronto_neighborhoods.groups:
        return ""

    categories = grouped_toronto_neighborhoods.get_group(neighborhood)["Venue Category"]
    top_venues_list = categories.value_counts().index.to_list()[0:topnum]
    return ", ".join(top_venues_list)

# Build the two columns (neighborhood name, its top-5 venues string) for every
# unique neighborhood and keep them in a dict
toronto_top5_venues = {
    "Neighborhood": list(toronto_unique_neighborhoods),
    "Top 5 Venues": [get_top_venues(name) for name in toronto_unique_neighborhoods],
}

# Turn the dict into a pandas dataframe and preview the first 10 rows
toronto_top5_venues_df = pd.DataFrame(data=toronto_top5_venues)
toronto_top5_venues_df.head(n=10)

Unnamed: 0,Neighborhood,Top 5 Venues
0,"Business reply mail Processing Centre, South C...","Light Rail Station, Pizza Place, Gym / Fitness..."
1,"Queen's Park, Ontario Provincial Government","Coffee Shop, Diner, Creperie, Sandwich Place, ..."
2,"Runnymede, Swansea","Sushi Restaurant, Café, Coffee Shop, Diner, Pi..."
3,"Parkdale, Roncesvalles","Gift Shop, Breakfast Spot, Dessert Shop, Cuban..."
4,"High Park, The Junction South","Mexican Restaurant, Café, Thai Restaurant, Art..."
5,"Brockton, Parkdale Village, Exhibition Place","Café, Breakfast Spot, Bakery, Coffee Shop, Cli..."
6,"Little Portugal, Trinity","Bar, Coffee Shop, Restaurant, Vietnamese Resta..."
7,"Dufferin, Dovercourt Village","Pharmacy, Bakery, Café, Pet Store, Supermarket"
8,Christie,"Grocery Store, Café, Park, Baby Store, Diner"
9,"First Canadian Place, Underground city","Coffee Shop, Café, Hotel, Gym, Restaurant"


## Time for Clustering

In [13]:
# one hot encoding: one indicator column per venue category
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
# A venue category that is itself named "Neighborhood" would collide with the
# column inserted below, so it is dropped first. errors='ignore' keeps the cell
# working when no such category exists in the fetched venues.
toronto_onehot = toronto_onehot.drop(columns=['Neighborhood'], errors='ignore')
toronto_onehot.insert(0, 'Neighborhood', toronto_venues['Neighborhood'])

In [18]:
# Preview the first ten one-hot encoded venue rows
toronto_onehot.head(n=10)

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Business reply mail Processing Centre, South C...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Average the indicator columns per neighborhood, so each value is the share of
# that neighborhood's venues that fall into the category
toronto_grouped = (
    toronto_onehot
    .groupby(by='Neighborhood')
    .mean()
    .reset_index()
)
toronto_grouped.head(n=5)

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.015625


## Get most common venues in Toronto

In [29]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest entries of a row.

    The first entry of ``row`` is skipped; the remaining entries are sorted in
    descending order and the labels of the first ``num_top_venues`` of them
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # the first three ranks take their own suffix, every later rank takes "th";
    # an explicit check replaces the bare `except:` that could hide real errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# compute the top venues of every neighborhood first, then build the dataframe
# once instead of filling it row by row
top_venue_rows = [
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(toronto_grouped.shape[0])
]

# create a new dataframe whose rows line up with toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns[1:], index=toronto_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', toronto_grouped['Neighborhood'])

print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted.head()

(39, 11)


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Restaurant,Beer Bar,Bakery,Farmers Market,Cocktail Bar,Seafood Restaurant,Cheese Shop,Café,Park
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Bakery,Bar,Stadium,Italian Restaurant,Climbing Gym,Furniture / Home Store,Intersection
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Skate Park,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Harbor / Marina,Coffee Shop,Plane,Boat or Ferry,Rental Car Location,Bar
4,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Bubble Tea Shop,Burger Joint,Salad Place,Japanese Restaurant,Department Store,Portuguese Restaurant


## Start Using KMeans

In [28]:
# set number of clusters
kclusters = 5

# keep only the per-category frequency columns as clustering features
# (the positional `axis` argument of `drop` no longer exists in pandas 2.0)
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is pinned to 10 because newer scikit-learn releases changed the
# default; random_state keeps the result reproducible between runs
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:100]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 4, 0,
       0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [32]:
# add clustering labels
# Remove a 'Cluster Labels' column left behind by an earlier run of this cell,
# if there is one, so the cell works on a fresh kernel as well as when re-run.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns=['Cluster Labels'], errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and top venues of each neighborhood onto toronto_df,
# which carries the latitude/longitude of each neighborhood
toronto_merged = toronto_df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,0,Light Rail Station,Yoga Studio,Skate Park,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Diner,Yoga Studio,Park,Beer Bar,Smoothie Shop,Burrito Place,Sandwich Place,Café,Restaurant
2,M6S,West Toronto,"Runnymede, Swansea",43.651571,-79.48445,0,Coffee Shop,Café,Sushi Restaurant,Pizza Place,Pub,Diner,Italian Restaurant,Bar,Smoothie Shop,Bookstore
3,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,0,Breakfast Spot,Gift Shop,Dessert Shop,Bar,Italian Restaurant,Bookstore,Movie Theater,Restaurant,Cuban Restaurant,Eastern European Restaurant
4,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763,0,Thai Restaurant,Mexican Restaurant,Café,Bar,Diner,Bakery,Italian Restaurant,Speakeasy,Flea Market,Furniture / Home Store


## Finally, show the clusters on a map

In [38]:
# create map (the centre coordinates are presumably central Toronto)
latitude = 43.651070
longitude = -79.347015
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Neighborhoods that received no cluster have NaN in 'Cluster Labels', which
# also turns the column into floats. Skip those rows and cast the labels back
# to int so they can be used as list indices.
toronto_clustered = toronto_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_clustered['Latitude'],
                                  toronto_clustered['Longitude'],
                                  toronto_clustered['Neighborhood'],
                                  toronto_clustered['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster 0 wraps around to the last color; every cluster still gets its own color
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

# The map may not render in every viewer, so I saved it as a picture in the same repository,
# also available at: https://github.com/BolaAshEf/Coursera_Capstone/blob/master/clustring_map.jpg
map_clusters