## 1- Import libraries:

In [48]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## 2- Get data and perform prepartions:

In [50]:
# Read Table from wikipedia site:
tor_df= pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header=0)  # using direct pandas read_html function 
tor_df = tor_df[0]
# Let us change column names as required :
column_names =['PostalCode','Borough','Neighborhood'] 
tor_df.columns=column_names
tor_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [52]:
# Drop rows where borough not assigned:
tor_df = tor_df[tor_df['Borough'] != 'Not assigned']
# Merge same poastal code rows:
tor_df= tor_df.groupby(['PostalCode','Borough']).agg({'Neighborhood': ', '.join}).reset_index()
# rename neighborhood if not assigned:
for ind, row in tor_df.iterrows():
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = row['Borough']
    else:
        row['Neighborhood'] =row['Neighborhood']
tor_df.shape

(103, 3)

In [53]:
# Read location of each Neighborhood from csv file:
tor_df_loc=pd.read_csv('Geospatial_Coordinates.csv')
tor_df_loc.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [54]:
# join dataframes:
tor_df = tor_df.join(tor_df_loc)
tor_df.drop('Postal Code', axis=1, inplace=True)
tor_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## 3- Select Toronto boroughs:

In [58]:
# Working with Toronto boroughs: 
df=tor_df[tor_df['Borough'].str.contains('Toronto')]

In [59]:
df.shape

(38, 5)

In [60]:
## Create function to explore venues:

CLIENT_ID = 'LZU2EMARAXADU3FJJSCDAPNG0ULAJB3V2VEX5CDZ1SQ22SDH'

CLIENT_SECRET = 'QBC22I2UI5BJEVYPKHHBHH4YQ4344CCCLCKSW5PJ4N33DSNU'

VERSION = '20180605'

LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Now write the code to run the above function on each neighborhood and create a new dataframe called *tor_venues*.

In [61]:
tor_venues = getNearbyVenues(names=df['Neighborhood'],latitudes=df['Latitude'],longitudes=df['Longitude'])

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The 

In [62]:
tor_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Glen Stewart Ravine,43.6763,-79.294784,Other Great Outdoors
4,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood


## 4- Anaalyze Neighborhoods:

#### Let's check how many venues were returned for each neighborhood

In [63]:
tor_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"Brockton, Exhibition Place, Parkdale Village",24,24,24,24,24,24
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",16,16,16,16,16,16
"Cabbagetown, St. James Town",44,44,44,44,44,44
Central Bay Street,89,89,89,89,89,89
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,89,89,89,89,89,89


In [64]:
print('There are {} uniques categories.'.format(len(tor_venues['Venue Category'].unique())))

There are 231 uniques categories.


In [65]:
# to feed categorial column to model, it should be numerical [Encoding as dummy variables]:
dummy_var = pd.get_dummies(tor_venues['Venue Category'], prefix='')
tor_hot=pd.concat([tor_venues,dummy_var], axis=1)
tor_hot.drop(['Neighborhood Latitude','Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude','Venue Category'], axis=1, inplace=True)
tor_hot.head()

Unnamed: 0,Neighborhood,_Afghan Restaurant,_Airport,_Airport Food Court,_Airport Gate,_Airport Lounge,_Airport Service,_Airport Terminal,_American Restaurant,_Antique Shop,_Aquarium,_Art Gallery,_Art Museum,_Arts & Crafts Store,_Asian Restaurant,_Auto Workshop,_BBQ Joint,_Baby Store,_Bagel Shop,_Bakery,_Bank,_Bar,_Baseball Stadium,_Basketball Stadium,_Beach,_Beer Bar,_Beer Store,_Belgian Restaurant,_Bistro,_Boat or Ferry,_Bookstore,_Boutique,_Brazilian Restaurant,_Breakfast Spot,_Brewery,_Bubble Tea Shop,_Building,_Burger Joint,_Burrito Place,_Bus Line,_Butcher,_Café,_Cajun / Creole Restaurant,_Camera Store,_Caribbean Restaurant,_Cheese Shop,_Chinese Restaurant,_Chocolate Shop,_Church,_Climbing Gym,_Clothing Store,_Cocktail Bar,_Coffee Shop,_College Arts Building,_College Gym,_College Rec Center,_Colombian Restaurant,_Comfort Food Restaurant,_Comic Shop,_Concert Hall,_Convenience Store,_Cosmetics Shop,_Coworking Space,_Creperie,_Cuban Restaurant,_Cupcake Shop,_Dance Studio,_Deli / Bodega,_Department Store,_Dessert Shop,_Diner,_Discount Store,_Dive Bar,_Dog Run,_Doner Restaurant,_Donut Shop,_Dumpling Restaurant,_Eastern European Restaurant,_Electronics Store,_Ethiopian Restaurant,_Event Space,_Falafel Restaurant,_Farmers Market,_Fast Food Restaurant,_Filipino Restaurant,_Fish & Chips Shop,_Fish Market,_Flea Market,_Flower Shop,_Food,_Food & Drink Shop,_Food Court,_Food Truck,_Fountain,_French Restaurant,_Fried Chicken Joint,_Fruit & Vegetable Store,_Furniture / Home Store,_Gaming Cafe,_Garden,_Garden Center,_Gastropub,_Gay Bar,_General Entertainment,_General Travel,_Gift Shop,_Gluten-free Restaurant,_Gourmet Shop,_Greek Restaurant,_Grocery Store,_Gym,_Gym / Fitness Center,_Harbor / Marina,_Health & Beauty Service,_Health Food Store,_Historic Site,_History Museum,_Hobby Shop,_Hookah Bar,_Hostel,_Hotel,_Hotel Bar,_Hotpot Restaurant,_Ice Cream Shop,_Indian Restaurant,_Indie Movie Theater,_Indoor Play Area,_Intersection,_Irish Pub,_Italian Restaurant,_Japanese Restaurant,_Jazz Club,_Jewelry Store,_Jewish Restaurant,_Juice Bar,_Korean Restaurant,_Lake,_Latin American Restaurant,_Light Rail Station,_Lingerie Store,_Liquor Store,_Lounge,_Mac & Cheese Joint,_Malay Restaurant,_Market,_Martial Arts Dojo,_Mediterranean Restaurant,_Men's Store,_Metro Station,_Mexican Restaurant,_Middle Eastern Restaurant,_Miscellaneous Shop,_Modern European Restaurant,_Molecular Gastronomy Restaurant,_Monument / Landmark,_Movie Theater,_Museum,_Music Store,_Music Venue,_Neighborhood,_New American Restaurant,_Nightclub,_Noodle House,_Office,_Opera House,_Optical Shop,_Organic Grocery,_Other Great Outdoors,_Park,_Performing Arts Venue,_Pet Store,_Pharmacy,_Pizza Place,_Plane,_Playground,_Plaza,_Poke Place,_Portuguese Restaurant,_Poutine Place,_Pub,_Ramen Restaurant,_Record Shop,_Rental Car Location,_Restaurant,_Roof Deck,_Sake Bar,_Salad Place,_Salon / Barbershop,_Sandwich Place,_Scenic Lookout,_Sculpture Garden,_Seafood Restaurant,_Shoe Store,_Shopping Mall,_Skate Park,_Skating Rink,_Smoke Shop,_Smoothie Shop,_Snack Place,_Soup Place,_Southern / Soul Food Restaurant,_Spa,_Speakeasy,_Sporting Goods Shop,_Sports Bar,_Stadium,_Stationery Store,_Steakhouse,_Strip Club,_Supermarket,_Sushi Restaurant,_Swim School,_Taco Place,_Tailor Shop,_Taiwanese Restaurant,_Tanning Salon,_Tapas Restaurant,_Tea Room,_Thai Restaurant,_Theater,_Theme Restaurant,_Thrift / Vintage Store,_Toy / Game Store,_Trail,_Train Station,_Vegetarian / Vegan Restaurant,_Video Game Store,_Video Store,_Vietnamese Restaurant,_Wine Bar,_Wings Joint,_Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [66]:
df = df.groupby('Neighborhood').mean().reset_index()
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Adelaide, King, Richmond",43.650571,-79.384568
1,Berczy Park,43.644771,-79.373306
2,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191
3,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442


In [67]:
tor_hot.shape

(1705, 232)

### Create  function for common venues:

In [68]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [69]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = tor_hot['Neighborhood']

for ind in np.arange(tor_hot.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tor_hot.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,The Beaches,_Trail,_Yoga Studio,_Dive Bar,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store
1,The Beaches,_Health Food Store,_Fish Market,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store,_Eastern European Restaurant
2,The Beaches,_Pub,_Yoga Studio,_Dive Bar,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store
3,The Beaches,_Other Great Outdoors,_Yoga Studio,_Dive Bar,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store
4,The Beaches,_Neighborhood,_Dive Bar,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store,_Eastern European Restaurant


## 5-Clustering:

In [70]:
# set number of clusters
kclusters = 5

tor_clustering =tor_hot.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [71]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tor_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tor_merged = tor_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

tor_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",43.650571,-79.384568,0,_Concert Hall,_Fish Market,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store,_Eastern European Restaurant
0,"Adelaide, King, Richmond",43.650571,-79.384568,0,_Steakhouse,_Yoga Studio,_Dog Run,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store
0,"Adelaide, King, Richmond",43.650571,-79.384568,0,_Plaza,_Yoga Studio,_Dive Bar,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store
0,"Adelaide, King, Richmond",43.650571,-79.384568,0,_Vegetarian / Vegan Restaurant,_Yoga Studio,_Dive Bar,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store
0,"Adelaide, King, Richmond",43.650571,-79.384568,0,_Hotel,_Yoga Studio,_Dive Bar,_Filipino Restaurant,_Fast Food Restaurant,_Farmers Market,_Falafel Restaurant,_Event Space,_Ethiopian Restaurant,_Electronics Store


## 6- Visualization on maps:

In [74]:
# create map
import html
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.Marker([lat, lon],radius=5, popup=label, color=rainbow[cluster-1], fill=True, fill_color=rainbow[cluster-1], fill_opacity=0.7).add_to(map_clusters)
       
# map_clusters
# for lat, lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels']):
#     text = 'cluster No :' + str (cluster)
#     folium.Marker([lat,lon],popup=html.escape(text)).add_to(map_clusters)
map_clusters