In [29]:
# This is a cell to hide code snippets from displaying
# This must be at first cell!

from IPython.display import HTML

hide_me = ''
HTML('''<script>
code_show=true; 
function code_toggle() {
  if (code_show) {
    $('div.input').each(function(id) {
      el = $(this).find('.cm-variable:first');
      if (id == 0 || el.text() == 'hide_me') {
        $(this).hide();
      }
    });
    $('div.output_prompt').css('opacity', 0);
  } else {
    $('div.input').each(function(id) {
      $(this).show();
    });
    $('div.output_prompt').css('opacity', 1);
  }
  code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input style="opacity:0" type="submit" value="Click here to toggle on/off the raw code."></form>''')

## Importing required libraries

In [0]:
#BeautifulSoup library helps us in scraping html documents
from bs4 import BeautifulSoup

#requets library helps us to access required content through urls
import requests

#pandas library helps us in converting data in variious formats to dataframes which are easier to work with
import pandas as pd

#Folium library helps in working with maps
import folium

#for converting an address into latitude and longitude values
from geopy.geocoders import Nominatim

import numpy as np

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

# Acquring HTML content from webpage

In [0]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_doc=requests.get(url)
wiki_content=BeautifulSoup(html_doc.content,'lxml')
tables=wiki_content.find_all('table')[0] #find_all returns a list of all elemnts with the tag passed to it.

# Converting the list of tables to a list of dataframes

In [32]:
df=pd.read_html(str(tables))
df

[    Postal Code  ...                                       Neighborhood
 0           M1A  ...                                                NaN
 1           M2A  ...                                                NaN
 2           M3A  ...                                          Parkwoods
 3           M4A  ...                                   Victoria Village
 4           M5A  ...                          Regent Park, Harbourfront
 ..          ...  ...                                                ...
 175         M5Z  ...                                                NaN
 176         M6Z  ...                                                NaN
 177         M7Z  ...                                                NaN
 178         M8Z  ...  Mimico NW, The Queensway West, South of Bloor,...
 179         M9Z  ...                                                NaN
 
 [180 rows x 3 columns]]

# The first and only table is the required one

In [33]:
postal_codes=df[0]
postal_codes.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


# Dropping postal codes that are not assigned with boroughs

In [34]:
postal_codes=postal_codes[~postal_codes.Borough.str.contains("Not assigned")]
postal_codes.reset_index(drop=True,inplace=True)
postal_codes.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


# Merging neighbourhoods under same postal code

In [35]:

postal_codes.set_index(['Postal Code','Borough'],inplace=True)
postal_codes_grouped=postal_codes.groupby(level=['Postal Code','Borough']).agg(','.join)
postal_codes_grouped.reset_index(inplace=True)
postal_codes_grouped.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [36]:
postal_codes_grouped.shape

(103, 3)

# Loading locations into a dataframe

In [37]:
!wget -q -O 'locations.csv' http://cocl.us/Geospatial_data
locations=pd.read_csv('locations.csv')
locations.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


# Merging locations with their Boroughs

In [38]:
postal_codes_located=postal_codes_grouped.merge(locations,how='inner', on='Postal Code' )
postal_codes_located.reset_index(drop=True,inplace=True)
postal_codes_located.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Splitting multiple neighborhoods under same postal code into different rows

In [39]:
postal_codes_flattened=postal_codes_located.assign(Neighborhood=postal_codes_located['Neighborhood'].str.split(',')).explode('Neighborhood')
postal_codes_flattened.reset_index(drop=True,inplace=True)
postal_codes_flattened.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern,43.806686,-79.194353
1,M1B,Scarborough,Rouge,43.806686,-79.194353
2,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
3,M1C,Scarborough,Port Union,43.784535,-79.160497
4,M1C,Scarborough,Highland Creek,43.784535,-79.160497


# separating boroughs that have toronto in their name for further analysis

In [40]:
toronto=postal_codes_flattened[postal_codes_flattened.Borough.str.contains('Toronto')]
toronto.reset_index(drop=True,inplace=True)
toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,The Danforth West,43.679557,-79.352188
2,M4K,East Toronto,Riverdale,43.679557,-79.352188
3,M4L,East Toronto,India Bazaar,43.668999,-79.315572
4,M4L,East Toronto,The Beaches West,43.668999,-79.315572


In [0]:
hide_me
CLIENT_ID = 'HAVSYF2EXYE3ZZYGUNAED0X2TA0H3PN4HHLAARFE0SEW41UZ' # your Foursquare ID
CLIENT_SECRET = 'YLFWFYECMOVPL0JNPVWOR4YR2PETGPBWOBLX4JF1C0EZER2Q' # your Foursquare Secret
VERSION = '20200515'

In [0]:
#function to obtain details of venues in each neighborhood using foursquare
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    LIMIT=100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [43]:
Toronto_venues=getNearbyVenues(names=toronto['Neighborhood'],
                               latitudes=toronto['Latitude'],
                               longitudes=toronto['Longitude'])
Toronto_venues.head()

The Beaches
The Danforth West
 Riverdale
India Bazaar
 The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park
 Summerhill East
Summerhill West
 Rathnelly
 South Hill
 Forest Hill SE
 Deer Park
Rosedale
St. James Town
 Cabbagetown
Church and Wellesley
Regent Park
 Harbourfront
Garden District
 Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond
 Adelaide
 King
Harbourfront East
 Union Station
 Toronto Islands
Toronto Dominion Centre
 Design Exchange
Commerce Court
 Victoria Hotel
Roselawn
Forest Hill North & West
The Annex
 North Midtown
 Yorkville
University of Toronto
 Harbord
Kensington Market
 Chinatown
 Grange Park
CN Tower
 King and Spadina
 Railway Lands
 Harbourfront West
 Bathurst Quay
 South Niagara
 Island airport
Stn A PO Boxes
First Canadian Place
 Underground city
Christie
Dufferin
 Dovercourt Village
Little Portugal
 Trinity
Brockton
 Parkdale Village
 Exhibition Place
High Park
 The Junction South
Parkdale
 Ron

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,The Beaches,43.676357,-79.293031,Seaspray Restaurant,43.678888,-79.298167,Asian Restaurant


In [44]:
h=sorted(Toronto_venues['Venue Category'].unique())
print(h)

['Afghan Restaurant', 'Airport', 'Airport Food Court', 'Airport Gate', 'Airport Lounge', 'Airport Service', 'Airport Terminal', 'American Restaurant', 'Antique Shop', 'Aquarium', 'Art Gallery', 'Art Museum', 'Arts & Crafts Store', 'Asian Restaurant', 'Auto Workshop', 'BBQ Joint', 'Baby Store', 'Bagel Shop', 'Bakery', 'Bank', 'Bar', 'Baseball Stadium', 'Basketball Stadium', 'Beach', 'Bed & Breakfast', 'Beer Bar', 'Beer Store', 'Belgian Restaurant', 'Bistro', 'Boat or Ferry', 'Bookstore', 'Boutique', 'Brazilian Restaurant', 'Breakfast Spot', 'Brewery', 'Bubble Tea Shop', 'Building', 'Burger Joint', 'Burrito Place', 'Bus Line', 'Butcher', 'Café', 'Cajun / Creole Restaurant', 'Candy Store', 'Caribbean Restaurant', 'Cheese Shop', 'Chinese Restaurant', 'Chocolate Shop', 'Church', 'Climbing Gym', 'Clothing Store', 'Cocktail Bar', 'Coffee Shop', 'College Arts Building', 'College Auditorium', 'College Gym', 'College Rec Center', 'Colombian Restaurant', 'Comfort Food Restaurant', 'Comic Shop', '

#### as we can see there is a venue named Neighborhood. So let us name our neighborhood column Neighbourhood 

In [45]:
toronto_onehot=pd.get_dummies(Toronto_venues[['Venue Category']],prefix="",prefix_sep="")
toronto_onehot['Neighbourhood']=Toronto_venues['Neighborhood']
columns_changed=[toronto_onehot.columns[-1]]+list(toronto_onehot.columns[:-1])#changing neighborhood as first column
toronto_onehot=toronto_onehot[columns_changed]
toronto_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,...,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Group the values by neighbourhood and find their mean

In [46]:
toronto_grouped=toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,...,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021505,0.0,0.0,0.010753,0.010753,0.0,0.010753,0.0,0.0,0.0,0.0,0.010753,0.0,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021505,0.0,0.010753,0.010753,0.0,0.0,0.010753,0.010753,0.010753,...,0.021505,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.010753,0.0,0.010753,0.0,0.0,0.0,0.0,0.021505,0.0,0.0,0.0,0.021505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.010753,0.0,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.010753,0.0
1,Bathurst Quay,0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cabbagetown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chinatown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.053571,0.0,0.035714,0.0,0.0,0.0,0.017857,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.017857,0.017857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.053571,0.017857,0.0,0.0,0.0
4,Deer Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0


In [0]:
#function to return popular venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [48]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Restaurant,Clothing Store,Deli / Bodega,Gym,Thai Restaurant,Hotel,Bookstore,Seafood Restaurant
1,Bathurst Quay,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Boat or Ferry,Plane,Rental Car Location,Sculpture Garden,Boutique,Airport Gate
2,Cabbagetown,Coffee Shop,Pizza Place,Park,Italian Restaurant,Convenience Store,Bakery,Café,Restaurant,Pub,Pet Store
3,Chinatown,Café,Coffee Shop,Vietnamese Restaurant,Mexican Restaurant,Bakery,Gaming Cafe,Bar,Vegetarian / Vegan Restaurant,Dessert Shop,Burger Joint
4,Deer Park,Coffee Shop,Pub,Liquor Store,Sports Bar,Restaurant,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint


## Now applying K-Means clustering algorithm on grouped venues

In [0]:
clusters=5
toronto_clustering=toronto_grouped.drop('Neighbourhood',axis=1)
k=KMeans(init='k-means++',n_clusters=clusters,random_state=2).fit(toronto_clustering)


### Insert labels of clusters as a column

In [50]:
neighborhoods_venues_sorted.insert(0,'cluster',k.labels_)
neighborhoods_venues_sorted.rename(columns={'Neighbourhood':'Neighborhood'},inplace=True)#for merging dataframes we need to rename neighbourhood column
neighborhoods_venues_sorted.head()

Unnamed: 0,cluster,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,Adelaide,Coffee Shop,Café,Restaurant,Clothing Store,Deli / Bodega,Gym,Thai Restaurant,Hotel,Bookstore,Seafood Restaurant
1,1,Bathurst Quay,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Boat or Ferry,Plane,Rental Car Location,Sculpture Garden,Boutique,Airport Gate
2,2,Cabbagetown,Coffee Shop,Pizza Place,Park,Italian Restaurant,Convenience Store,Bakery,Café,Restaurant,Pub,Pet Store
3,2,Chinatown,Café,Coffee Shop,Vietnamese Restaurant,Mexican Restaurant,Bakery,Gaming Cafe,Bar,Vegetarian / Vegan Restaurant,Dessert Shop,Burger Joint
4,2,Deer Park,Coffee Shop,Pub,Liquor Store,Sports Bar,Restaurant,Bank,Supermarket,Sushi Restaurant,Bagel Shop,Fried Chicken Joint


In [51]:

toronto_merged = toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,2,Asian Restaurant,Health Food Store,Pub,Trail,Neighborhood,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dessert Shop
1,M4K,East Toronto,The Danforth West,43.679557,-79.352188,2,Greek Restaurant,Italian Restaurant,Coffee Shop,Furniture / Home Store,Restaurant,Ice Cream Shop,Yoga Studio,Bubble Tea Shop,Spa,Juice Bar
2,M4K,East Toronto,Riverdale,43.679557,-79.352188,2,Greek Restaurant,Italian Restaurant,Coffee Shop,Furniture / Home Store,Restaurant,Ice Cream Shop,Yoga Studio,Bubble Tea Shop,Spa,Juice Bar
3,M4L,East Toronto,India Bazaar,43.668999,-79.315572,2,Sandwich Place,Fast Food Restaurant,Pizza Place,Pet Store,Pub,Liquor Store,Burrito Place,Fish & Chips Shop,Italian Restaurant,Steakhouse
4,M4L,East Toronto,The Beaches West,43.668999,-79.315572,2,Sandwich Place,Fast Food Restaurant,Pizza Place,Pet Store,Pub,Liquor Store,Burrito Place,Fish & Chips Shop,Italian Restaurant,Steakhouse


In [52]:
# create map
address='Toronto,Ontario'
geo=Nominatim(user_agent='Toronto')
loc=geo.geocode(address)
lat=loc.latitude
lon=loc.longitude
map_clusters = folium.Map(location=[lat, lon], zoom_start=11)

# set color scheme for the clusters
x = np.arange(clusters)
ys = [i + x + (i*x)**2 for i in range(clusters)]
print(ys)
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

[array([0, 1, 2, 3, 4]), array([ 1,  3,  7, 13, 21]), array([ 2,  7, 20, 41, 70]), array([  3,  13,  41,  87, 151]), array([  4,  21,  70, 151, 264])]


### This notebook is created by **Manasa devi Chakka**