## 1. Scraping Data and Making df

In [1]:
import requests  
!pip install beautifulsoup4
from bs4 import BeautifulSoup 
import pandas as pd

Collecting beautifulsoup4
[?25l  Downloading https://files.pythonhosted.org/packages/66/25/ff030e2437265616a1e9b25ccc864e0371a0bc3adb7c5a404fd661c6f4f6/beautifulsoup4-4.9.1-py3-none-any.whl (115kB)
[K     |████████████████████████████████| 122kB 7.6MB/s eta 0:00:01
[?25hCollecting soupsieve>1.2 (from beautifulsoup4)
  Downloading https://files.pythonhosted.org/packages/6f/8f/457f4a5390eeae1cc3aeab89deb7724c965be841ffca6cfca9197482e470/soupsieve-2.0.1-py3-none-any.whl
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.9.1 soupsieve-2.0.1


### Importing Data from Wikipedia

In [2]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(url,'html.parser')

In [3]:
My_table=soup.find('table', class_='wikitable sortable')

In [4]:
postal = []
borough = []
neighborhood = []

for row in My_table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==3:
        postal.append(cells[0].find(text=True))
        borough.append(cells[1].find(text=True))
        neighborhood.append(cells[2].find(text=True))
print(postal[0:5])
print(borough[0:5])
print(neighborhood[0:5])

['M1A\n', 'M2A\n', 'M3A\n', 'M4A\n', 'M5A\n']
['Not assigned\n', 'Not assigned\n', 'North York\n', 'North York\n', 'Downtown Toronto\n']
['Not assigned\n', 'Not assigned\n', 'Parkwoods\n', 'Victoria Village\n', 'Regent Park, Harbourfront\n']


### Building Data Frame

In [5]:
df=pd.DataFrame(postal,columns=['Postal Code'])
df['Borough']=borough
df['Neighborhood']=neighborhood
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


In [6]:
df.shape

(180, 3)

### Cleanning the data

In [7]:
# Remove: '\n' from Data
df['Postal Code'] = df['Postal Code'].str.replace(r'\n', '')
df['Borough'] = df['Borough'].str.replace(r'\n', '')
df['Neighborhood'] = df['Neighborhood'].str.replace(r'\n', '')
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [8]:
df.groupby('Borough').size()

Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Not assigned        77
Scarborough         17
West Toronto         6
York                 5
dtype: int64

In [9]:
# Remove 'Not assigned' data in Borough
df1 = df[df.Borough!='Not assigned']
df1.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [10]:
df1.reset_index(drop=True,inplace=True)
print(df1.shape)
df1.head()

(103, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [11]:
# Remove 'Not assigned' data from Neighborhood
import numpy as np
df1['Neighborhood'] = np.where(df1['Neighborhood'] == 'Not assigned', df1['Borough'], df1['Neighborhood'])
df1.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [12]:
df1.shape

(103, 3)

## 2. Getting coordinates of each neighborhood

In [13]:
df2 = df1.sort_values("Postal Code", ascending=True)
df2.reset_index(drop=True,inplace=True)
df2.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [14]:
df_csv = pd.read_csv('Geospatial_Coordinates.csv')
df_csv = df_csv.sort_values("Postal Code", ascending=True) #Organize ascending the data
df_csv.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
df_csv.shape

(103, 3)

In [16]:
df_csv1=df_csv.drop(['Postal Code'], axis=1)
df_csv1.head()

Unnamed: 0,Latitude,Longitude
0,43.806686,-79.194353
1,43.784535,-79.160497
2,43.763573,-79.188711
3,43.770992,-79.216917
4,43.773136,-79.239476


In [17]:
neighborhoods=pd.concat([df2, df_csv1], axis=1)
neighborhoods= neighborhoods.sort_values("Neighborhood", ascending=True) #Organize ascending the data
neighborhoods.reset_index(drop=True,inplace=True)
#neighborhoods.to_csv('neighborhoods.csv')
print(neighborhoods.shape)
neighborhoods.head(12)

(103, 5)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1S,Scarborough,Agincourt,43.7942,-79.262029
1,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
2,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
3,M2K,North York,Bayview Village,43.786947,-79.385975
4,M5M,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
7,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191
8,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442


In [18]:
# After some problems I realize that there are 4 repeated geighborhoods and 4 where there is no venues,
# so we rearrange the data frame in order to clean these neighborhoods.
neighborhoods_drop=neighborhoods.drop([22,25,26,27,46,90,96,102])
print(neighborhoods_drop.shape)
neighborhoods_drop.reset_index(drop=True,inplace=True)
#neighborhoods_drop.to_csv('neighborhoods_drop.csv')
neighborhoods_drop.head()

(95, 5)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1S,Scarborough,Agincourt,43.7942,-79.262029
1,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
2,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
3,M2K,North York,Bayview Village,43.786947,-79.385975
4,M5M,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975


## 3. Explore and cluster the neighborhoods in Toronto

In [19]:
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.2 |       hecda079_0         147 KB  conda-forge
    certifi-2020.4.5.2         |   py36h9f0ad1d_0         152 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         395 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.22.0-pyh9f0ad1d_0

The following packages will b

In [20]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods_drop['Borough'].unique()),
        neighborhoods_drop.shape[0]
    )
)

The dataframe has 10 boroughs and 95 neighborhoods.


In [21]:
#Use geopy library to get the latitude and longitude values of Toronto.
#In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent ny_explorer

address = 'Toronto City, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.6534817, -79.3839347.


In [22]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood_drop in zip(neighborhoods_drop['Latitude'], neighborhoods_drop['Longitude'], neighborhoods_drop['Borough'], neighborhoods_drop['Neighborhood']):
    label = '{}, {}'.format(neighborhood_drop, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Define Foursquare Credentials and Version

In [23]:
CLIENT_ID = 'RXMC3MBV3FWUEZKRNTUBR15YR334VHS3TRN35BLMHPCCIZTI' # your Foursquare ID
CLIENT_SECRET = 'H1KDAB3QUWBLVVJBUSEH44AHA0G4AZQLCVTNS0RAXABBEBQ0' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RXMC3MBV3FWUEZKRNTUBR15YR334VHS3TRN35BLMHPCCIZTI
CLIENT_SECRET:H1KDAB3QUWBLVVJBUSEH44AHA0G4AZQLCVTNS0RAXABBEBQ0


#### Let's explore the first neighborhood in our dataframe.

In [24]:
#Get the neighborhood's name.
neighborhoods_drop.loc[0, 'Neighborhood']

'Agincourt'

In [25]:
#Get the neighborhood's latitude and longitude values.
neighborhood_latitude = neighborhoods_drop.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods_drop.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods_drop.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Agincourt are 43.7942003, -79.26202940000002.


#### Now, let's get the top 100 venues that are in Malvern, Rouge within a radius of 500 meters.

In [26]:
#First, let's create the GET request URL. Name your URL url.
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=RXMC3MBV3FWUEZKRNTUBR15YR334VHS3TRN35BLMHPCCIZTI&client_secret=H1KDAB3QUWBLVVJBUSEH44AHA0G4AZQLCVTNS0RAXABBEBQ0&v=20180605&ll=43.7942003,-79.26202940000002&radius=500&limit=100'

In [27]:
#Send the GET request and examine the resutls
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ee188fff89b1820a69b2c53'},
 'response': {'headerLocation': 'Agincourt',
  'headerFullLocation': 'Agincourt, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.7987003045, 'lng': -79.25580688126533},
   'sw': {'lat': 43.7897002955, 'lng': -79.2682519187347}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e3451fcd4c063434821c41e',
       'name': "Panagio's Breakfast & Lunch",
       'location': {'address': '1928 McCowan Rd.',
        'lat': 43.79237001092506,
        'lng': -79.26020325805978,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.79237001092506,
          'lng': -79.26020325805978}],
        'distance': 251,
        'postalCode': 'M1S 4K

In [28]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [29]:
#Now we are ready to clean the json and structure it into a pandas dataframe.
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  after removing the cwd from sys.path.


Unnamed: 0,name,categories,lat,lng
0,Panagio's Breakfast & Lunch,Breakfast Spot,43.79237,-79.260203
1,El Pulgarcito,Latin American Restaurant,43.792648,-79.259208
2,Twilight,Lounge,43.791999,-79.258584
3,Commander Arena,Skating Rink,43.794867,-79.267989


In [30]:
#And how many venues were returned by Foursquare?
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


## Explore Neighborhoods in Toronto

In [31]:
#Let's create a function to repeat the same process to all the neighborhoods in Toronto
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [None]:
#Now write the code to run the above function on each neighborhood and create a new dataframe called toronto_venues.
toronto_venues = getNearbyVenues(names=neighborhoods_drop['Neighborhood'],
                                   latitudes=neighborhoods_drop['Latitude'],
                                   longitudes=neighborhoods_drop['Longitude']
                                  )

Agincourt
Alderwood, Long Branch
Bathurst Manor, Wilson Heights, Downsview North
Bayview Village
Bedford Park, Lawrence Manor East
Berczy Park
Birch Cliff, Cliffside West
Brockton, Parkdale Village, Exhibition Place
Business reply mail Processing Centre, South Central Letter Processing Plant Toronto
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Caledonia-Fairbanks
Canada Post Gateway Processing Centre
Cedarbrae
Central Bay Street
Christie
Church and Wellesley
Clarks Corners, Tam O'Shanter, Sullivan
Cliffside, Cliffcrest, Scarborough Village West
Commerce Court, Victoria Hotel
Davisville
Davisville North
Del Ray, Mount Dennis, Keelsdale and Silverthorn
Don Mills
Dorset Park, Wexford Heights, Scarborough Town Centre
Downsview
Dufferin, Dovercourt Village
East Toronto, Broadview North (Old East York)
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Fairview, Henry Farm, Oriole
First Canadian Place, Underground city


In [None]:
#Let's check the size of the resulting dataframe
print(toronto_venues.shape)
toronto_venues.head()

In [None]:
#Let's check how many venues were returned for each neighborhood
toronto_venues.groupby('Neighborhood').count()

In [None]:
#Let's find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

## Analyze Each Neighborhood

In [None]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-79]] + list(toronto_onehot.columns[:-79])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

In [None]:
toronto_onehot.to_csv('toronto_onehot.csv')
toronto_onehot.shape

In [None]:
#Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

In [None]:
toronto_grouped.to_csv('toronto_grouped.csv')
toronto_grouped.shape

In [None]:
#Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

In [None]:
#Let's put that into a pandas dataframe
#First, let's write a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [None]:
#Now let's create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

In [None]:
neighborhoods_venues_sorted.shape

## Cluster Neighborhoods

In [None]:
#Run k-means to cluster the neighborhood into 5 clusters.
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

In [None]:
#df_labels = pd.DataFrame(kmeans.labels_)
#df_labels.head(95)
#df_labels.dtypes

In [None]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
#neighborhoods_venues_sorted.append(df_labels)

toronto_merged = neighborhoods_drop

# merge toronto_grouped with neighborhood to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
toronto_merged.head() # check the last columns!

In [None]:
#Let's visualize the resulting clusters
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

In [None]:
#Cluster-1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

In [None]:
#Cluster-2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

In [None]:
#Cluster-3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

In [None]:
#Cluster-3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

In [None]:
#Cluster-4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]