# This section represents how data was scraped from the Wikipedia page and transformed into a pandas dataframe.

In [1]:
# Below are the libraries required for this project

import pandas as pd # To allow creation and manipulation of dataframes
import numpy as np # To allow execution of mathematical operations on arrays , matrices 
import urllib.request # To allow query of a website
from bs4 import BeautifulSoup # To allow extraction of unstructured data from website 

In [2]:
# Storing the url into the variable web_url

web_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Opening the website and query through the variable web_page

web_page = urllib.request.urlopen(web_url)

In [3]:
# Storing the parsed html into beautiful soup format

web_soup = BeautifulSoup(web_page)

# Since we are interested in the table content of the page, lets find all the tables listed in the HTML

web_soup_tables = web_soup.find_all('table')

web_soup_tables

[<table class="wikitable sortable">
 <tbody><tr>
 <th>Postcode</th>
 <th>Borough</th>
 <th>Neighbourhood
 </th></tr>
 <tr>
 <td>M1A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M2A</td>
 <td>Not assigned</td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M3A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
 </td></tr>
 <tr>
 <td>M4A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
 </td></tr>
 <tr>
 <td>M5A</td>
 <td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
 <td><a href="/wiki/Regent_Park" title="Regent Park">Harbourfront</a>
 </td></tr>
 <tr>
 <td>M6A</td>
 <td><a href="/wiki/North_York" title="North York">North York</a></td>
 <td><a href="/wiki/Lawrence_Heights" title="Lawrence Heights">Lawrence Heights</a>
 </td></tr>
 <tr>


In [4]:
# Finding the right table by inspecting the class of the data strucutre

right_web_soup_table = web_soup.find('table', class_= 'wikitable sortable')

# Display if the table has been extracted accurately

right_web_soup_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Harbourfront</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Lawrence_Heights" title="Lawrence Heights">Lawrence Heights</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North

In [5]:
# Through the information, it has been identified that three columns of data were to be extracted and thus three empty list were created to accommodate its contents

p_list = []
b_list = []
n_list =[]

# Looping through the correct table and finding all rows of entry (value)

for row in right_web_soup_table.findAll('tr'):
    
    # Creating an object where att is the variable holding each attribute (column value) of the rows 
    
    att = row.findAll('td')
    
    # This is a checking mechanism that means it has complete values for each row
    
    if len(att)==3:
        
        # Postal Code Value
        
        p_list.append(att[0].find(text=True))
        
        # Borough Value
        
        b_list.append(att[1].find(text=True))
        
        # Neighbourhood Value
        
        n_list.append(att[2].find(text=True))


In [6]:
# Specifying the names of the column that will hold the list created

column_names = ['Postalcode','Borough','Neighbourhood']

# Creating a blank dataframe with the column names stated in variable "column_names"

df = pd.DataFrame(columns=column_names)

# Moving the contents of the list to the column of the dataframe

df['Postalcode']=p_list
df['Borough']=b_list
df['Neighbourhood']=n_list

# Displaying the top 10 rows of the dataframe

df.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned\n
9,M9A,Queen's Park,Not assigned\n


In [7]:
# Creating a copy of the raw dataframe and then filtering it to contain only values that have valid Borough entries

df_copy = df[df['Borough']!='Not assigned']

# Resetting the numerical hierarchy of the index

df_copy.reset_index(drop=True,inplace=True)

# Displaying the top 10 rows of the dataframe

df_copy.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Queen's Park,Not assigned\n
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North\n


In [8]:
# Create a loop function that specifies the name of the neighbourhood as the name of the Borough if the original neighbourhood does not contain any valid data

for item in range(df_copy.shape[0]):
    
    # This is to check if the index containt the words that "Not assigned"
    
    if df_copy['Neighbourhood'][item] == 'Not assigned\n':
        
        # Replacing the value of the neighbourhood with the borough name
        
        df_copy['Neighbourhood'][item] = df_copy['Borough'][item]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


In [9]:
# This section is just a measure to remove any whitespace characters in each entry

for item in range(df_copy.shape[0]):
    
    df_copy['Neighbourhood'][item] =df_copy['Neighbourhood'][item].strip('\n')
    df_copy['Borough'][item] =df_copy['Borough'][item].strip('\n')

df_copy.shape

(210, 3)

In [10]:
# Creating a list that contains all the unique values of the Postalcode

unique_list= df_copy['Postalcode'].unique()

# Creating another column name for the second dataframe that will hold the finalized values

column_names_2 = ['Postalcode', 'Borough', 'Neighbourhood']

# Creating a second dataframe to accommodate additional filtering of data

df_unique = pd.DataFrame(columns=column_names_2)

# Moving the contents the unique list of postalcode to the dataframe

df_unique['Postalcode'] = unique_list

# Converting the data type to string to accommodate concatenation of string values later on

df_unique['Borough']= df_unique['Borough'].astype(str)
df_unique['Neighbourhood']= df_unique['Neighbourhood'].astype(str)

# Displaying the top 10 rows

df_unique.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,,
1,M4A,,
2,M5A,,
3,M6A,,
4,M7A,,
5,M9A,,
6,M1B,,
7,M3B,,
8,M4B,,
9,M5B,,


In [11]:
# This is a loop function that will ultimately complete the created dataframe based on the requirements of the assignment

# Looping through the rows of the newly created dataframe

for row in range(df_unique.shape[0]):
    
    # Looping through the rows of the original dataframe
    
    for row_2 in range(df_copy.shape[0]):
        
        # If it contains the same postalcode then
        
        if df_unique['Postalcode'][row] == df_copy['Postalcode'][row_2]:
            
            # Neighbourhood inputs
            
            # If original value is empty then omit the empty value
            
            if df_unique['Neighbourhood'][row] == 'nan':
                
                df_unique['Neighbourhood'][row] = df_copy['Neighbourhood'][row_2]
            
            # If value is the same as the original content then retain value
            
            elif df_unique['Neighbourhood'][row] == df_copy['Neighbourhood'][row_2]:
                
                df_unique['Neighbourhood'][row] == df_unique['Neighbourhood'][row]
            
            # If value is different from the original, concatenate the strings
            
            else:
                
                df_unique['Neighbourhood'][row] = df_unique['Neighbourhood'][row] + ", " + df_copy['Neighbourhood'][row_2]
            
            # Borough Inputs
            
            # If original value is empty then omit the empty value
            
            if df_unique['Borough'][row] == 'nan':
                
                df_unique['Borough'][row] = df_copy['Borough'][row_2]
            
            # If value is the same as the original content then retain value
            
            elif df_unique['Borough'][row] == df_copy['Borough'][row_2]:
                
                df_unique['Borough'][row] == df_unique['Borough'][row] 
             
            # If value is different from the original, concatenate the strings
            
            else:
                
                df_unique['Borough'][row] = df_unique['Borough'][row] + ", " + df_copy['Borough'][row_2]

# Displaying the top 10 rows of the dataframe

df_unique.head(10)


Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [12]:
# Displaying the shape of the dataframe

df_unique.shape

(103, 3)

# After trying multiple times on extracting coordinates using Geocode syntax, it was unsuccessful and thus reverting to use of the CSV to gather the necessary data.

In [14]:
# Reading the csv file containing the coordinates and storing them into the variable "df_geospatial"

df_geospatial = pd.read_csv('Geospatial_Coordinates.csv')

# Reading the first 10 rows of the dataframe

df_geospatial.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [15]:
# Creating a copy of the Postalcode, Borough and Neighbourhood dataframe

df_unique_copy = df_unique

# Create a blank column to represent the Latitude and Longitude values

column_list = ['Postalcode','Borough','Neighbourhood','Latitude','Longitude']

df_unique_copy = df_unique_copy.reindex(columns=column_list)

# The new shape should have 5 columns to represent successful creation of the additional latitude and longitude columns

df_unique_copy.shape

(103, 5)

In [16]:
# Creation of a loop that will extract the correct latitude and longitude values based on postal code

for row in range(df_unique_copy.shape[0]):
    
    # Looping through the different values of latitude and longitude dataframe
    
    for row_2 in range(df_geospatial.shape[0]):
        
        # If the Postal code matches then get the corresponding latitude and longitude values
        
        if df_geospatial['Postal Code'][row_2] == df_unique_copy['Postalcode'][row]:
            
            # Copy the latitude and longitude values
            
            df_unique_copy['Latitude'][row] = df_geospatial['Latitude'][row_2]
            df_unique_copy['Longitude'][row] = df_geospatial['Longitude'][row_2]

# Display the top 10 rows

df_unique_copy.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [17]:
# Validate the correct size of 103 rows and 5 columns

df_unique_copy.shape

(103, 5)

# This section will now refer to exploring and clustering neighbourhoods in the created dataframe

In [18]:
# For this assignment, I will be clustering and exploring neighbourhoods encompassing Central Toronto

# Creation of the dataframe

df_cent_tor = df_unique_copy[df_unique_copy['Borough']=='Central Toronto']

# Dropping the original index and resetting

df_cent_tor.reset_index(drop=True, inplace=True)

# Displaying the first 10 rows of the newly created dataframe

df_cent_tor.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197
3,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307
4,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
5,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
6,M4S,Central Toronto,Davisville,43.704324,-79.38879
7,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
8,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [19]:
# Visualizing the different neighbourhoods in the Central Toronto throuhg folium

# Installing the folium system (package)

!conda install -c conda-forge folium=0.5.0 --yes

# Importing the folium library to allow mapping of geographical coordinates

import folium # map rendering library

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.1

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



In [20]:
# Before proceeding to creating the visuals for the neighbourhood, extract the coordinates of Central Toronto

# Installing the geopy system (package)

!conda install -c conda-forge geopy --yes 

# Importing the Nominatim function which allows creation of geocode object for extraction of latitude and longitude values

from geopy.geocoders import Nominatim 

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.1

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



In [21]:
# Creation of the Nominatim object for the Central Toronto coordinates

# Storing the address in a variable named 'complete_add'

complete_add = 'Central Toronto, Canada'

# Creating the object instance

GeoAddress = Nominatim(user_agent='toronto_explorer')

# Creation of the object for latitude and longitude coordinates

GeoCoordinates = GeoAddress.geocode(complete_add)

# Extraction of the latitude and longitude values

Add_lat = GeoCoordinates.latitude
Add_long = GeoCoordinates.longitude

# Printing the coordinates

print('The geograpical coordinate of Central Toronto, Canada are {}, {}.'.format(Add_lat, Add_long))

The geograpical coordinate of Central Toronto, Canada are 43.653963, -79.387207.


In [22]:
# Create a map of the neighbourhood of Central Toronto using geographical coordinates

Cent_tor_map = folium.Map(location=[Add_lat, Add_long], zoom_start = 12)

# Looping through the neighbourhoods and creating visuals for each map

for lat, lng, borough, neighbourhood in zip(df_cent_tor['Latitude'], df_cent_tor['Longitude'], df_cent_tor['Borough'], df_cent_tor['Neighbourhood']):
    
    # Create a label object containing the neighbourhood and borough name
    
    label = '{}, {}'.format(neighbourhood, borough)
    
    # Providing a pop-up setting for the created label object
    
    label = folium.Popup(label, parse_html=True)
    
    # Creating a circle market for each neighbourhood in Central Toronto
    
    folium.CircleMarker(
        
        # Latitude and Longitude values
        [lat, lng],
        # Radius of the circle
        radius=5,
        # Specifying the contents of the pop-up
        popup=label,
        # Defining the color of the marker
        color='green',
        # Specifying to color the inside of the circle
        fill=True,
        fill_opacity=0.7,
        parse_html=False).add_to(Cent_tor_map)  
    
Cent_tor_map

In [23]:
# Exploring the venues located in each neighbourhood of Central Toronto

# First, define the credentials required to use Foursquare API

# Foursquare ID

user_id = 'TUCQICNMB5ZXJYCKCD2SGD1A4WERIUMJUHB2O5QXAH00CENU' # your Foursquare ID

# Foursquare Password

user_secret = 'PWRSWNQ45HEAVNCMWXHLDYZSU4ID1O033G1YEW3DNDKVRC1T' # your Foursquare Secret

# Foursquare version

user_version = '20180605' # Foursquare API version

In [24]:
# Importing library that will allow calls to API

import requests 

# Importing library for handling json files

import json

# Importing function to convert json to pd dataframe

from pandas.io.json import json_normalize

# Creating a loop that will get the 10 venues per neighbourhood per borough

# Creating a blank list that will store information of each venue extracted based on user defined radius and limit

cent_tor_venues=[]

user_radius = 1000
user_limit = 10

# Loop through each row of the created dataframe for Central Toronto

for row in range(df_cent_tor.shape[0]):
    
    # Defining the needed latitude and longitude values
    
    user_lat = df_cent_tor['Latitude'][row]
    user_long = df_cent_tor['Longitude'][row]
    
    # Creating the URL to allow API calls
    
    api_url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        user_id, user_secret, user_version, user_lat, user_long, user_radius, user_limit)
    
    # Making the call to the API
    
    extraction_results = requests.get(api_url).json()['response']['groups'][0]['items']
    
    # Appending only the relevant information on the blank list
    
    cent_tor_venues.append([(df_cent_tor['Postalcode'][row],
                             df_cent_tor['Neighbourhood'][row],
                             each_result['venue']['location']['lat'],
                             each_result['venue']['location']['lng'],
                             each_result['venue']['name'],
                             each_result['venue']['categories'][0]['name']) for each_result in extraction_results])

# Converting the list into a dataframe

closest_venues_cent_tor = pd.DataFrame([item for cent_tor_venues in cent_tor_venues for item in cent_tor_venues])
closest_venues_cent_tor.columns = ['Postalcode', 
                                  'Neighbourhood', 
                                  'Venue Latitude', 
                                  'Venue Longitude',
                                  'Venue Name',
                                  'Venue Category']

# Display the top 10 rows of the closest venues per neighbourhood
closest_venues_cent_tor.head(10)

Unnamed: 0,Postalcode,Neighbourhood,Venue Latitude,Venue Longitude,Venue Name,Venue Category
0,M4N,Lawrence Park,43.726963,-79.394382,Lawrence Park Ravine,Park
1,M4N,Lawrence Park,43.733043,-79.381986,Granite Club,Gym / Fitness Center
2,M4N,Lawrence Park,43.727324,-79.379563,Tim Hortons,Coffee Shop
3,M4N,Lawrence Park,43.727024,-79.378976,Glendon Bookstore,Bookstore
4,M4N,Lawrence Park,43.727226,-79.378413,Glendon Forest,Trail
5,M4N,Lawrence Park,43.727334,-79.378222,Glendon Rose Garden,College Quad
6,M4N,Lawrence Park,43.727311,-79.377835,Lunik Co-op,Café
7,M4N,Lawrence Park,43.728799,-79.376819,Glendon Athletic Club,College Gym
8,M5N,Roselawn,43.703688,-79.413485,The Abbot,Gastropub
9,M5N,Roselawn,43.703478,-79.414311,Hotel Gelato,Café


In [25]:
# Importing libraries needed for the clustering of the data

# Libraries necessary for plotting of relevant visuls

import matplotlib.cm as cm
import matplotlib.colors as colors

# Importing the KMeans clustering algorithm

from sklearn.cluster import KMeans

# Defining the number of clusters to be made

cluster_number = 5

# Create dummy variables for the different venue categories

cent_tor_venues = pd.get_dummies(closest_venues_cent_tor[['Venue Category']], prefix="", prefix_sep="")

# Combine the two dataframes created

cent_tor_cluster = pd.concat([closest_venues_cent_tor, cent_tor_venues], axis=1)

# Drop the original venue category prior to providing dummy variables

cent_tor_cluster.drop('Venue Category', axis=1, inplace=True)

# Group the dataframe based on their neighbourhood together with the frequency of each category

cent_tor_cluster_grouped = cent_tor_cluster.groupby('Neighbourhood').mean().reset_index()

# Since neighbourhoods have been grouped, dropping the neighbourhood column

cent_tor_cluster = cent_tor_cluster_grouped.drop('Neighbourhood', 1)

# Create the K-Means Clustering Statement

cent_tor_kMeans = KMeans(n_clusters=cluster_number, random_state=0).fit(cent_tor_cluster)

# check cluster labels generated for each row in the dataframe

cent_tor_kMeans.labels_[0:9] 

array([0, 1, 4, 1, 3, 2, 1, 1, 0], dtype=int32)

In [26]:
# Add the created cluster labels to the dataframe

cent_tor_cluster_grouped.insert(0, 'Cluster Labels', cent_tor_kMeans.labels_)

# Creating the final dataframe which is the Central Toronto Neighbourhoods and their cluster identifications

central_toronto = df_cent_tor.join(cent_tor_cluster_grouped.set_index('Neighbourhood'), on='Neighbourhood')

In [27]:
# Checking the final dataframe with each neighbourhood and their corresponding clusters

central_toronto.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,Venue Latitude,Venue Longitude,American Restaurant,BBQ Joint,...,Seafood Restaurant,Skating Rink,Spa,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Trail,Vegetarian / Vegan Restaurant,Yoga Studio
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,43.728128,-79.380774,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936,1,43.703831,-79.41359,0.0,0.0,...,0.0,0.1,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197,1,43.711499,-79.396397,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.0,0.1,0.0,0.0,0.0
3,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307,1,43.700344,-79.412541,0.0,0.0,...,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.1,0.0,0.0
4,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,1,43.712566,-79.399542,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.1
5,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,0,43.674971,-79.406916,0.1,0.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
6,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,43.704904,-79.389778,0.0,0.0,...,0.1,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0
7,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,2,43.687792,-79.388843,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0
8,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049,4,43.68649,-79.395631,0.1,0.0,...,0.0,0.0,0.1,0.1,0.0,0.1,0.1,0.0,0.0,0.0


In [28]:
# Create a map of Central Toronto using the earlier declared latitude and longitude

Central_Toronto_Clusters = folium.Map(location=[Add_lat, Add_long], zoom_start=11)

# Provide the color scheme for the clusters

# This creates a range of color values depending on the size of the cluster
X = np.arange(cluster_number)
Y = [i + X + (i*X)**2 for i in range(cluster_number)]

# This creates the color schemes that will be used for the visualization of the map
Color_Scheme = cm.rainbow(np.linspace(0, 1, len(Y)))
Color_Array = [colors.rgb2hex(i) for i in Color_Scheme]

# This will create a marker for each neighbourhood and their color defined by their respective clusters
Cluster_Colors = []

# Looping through the rows of the dataframe

for Latitude, Longitude, Neighbourhood, Cluster in zip(central_toronto['Latitude'], central_toronto['Longitude'], central_toronto['Neighbourhood'], central_toronto['Cluster Labels']):
    
    # Creating a pop-up label that provides the neighbourhood name and their cluster
    
    label = folium.Popup(str(Neighbourhood) + ' Cluster ' + str(Cluster), parse_html=True)
    
    # Creating a circle marker for each data entry
    
    folium.CircleMarker(
        
        # The latitude and longitude of each entry
        
        [Latitude, Longitude],
        
        # The size of each marker
        
        radius=5,
        
        # The content of the pop-up
        
        popup=label,
        
        # The color of each marker
        
        color=Color_Array[Cluster-1],
        fill=True,
        fill_color=Color_Array[Cluster-1],
        fill_opacity=0.7).add_to(Central_Toronto_Clusters)
       
Central_Toronto_Clusters

# This Ends the Assignment. Thank you.