In [1]:
#Installing all the required libraries for this assignment
import numpy as np 
import pandas as pd
import json
#Using Beautiful Soup for Webscrapping
from bs4 import BeautifulSoup
import lxml
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs
# library for map rendering
!conda install -c conda-forge folium=0.5.0 --yes
import folium
#Library for getting longititude and latitude of an address
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
#For converting Json file into Dataframe
from pandas.io.json import json_normalize
# library for requests
import requests 

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    altair-3.1.0               |           py36_0         724 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.2 MB

The following NEW packages will be 

In [57]:
#downloading the data
link = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
soup = BeautifulSoup(link.text)
table=soup.find('table', attrs={'class':'wikitable sortable'})

In [58]:
#getting header of the table and removing <th> from the headings
columns = table.findAll('th')
for i, column in enumerate(columns): 
    columns[i]=str(columns[i]).replace("<th>","").replace("</th>","").replace("\n","")
columns,i

(['Postcode', 'Borough', 'Neighbourhood'], 2)

In [59]:
#getting rows of the table and removing <td> & <tr> from the headings
rows=table.findAll('tr')
rows=rows[1:len(rows)]
for i, row in enumerate(rows): 
    rows[i] = str(rows[i]).replace("\n</td></tr>","").replace("<tr>\n<td>","")

In [60]:
# creating canada_location dataframe, expand rows and drop the old one:
canada_location = pd.DataFrame(rows)
canada_location[columns] = canada_location[0].str.split("</td>\n<td>", n = 2, expand = True) 
canada_location.drop(columns=[0],inplace=True)
canada_location.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Parkwoods"" title=""Parkwoods"">Pa..."
3,M4A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Victoria_Village"" title=""Victor..."
4,M5A,"<a href=""/wiki/Downtown_Toronto"" title=""Downto...","<a href=""/wiki/Harbourfront_(Toronto)"" title=""..."


In [61]:
#skipping not assigned rows
canada_location = canada_location.drop(canada_location[(canada_location.Borough == "Not assigned")].index)
#canada_location.head(5)
# give "Not assigned" Neighborhoods same name as Borough:
canada_location.Neighbourhood.replace("Not assigned", canada_location.Borough, inplace=True)
canada_location.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Parkwoods"" title=""Parkwoods"">Pa..."
3,M4A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Victoria_Village"" title=""Victor..."
4,M5A,"<a href=""/wiki/Downtown_Toronto"" title=""Downto...","<a href=""/wiki/Harbourfront_(Toronto)"" title=""..."
5,M5A,"<a href=""/wiki/Downtown_Toronto"" title=""Downto...","<a href=""/wiki/Regent_Park"" title=""Regent Park..."
6,M6A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Lawrence_Heights"" title=""Lawren..."
7,M6A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Lawrence_Manor"" title=""Lawrence..."
8,M7A,"<a href=""/wiki/Queen%27s_Park_(Toronto)"" title...","<a href=""/wiki/Queen%27s_Park_(Toronto)"" title..."
10,M9A,"<a href=""/wiki/Etobicoke"" title=""Etobicoke"">Et...","<a class=""mw-redirect"" href=""/wiki/Islington_A..."
11,M1B,"<a href=""/wiki/Scarborough,_Toronto"" title=""Sc...","<a href=""/wiki/Rouge,_Toronto"" title=""Rouge, T..."
12,M1B,"<a href=""/wiki/Scarborough,_Toronto"" title=""Sc...","<a href=""/wiki/Malvern,_Toronto"" title=""Malver..."


In [62]:
# copy Borough value to Neighborhood if NaN:
canada_location.Neighbourhood.fillna(canada_location.Borough, inplace=True)
# drop duplicate rows:
canada_location=canada_location.drop_duplicates()
canada_location.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Parkwoods"" title=""Parkwoods"">Pa..."
3,M4A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Victoria_Village"" title=""Victor..."
4,M5A,"<a href=""/wiki/Downtown_Toronto"" title=""Downto...","<a href=""/wiki/Harbourfront_(Toronto)"" title=""..."
5,M5A,"<a href=""/wiki/Downtown_Toronto"" title=""Downto...","<a href=""/wiki/Regent_Park"" title=""Regent Park..."
6,M6A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Lawrence_Heights"" title=""Lawren..."
7,M6A,"<a href=""/wiki/North_York"" title=""North York"">...","<a href=""/wiki/Lawrence_Manor"" title=""Lawrence..."
8,M7A,"<a href=""/wiki/Queen%27s_Park_(Toronto)"" title...","<a href=""/wiki/Queen%27s_Park_(Toronto)"" title..."
10,M9A,"<a href=""/wiki/Etobicoke"" title=""Etobicoke"">Et...","<a class=""mw-redirect"" href=""/wiki/Islington_A..."
11,M1B,"<a href=""/wiki/Scarborough,_Toronto"" title=""Sc...","<a href=""/wiki/Rouge,_Toronto"" title=""Rouge, T..."
12,M1B,"<a href=""/wiki/Scarborough,_Toronto"" title=""Sc...","<a href=""/wiki/Malvern,_Toronto"" title=""Malver..."


In [63]:
# extract titles from columns
canada_location.update(canada_location.Neighbourhood.loc[lambda x: x.str.contains('title')].str.extract('title=\"([^\"]*)',expand=False))

canada_location.update(canada_location.Borough.loc[lambda x: x.str.contains('title')].str.extract('title=\"([^\"]*)',expand=False))

# delete Toronto annotation from Neighbourhood:
canada_location.update(canada_location.Neighbourhood.loc[lambda x: x.str.contains('Toronto')].str.replace(", Toronto",""))
canada_location.update(canada_location.Neighbourhood.loc[lambda x: x.str.contains('Toronto')].str.replace("\(Toronto\)",""))

In [64]:
canada_location.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park (Toronto),Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,"Scarborough, Toronto",Rouge
12,M1B,"Scarborough, Toronto",Malvern


In [65]:
#creating a new data frame named can_code and taking values from dataframe canada_location
#adding unique postal code and matching borough and neighbourhood to the new dataframe can_code for combining neighbourhoods with same post code
can_code = pd.DataFrame({'Postcode':canada_location.Postcode.unique()})
can_code['Borough']=pd.DataFrame(list(set(canada_location['Borough'].loc[canada_location['Postcode'] == x['Postcode']])) for i, x in can_code.iterrows())
can_code['Neighborhood']=pd.Series(list(set(canada_location['Neighbourhood'].loc[canada_location['Postcode'] == x['Postcode']])) for i, x in can_code.iterrows())
can_code['Neighborhood']=can_code['Neighborhood'].apply(lambda x: ', '.join(x))
can_code.rename(columns={'Postcode':'Postalcode'}, inplace=True)
can_code.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park (Toronto),Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,"Scarborough, Toronto","Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [66]:
#Not able to get the geographical coordinates of the neighborhoods using the Geocoder package 
#Using the link to the csv file given in the course module with coordinates of each postal code:
coardinates = pd.read_csv("http://cocl.us/Geospatial_data")
coardinates.rename(columns={'Postal Code':'Postalcode'}, inplace=True)
#resetting the index for dataframe coardinates and can_code before merging
coardinates.set_index("Postalcode")
can_code.set_index("Postalcode")
tor_address=pd.merge(can_code, coardinates)
tor_address.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park (Toronto),Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,"Scarborough, Toronto","Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [67]:
tor_address.shape

(103, 5)

In [68]:
#using the address of Toronto, Canada and getting the latitude and longitude using geolocator geocode
toronto = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(toronto)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, ON, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, ON, Canada are 43.653963, -79.387207.


In [69]:
# creating map instance of toronto using latitude and longitude values obtained by geolocator
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# addding markers in the map by iterating through the tor_address dataframe using borrough longititude and latitude
for lat, lng, borough, neighborhood in zip(tor_address['Latitude'], tor_address['Longitude'], tor_address['Borough'], tor_address['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map)  
    
toronto_map

In [70]:
#defining foursquare credentials to create url for data extraction
CLIENT_ID = 'TBIVDOUHRRM0XDL3LRES4ZRTH5YH5FWDZRU22IBH0MX4BQD4'
CLIENT_SECRET = 'VLSQH223MBWS2V2LWBCEM1XBVRKBE2GWAX3U241O05SS0XEW'
VERSION = '20190721'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TBIVDOUHRRM0XDL3LRES4ZRTH5YH5FWDZRU22IBH0MX4BQD4
CLIENT_SECRET:VLSQH223MBWS2V2LWBCEM1XBVRKBE2GWAX3U241O05SS0XEW


In [71]:
#creation function that will get venue and venue category using Foursquare credential for each neighbourhood in a radius of 500 with a limit of 100
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [72]:
toronto_venues = getNearbyVenues(names = tor_address['Neighborhood'],latitudes=tor_address['Latitude'],longitudes=tor_address['Longitude'])

Parkwoods
Victoria Village
Regent Park, Harbourfront 
Lawrence Heights, Lawrence Manor
Queen's Park 
Islington Avenue
Malvern, Rouge
Don Mills North
Woodbine Gardens, Parkview Hill
Ryerson, Garden District
Glencairn
Princess Gardens, Islington, Martin Grove, West Deane Park, Cloverdale
Highland Creek , Rouge Hill, Port Union
Don Mills South, Flemingdon Park
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Old Burnhamthorpe, Bloordale Gardens, Markland Wood
Morningside, West Hill, Guildwood
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Downsview North, Bathurst Manor, Wilson Heights
Thorncliffe Park
Richmond, King, Adelaide
Dufferin, Dovercourt Village
Scarborough Village
Oriole, Henry Farm, Fairview
York University, Northwood Park
East Toronto
Harbourfront East, Union Station , Toronto Islands
Trinity–Bellwoods, Little Portugal
East Birchmount Park, Ionview, Kennedy Park
Bayview Village
Downsview East, CFB

In [73]:
#displaying the number of rows and columns of table toronto_venues
toronto_venues.shape

(2268, 7)

In [74]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [75]:
#checking on how many venues were returned for each neighborhood
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Bathurst Quay, King and Spadina, Railway Lands, South Niagara, CN Tower, Harbourfront West, Island airport",16,16,16,16,16,16
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",25,25,25,25,25,25
Berczy Park,57,57,57,57,57,57
"Brockton, Parkdale Village, Exhibition Place",25,25,25,25,25,25
Business Reply Mail Processing Centre 969 Eastern,18,18,18,18,18,18
"Cabbagetown, St. James Town",44,44,44,44,44,44
Caledonia-Fairbanks,6,6,6,6,6,6
Canada Post Gateway Processing Centre,11,11,11,11,11,11


In [76]:
#finding out unique values of venue categories
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 276 uniques categories.


In [77]:
#for unique values of venue category, we will use one hot coding into a new data frame called toronto_coded
# one hot encoding
toronto_coded = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_coded['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_coded.columns[-1]] + list(toronto_coded.columns[:-1])
toronto_coded = toronto_coded[fixed_columns]

toronto_coded.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [78]:
#analysis the columns and rows for toronto_coded
toronto_coded.shape

(2268, 276)

In [79]:
#taking mean frequency of each grouped category
toronto_groupcat = toronto_coded.groupby('Neighborhood').mean().reset_index()
toronto_groupcat

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
1,"Bathurst Quay, King and Spadina, Railway Lands...",0.000000,0.0,0.000000,0.0625,0.0625,0.0625,0.125,0.1875,0.125,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
2,Bayview Village,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
3,"Bedford Park, Lawrence Manor East",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
4,Berczy Park,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.017544,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
5,"Brockton, Parkdale Village, Exhibition Place",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
6,Business Reply Mail Processing Centre 969 Eastern,0.055556,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
7,"Cabbagetown, St. James Town",0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000
8,Caledonia-Fairbanks,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.166667
9,Canada Post Gateway Processing Centre,0.000000,0.0,0.000000,0.0000,0.0000,0.0000,0.000,0.0000,0.000,...,0.00,0.0,0.000000,0.000000,0.00,0.000000,0.0000,0.000000,0.000000,0.000000


In [80]:
#checking the number of columns and rows of new data frame "toronto_groupcat"
toronto_groupcat.shape

(100, 276)

In [81]:
#function to sort the venues
def common_venues(row, top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:top_venues]

top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venue = pd.DataFrame(columns=columns)
neighborhoods_venue['Neighborhood'] = toronto_groupcat['Neighborhood']

for ind in np.arange(toronto_groupcat.shape[0]):
    neighborhoods_venue.iloc[ind, 1:] = common_venues(toronto_groupcat.iloc[ind, :], top_venues)

neighborhoods_venue.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Sandwich Place,Lounge,Breakfast Spot,Skating Rink,Women's Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
1,"Bathurst Quay, King and Spadina, Railway Lands...",Airport Service,Airport Terminal,Airport Lounge,Coffee Shop,Sculpture Garden,Boat or Ferry,Bar,Boutique,Harbor / Marina,Airport Gate
2,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,Women's Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore
3,"Bedford Park, Lawrence Manor East",Juice Bar,Coffee Shop,Italian Restaurant,Pet Store,Cosmetics Shop,Liquor Store,Sandwich Place,Fast Food Restaurant,Butcher,Café
4,Berczy Park,Coffee Shop,Cocktail Bar,Steakhouse,Bakery,Seafood Restaurant,Beer Bar,Café,Cheese Shop,Farmers Market,Shopping Mall


In [82]:
# set number of clusters
k = 5

toronto_clustered = toronto_groupcat.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=k, random_state=0).fit(toronto_clustered)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [84]:
# add clustering labels
neighborhoods_venue.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = tor_address

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venue.set_index('Neighborhood'), on='Neighborhood')



# check the last columns!
toronto_merged.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Fast Food Restaurant,Food & Drink Shop,Park,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Pizza Place,Coffee Shop,Hockey Arena,Portuguese Restaurant,Intersection,Electronics Store,Empanada Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Dumpling Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0.0,Coffee Shop,Bakery,Park,Café,Pub,Theater,Gym / Fitness Center,Breakfast Spot,Restaurant,Mexican Restaurant
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,0.0,Clothing Store,Furniture / Home Store,Coffee Shop,Miscellaneous Shop,Boutique,Athletics & Sports,Arts & Crafts Store,Sporting Goods Shop,Women's Store,Vietnamese Restaurant
4,M7A,Queen's Park (Toronto),Queen's Park,43.662301,-79.389494,0.0,Coffee Shop,Park,Gym,Yoga Studio,Seafood Restaurant,Burger Joint,Sandwich Place,Burrito Place,Café,Chinese Restaurant


In [89]:
# create map
toronto_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        #color=rainbow[cluster-1],
        fill=True,
        #fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(toronto_clusters)
       
toronto_clusters

In [90]:
#cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0.0,Fast Food Restaurant,Food & Drink Shop,Park,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,North York,0.0,Pizza Place,Coffee Shop,Hockey Arena,Portuguese Restaurant,Intersection,Electronics Store,Empanada Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Dumpling Restaurant
2,Downtown Toronto,0.0,Coffee Shop,Bakery,Park,Café,Pub,Theater,Gym / Fitness Center,Breakfast Spot,Restaurant,Mexican Restaurant
3,North York,0.0,Clothing Store,Furniture / Home Store,Coffee Shop,Miscellaneous Shop,Boutique,Athletics & Sports,Arts & Crafts Store,Sporting Goods Shop,Women's Store,Vietnamese Restaurant
4,Queen's Park (Toronto),0.0,Coffee Shop,Park,Gym,Yoga Studio,Seafood Restaurant,Burger Joint,Sandwich Place,Burrito Place,Café,Chinese Restaurant
6,"Scarborough, Toronto",0.0,Fast Food Restaurant,Print Shop,Drugstore,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
7,North York,0.0,Café,Gym / Fitness Center,Japanese Restaurant,Caribbean Restaurant,Baseball Field,Donut Shop,Discount Store,Dive Bar,Dog Run,Doner Restaurant
8,East York,0.0,Pizza Place,Fast Food Restaurant,Athletics & Sports,Gastropub,Intersection,Pharmacy,Café,Breakfast Spot,Bank,Gym / Fitness Center
9,Downtown Toronto,0.0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Fast Food Restaurant,Middle Eastern Restaurant,Theater,Italian Restaurant,Lingerie Store,Bookstore
10,North York,0.0,Pub,Park,Japanese Restaurant,Sushi Restaurant,Donut Shop,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant


In [91]:
#cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
53,North York,1.0,Baseball Field,Food Truck,Women's Store,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dim Sum Restaurant
57,North York,1.0,Baseball Field,Women's Store,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Dim Sum Restaurant


In [92]:
#cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,North York,2.0,Piano Bar,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Drugstore,Farmers Market


In [93]:
#cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Central Toronto,3.0,Garden,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Women's Store,Dessert Shop


In [94]:
#cluster 5
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,"Scarborough, Toronto",4.0,Playground,Convenience Store,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop,Drugstore
83,Central Toronto,4.0,Playground,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Drugstore,Farmers Market
85,"Scarborough, Toronto",4.0,Playground,Park,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Drugstore
