In [2]:
# import all necessary libraries
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

#!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


In [3]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wikipage = requests.get(url)
wikipage.text[:100]

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title'

In [4]:
#Create a new pd DataFrame
toronto = pd.DataFrame()

#use beautifulsoup to read the wikipage
soup = BeautifulSoup(wikipage.text, 'lxml')
wikitable = soup.find_all('table')[0] 

row_marker = 0

for row in wikitable.find_all('tr'):
    column_marker = 0
    columns = row.find_all('td')
    for column in columns:
        toronto.loc[row_marker,column_marker] = column.get_text()
        column_marker += 1
    row_marker += 1

#rename column names
toronto.rename(columns={0:'PostalCode',1:'Borough',2:'Neighborhood'}, inplace=True)
#drop all rows with unassigned borough
toronto = toronto[toronto.Borough!='Not assigned']
toronto.reset_index(drop=True,inplace=True)

#create a new dataframe toronto_neighbor to combine all neighborhoods with the same postal code
toronto_neighbor= pd.DataFrame(columns = ['PostalCode','Borough','Neighborhood'])

for ii in range(toronto.shape[0]):
    borough = toronto.loc[ii,'Borough']
    postalcode = toronto.loc[ii,'PostalCode']
    neighborhood = toronto.loc[ii,'Neighborhood'][:-1]
    if neighborhood == 'Not assigned':
        neighborhood = borough
    
    if postalcode in toronto_neighbor.PostalCode.values:
        old_neighborhood = toronto_neighbor[toronto_neighbor.PostalCode == postalcode].Neighborhood
        new_value = (old_neighborhood + ',' + neighborhood)
        toronto_neighbor.loc[toronto_neighbor.PostalCode == postalcode,'Neighborhood'] = new_value 
    
    else:
        toronto_neighbor = toronto_neighbor.append({'PostalCode': postalcode,
                                                'Borough': borough,
                                                'Neighborhood': neighborhood,
                                               }, ignore_index=True)

toronto_neighbor

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [5]:
toronto_neighbor.shape

(103, 3)

In [6]:
# use geocoder to get coordinates
#!conda install -c conda-forge geocoder --yes

#import geocoder # import geocoder
#!wget -o geospatial.csv http://cocl.us/Geospatial_data
geospatial = pd.read_csv('Geospatial_Coordinates.csv')

for postalcode in toronto_neighbor['PostalCode']:
    ######################################
    # initialize your variable to None
    #lat_lng_coords = None
    #
    # loop until you get the coordinates   
    #while(lat_lng_coords is None):
    #    g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
    #    lat_lng_coords = g.latlng
    ####################################
    # geocoder doesn't work use the csv file
    latitude = geospatial.loc[toronto_neighbor.PostalCode == postalcode,'Latitude']
    longitude = geospatial.loc[toronto_neighbor.PostalCode == postalcode,'Longitude']
    
    toronto_neighbor.loc[toronto_neighbor.PostalCode == postalcode,'Latitude'] =  latitude 
    toronto_neighbor.loc[toronto_neighbor.PostalCode == postalcode,'Longitude'] =  longitude

toronto_neighbor

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.806686,-79.194353
1,M4A,North York,Victoria Village,43.784535,-79.160497
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.763573,-79.188711
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.770992,-79.216917
4,M7A,Queen's Park,Queen's Park,43.773136,-79.239476
5,M9A,Etobicoke,Islington Avenue,43.744734,-79.239476
6,M1B,Scarborough,"Rouge,Malvern",43.727929,-79.262029
7,M3B,North York,Don Mills North,43.711112,-79.284577
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.716316,-79.239476
9,M5B,Downtown Toronto,"Ryerson,Garden District",43.692657,-79.264848


# Explore

In [7]:
##########################################
#######Explore############################
#########################################
# plot the Toronto neighborhoods
address = 'Toronto, Ontario'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_neighbor['Latitude'], toronto_neighbor['Longitude'], toronto_neighbor['Borough'], toronto_neighbor['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto



# Explore Toronto Nearby: downtown

In [8]:
#Create a map of downtown toronto
downtown_data = pd.DataFrame()
downtown_data = toronto_neighbor[toronto_neighbor['Borough'] == 'Downtown Toronto'].reset_index(drop=True)

#My Foursquare Credentials 
CLIENT_ID = 'S1KVEF2AKPAYLU1ETMETRFO3R0FWOUJXWWGQ55TILVFOAOLY' # your Foursquare ID
CLIENT_SECRET = 'G1EAHHBTQRSOP4JVOIQBM4QV452XF5JZBLV04MV0CSPKJLAF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

#first neighborhood(s) of my dataframe
neighborhood_latitude = downtown_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = downtown_data.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = downtown_data.loc[0, 'Neighborhood'] # neighborhood name

#Get the top 100 venues around this neighborhood with radius of 1000 meters
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

#send requests
results = requests.get(url).json()

In [9]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [10]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Chick-N-Joy,Fried Chicken Joint,43.768752,-79.187982
1,Bulk Barn,Food & Drink Shop,43.771342,-79.184341
2,LCBO,Liquor Store,43.771462,-79.184384
3,Booster Juice,Smoothie Shop,43.770668,-79.18415
4,Swiss Chalet Rotisserie & Grill,Pizza Place,43.767697,-79.189914


In [11]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

21 venues were returned by Foursquare.


In [12]:
# define a function to repeat the process in downtown
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
downtown_venues = getNearbyVenues(names=downtown_data['Neighborhood'],
                                   latitudes=downtown_data['Latitude'],
                                   longitudes=downtown_data['Longitude']
                                  )



Harbourfront,Regent Park
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Christie
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown,St. James Town
First Canadian Place,Underground city
Church and Wellesley


# Analysis each neighborhood

In [14]:
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

downtown_onehot.head()

Unnamed: 0,Neighborhood,Airport,American Restaurant,Auto Workshop,Bakery,Bar,Baseball Field,Beer Store,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Butcher,Cafeteria,Café,Chinese Restaurant,Coffee Shop,College Stadium,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Food,Food & Drink Shop,French Restaurant,Garden,Garden Center,Gastropub,General Entertainment,Gourmet Shop,Grocery Store,Gym,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Light Rail Station,Liquor Store,Medical Center,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Recording Studio,Rental Car Location,Restaurant,Sandwich Place,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Social Club,Spa,Steakhouse,Supplement Shop,Sushi Restaurant,Tea Room,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Store,Wine Bar,Wings Joint
0,"Harbourfront,Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront,Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront,Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront,Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront,Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [15]:
# mean of frquence
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Airport,American Restaurant,Auto Workshop,Bakery,Bar,Baseball Field,Beer Store,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Butcher,Cafeteria,Café,Chinese Restaurant,Coffee Shop,College Stadium,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Food,Food & Drink Shop,French Restaurant,Garden,Garden Center,Gastropub,General Entertainment,Gourmet Shop,Grocery Store,Gym,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Light Rail Station,Liquor Store,Medical Center,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Recording Studio,Rental Car Location,Restaurant,Sandwich Place,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Social Club,Spa,Steakhouse,Supplement Shop,Sushi Restaurant,Tea Room,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Store,Wine Bar,Wings Joint
0,"Adelaide,King,Richmond",0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.0,0.071429,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0
5,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.078947,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.026316,0.0,0.026316,0.0,0.0,0.026316,0.0,0.026316,0.026316,0.026316,0.0,0.026316,0.0,0.052632,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.078947,0.0,0.026316,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.052632,0.026316,0.0,0.026316,0.0,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Commerce Court,Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Design Exchange,Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.05,0.05,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0


In [75]:
##print topmost venues
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
         venue  freq
0      Airport  0.33
1         Park  0.33
2     Bus Stop  0.33
3          Pub  0.00
4  Pizza Place  0.00


----Berczy Park----
         venue  freq
0    Cafeteria   1.0
1      Airport   0.0
2          Pub   0.0
3  Pizza Place   0.0
4     Pharmacy   0.0


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
                venue  freq
0  Light Rail Station  0.11
1         Yoga Studio  0.06
2       Burrito Place  0.06
3                Park  0.06
4    Recording Studio  0.06


----Cabbagetown,St. James Town----
                 venue  freq
0          Pizza Place   0.5
1  Empanada Restaurant   0.5
2         Liquor Store   0.0
3             Pharmacy   0.0
4            Pet Store   0.0


----Central Bay Street----
           venue  freq
0  Grocery Store   0.2
1    Coffee Shop   0.2
2    Pizza Place   0.2
3       Pharmacy   0.2
4        Butcher   0.2


----Chinatown,Grange Park,Kensington

In [16]:
## Create a dataframe
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Airport,Construction & Landscaping,Park,Bus Stop,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store
1,Berczy Park,Cafeteria,Wings Joint,Construction & Landscaping,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Garden,Brewery,Light Rail Station,Pizza Place,Recording Studio,Restaurant,Skate Park,Smoke Shop,Burrito Place,Comic Shop
3,"Cabbagetown,St. James Town",Empanada Restaurant,Pizza Place,Garden,French Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop,Diner
4,Central Bay Street,Wine Bar,Pizza Place,Coffee Shop,Pharmacy,Food & Drink Shop,Diner,Construction & Landscaping,Garden,Convenience Store,Cosmetics Shop
5,"Chinatown,Grange Park,Kensington Market",Coffee Shop,Café,Pizza Place,Italian Restaurant,Sushi Restaurant,Indie Movie Theater,Latin American Restaurant,Gym,Grocery Store,Gourmet Shop
6,Christie,Park,Food & Drink Shop,Fast Food Restaurant,Bus Stop,Wings Joint,Electronics Store,Cosmetics Shop,Curling Ice,Dessert Shop,Diner
7,Church and Wellesley,Pizza Place,Coffee Shop,Chinese Restaurant,Middle Eastern Restaurant,Sandwich Place,Intersection,Discount Store,Cosmetics Shop,Curling Ice,Dessert Shop
8,"Commerce Court,Victoria Hotel",Playground,Gastropub,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store
9,"Design Exchange,Toronto Dominion Centre",Sandwich Place,Pet Store,Burrito Place,Fast Food Restaurant,Gym,Ice Cream Shop,Italian Restaurant,Liquor Store,Coffee Shop,Movie Theater


# Clustering neighbors

In [17]:
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 0, 4, 0, 0, 0, 0, 2, 0], dtype=int32)

In [18]:
# create a new dataframe
downtown_merged = downtown_data

# add clustering labels
downtown_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.763573,-79.188711,0,Pizza Place,Breakfast Spot,Rental Car Location,Mexican Restaurant,Medical Center,Electronics Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Fish & Chips Shop
1,M5B,Downtown Toronto,"Ryerson,Garden District",43.692657,-79.264848,3,General Entertainment,Skating Rink,Café,College Stadium,Bakery,Bar,Cosmetics Shop,Curling Ice,Dessert Shop,Diner
2,M5C,Downtown Toronto,St. James Town,43.799525,-79.318389,0,Fast Food Restaurant,Chinese Restaurant,Pizza Place,American Restaurant,Thrift / Vintage Store,Pharmacy,Grocery Store,Coffee Shop,Breakfast Spot,Electronics Store
3,M5E,Downtown Toronto,Berczy Park,43.75749,-79.374714,4,Cafeteria,Wings Joint,Construction & Landscaping,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant
4,M5G,Downtown Toronto,Central Bay Street,43.782736,-79.442259,0,Wine Bar,Pizza Place,Coffee Shop,Pharmacy,Food & Drink Shop,Diner,Construction & Landscaping,Garden,Convenience Store,Cosmetics Shop


In [19]:
# map visualization
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [20]:
#Cluster 1
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Harbourfront,Regent Park",0,Pizza Place,Breakfast Spot,Rental Car Location,Mexican Restaurant,Medical Center,Electronics Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Fish & Chips Shop
2,St. James Town,0,Fast Food Restaurant,Chinese Restaurant,Pizza Place,American Restaurant,Thrift / Vintage Store,Pharmacy,Grocery Store,Coffee Shop,Breakfast Spot,Electronics Store
4,Central Bay Street,0,Wine Bar,Pizza Place,Coffee Shop,Pharmacy,Food & Drink Shop,Diner,Construction & Landscaping,Garden,Convenience Store,Cosmetics Shop
5,Christie,0,Park,Food & Drink Shop,Fast Food Restaurant,Bus Stop,Wings Joint,Electronics Store,Cosmetics Shop,Curling Ice,Dessert Shop,Diner
6,"Adelaide,King,Richmond",0,Airport,Construction & Landscaping,Park,Bus Stop,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store
7,"Harbourfront East,Toronto Islands,Union Station",0,Park,Skating Rink,Beer Store,Cosmetics Shop,Curling Ice,Spa,Intersection,Video Store,Farmers Market,Fast Food Restaurant
9,"Commerce Court,Victoria Hotel",0,Playground,Gastropub,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store
11,"Chinatown,Grange Park,Kensington Market",0,Coffee Shop,Café,Pizza Place,Italian Restaurant,Sushi Restaurant,Indie Movie Theater,Latin American Restaurant,Gym,Grocery Store,Gourmet Shop
12,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0,Garden,Brewery,Light Rail Station,Pizza Place,Recording Studio,Restaurant,Skate Park,Smoke Shop,Burrito Place,Comic Shop
13,Rosedale,0,Baseball Field,Wings Joint,Falafel Restaurant,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant


In [21]:
#CLuster 2
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,"Harbord,University of Toronto",1,Restaurant,Sandwich Place,Fast Food Restaurant,Discount Store,Wings Joint,Electronics Store,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop
14,Stn A PO Boxes 25 The Esplanade,1,Wings Joint,Fast Food Restaurant,Discount Store,Sandwich Place,Burrito Place,Burger Joint,Social Club,Convenience Store,Supplement Shop,Gym


In [22]:
#CLuster 3
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Design Exchange,Toronto Dominion Centre",2,Sandwich Place,Pet Store,Burrito Place,Fast Food Restaurant,Gym,Ice Cream Shop,Italian Restaurant,Liquor Store,Coffee Shop,Movie Theater


In [23]:
#CLuster 4
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Ryerson,Garden District",3,General Entertainment,Skating Rink,Café,College Stadium,Bakery,Bar,Cosmetics Shop,Curling Ice,Dessert Shop,Diner


In [24]:
#CLuster 5
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Berczy Park,4,Cafeteria,Wings Joint,Construction & Landscaping,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant
