In [4]:
!pip install geocoder
!pip install folium
!pip install BeautifulSoup4
!pip install lxml
!pip install geopy
!pip install pip requests



In [5]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


#### Data Extraction

In [6]:
wikilink = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
soup = BeautifulSoup(wikilink.text,'lxml')

In [7]:
data = []
columns = []
table = soup.find(class_='wikitable')

for index, tr in enumerate(table.find_all('tr')):
    section = []
    for td in tr.find_all(['th','td']):
        section.append(td.text.rstrip())
        
    if (index==0):
        columns = section
    else:
        data.append(section)

# The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood

can_df = pd.DataFrame(data=data, columns=columns)
can_df.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [8]:
can_df = can_df[can_df['Borough'] != 'Not assigned']
can_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [9]:
link = 'https://cocl.us/Geospatial_data'

In [10]:
postal_df = pd.read_csv(link)
postal_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
can_df['Latitude'] = postal_df['Latitude'].values
can_df['Longitude'] = postal_df['Longitude'].values

can_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.806686,-79.194353
3,M4A,North York,Victoria Village,43.784535,-79.160497
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.763573,-79.188711
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.770992,-79.216917
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.773136,-79.239476


In [12]:
can_df.drop('Postal Code', axis=1, inplace=True)
can_df.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
2,North York,Parkwoods,43.806686,-79.194353
3,North York,Victoria Village,43.784535,-79.160497
4,Downtown Toronto,"Regent Park, Harbourfront",43.763573,-79.188711
5,North York,"Lawrence Manor, Lawrence Heights",43.770992,-79.216917
6,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.773136,-79.239476


In [13]:
can_df.dtypes

Borough           object
Neighbourhood     object
Latitude         float64
Longitude        float64
dtype: object

In [14]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(can_df['Borough'].unique()),
        can_df.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [15]:
address = 'North York, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York 43.7543263, -79.44911696639593.


In [16]:
# create map of New York using latitude and longitude values
northyork_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(can_df['Latitude'], can_df['Longitude'], can_df['Borough'], can_df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(northyork_map)  
    
northyork_map

In [17]:
DwnTwn_df = can_df[can_df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
DwnTwn_df.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Downtown Toronto,"Regent Park, Harbourfront",43.763573,-79.188711
1,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.773136,-79.239476
2,Downtown Toronto,"Garden District, Ryerson",43.692657,-79.264848
3,Downtown Toronto,St. James Town,43.799525,-79.318389
4,Downtown Toronto,Berczy Park,43.75749,-79.374714


In [18]:
address = 'Downtown Toronto, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6541737, -79.38081164513409.


In [19]:
# create map of Manhattan using latitude and longitude values
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(DwnTwn_df['Latitude'], DwnTwn_df['Longitude'], DwnTwn_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown# create map of Manhattan using latitude and longitude values
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(DwnTwn_df['Latitude'], DwnTwn_df['Longitude'], DwnTwn_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown

In [20]:
CLIENT_ID = 'EHJQ0KP1LGZHEIAGKB0DBTUB0YY1FSZM1H4NIMJPXCGRQUO4' # your Foursquare ID
CLIENT_SECRET = 'CUYC4DUPSSARMCGTIGQLQU5GJABBHTYMMGTDB014LBEV5VSX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EHJQ0KP1LGZHEIAGKB0DBTUB0YY1FSZM1H4NIMJPXCGRQUO4
CLIENT_SECRET:CUYC4DUPSSARMCGTIGQLQU5GJABBHTYMMGTDB014LBEV5VSX


In [21]:
DwnTwn_df.loc[0, 'Neighbourhood']

'Regent Park, Harbourfront'

In [22]:
neighborhood_latitude = DwnTwn_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = DwnTwn_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = DwnTwn_df.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Regent Park, Harbourfront are 43.7635726, -79.1887115.


In [23]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=EHJQ0KP1LGZHEIAGKB0DBTUB0YY1FSZM1H4NIMJPXCGRQUO4&client_secret=CUYC4DUPSSARMCGTIGQLQU5GJABBHTYMMGTDB014LBEV5VSX&v=20180605&ll=43.7635726,-79.1887115&radius=500&limit=100'

In [24]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f18133de9e5125d4d9bf7db'},
 'response': {'headerLocation': 'Scarborough Village',
  'headerFullLocation': 'Scarborough Village, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 8,
  'suggestedBounds': {'ne': {'lat': 43.768072604500006,
    'lng': -79.18249216787879},
   'sw': {'lat': 43.7590725955, 'lng': -79.1949308321212}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4beee041e24d20a1cd857314',
       'name': 'RBC Royal Bank',
       'location': {'address': '4374 KINGSTON RD',
        'crossStreet': 'Kingston & Lawrence',
        'lat': 43.76678992471017,
        'lng': -79.19115118872593,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.76678992471017,
          'lng': -79.1911511887

In [25]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [26]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,RBC Royal Bank,Bank,43.76679,-79.191151
1,G & G Electronics,Electronics Store,43.765309,-79.191537
2,Sail Sushi,Restaurant,43.765951,-79.191275
3,Big Bite Burrito,Mexican Restaurant,43.766299,-79.19072
4,Enterprise Rent-A-Car,Rental Car Location,43.764076,-79.193406


In [27]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

8 venues were returned by Foursquare.


In [28]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [29]:
DwnTwnVenues = getNearbyVenues(names=DwnTwn_df['Neighbourhood'],
                                   latitudes=DwnTwn_df['Latitude'],
                                   longitudes=DwnTwn_df['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


In [30]:
print(DwnTwnVenues.shape)
DwnTwnVenues.head()

(160, 7)


Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
1,"Regent Park, Harbourfront",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
2,"Regent Park, Harbourfront",43.763573,-79.188711,Sail Sushi,43.765951,-79.191275,Restaurant
3,"Regent Park, Harbourfront",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
4,"Regent Park, Harbourfront",43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location


In [31]:
DwnTwnVenues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",19,19,19,19,19,19
Central Bay Street,6,6,6,6,6,6
Christie,2,2,2,2,2,2
Church and Wellesley,8,8,8,8,8,8
"Commerce Court, Victoria Hotel",1,1,1,1,1,1
"First Canadian Place, Underground city",2,2,2,2,2,2
"Garden District, Ryerson",4,4,4,4,4,4
"Harbourfront East, Union Station, Toronto Islands",9,9,9,9,9,9
"Kensington Market, Chinatown, Grange Park",35,35,35,35,35,35
"Queen's Park, Ontario Provincial Government",8,8,8,8,8,8


In [32]:
print('There are {} uniques categories.'.format(len(DwnTwnVenues['Venue Category'].unique())))

There are 89 uniques categories.


In [33]:
# one hot encoding
dwntwn_onehot = pd.get_dummies(DwnTwnVenues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
dwntwn_onehot['Neighbourhood'] = DwnTwnVenues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [dwntwn_onehot.columns[-1]] + list(dwntwn_onehot.columns[:-1])
dwntwn_onehot = dwntwn_onehot[fixed_columns]

dwntwn_onehot.head()

Unnamed: 0,Neighbourhood,Airport,Athletics & Sports,Auto Workshop,Bakery,Bank,Bar,Baseball Field,Beer Store,Board Shop,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Butcher,Café,Camera Store,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Comic Shop,Convenience Store,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Food & Drink Shop,Food Service,French Restaurant,Fried Chicken Joint,Garden,Garden Center,Gas Station,General Entertainment,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Hardware Store,Health Food Store,Home Service,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Kids Store,Latin American Restaurant,Light Rail Station,Liquor Store,Medical Center,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Other Repair Shop,Park,Pet Store,Pharmacy,Pizza Place,Pub,Recording Studio,Rental Car Location,Restaurant,Sandwich Place,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Spa,Steakhouse,Supplement Shop,Sushi Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Thrift / Vintage Store,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Wings Joint,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [34]:
dwntwn_onehot.shape

(160, 90)

In [35]:
dwntwn_grouped = dwntwn_onehot.groupby('Neighbourhood').mean().reset_index()
dwntwn_grouped

Unnamed: 0,Neighbourhood,Airport,Athletics & Sports,Auto Workshop,Bakery,Bank,Bar,Baseball Field,Beer Store,Board Shop,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Butcher,Café,Camera Store,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Comic Shop,Convenience Store,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Food & Drink Shop,Food Service,French Restaurant,Fried Chicken Joint,Garden,Garden Center,Gas Station,General Entertainment,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Hardware Store,Health Food Store,Home Service,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Kids Store,Latin American Restaurant,Light Rail Station,Liquor Store,Medical Center,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Other Repair Shop,Park,Pet Store,Pharmacy,Pizza Place,Pub,Recording Studio,Rental Car Location,Restaurant,Sandwich Place,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Spa,Steakhouse,Supplement Shop,Sushi Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Thrift / Vintage Store,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Wings Joint,Yoga Studio
0,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.052632,0.0,0.052632,0.0,0.052632,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Central Bay Street,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5,"First Canadian Place, Underground city",0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Harbourfront East, Union Station, Toronto Islands",0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.085714,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.0,0.057143,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.057143,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.057143,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571
9,"Queen's Park, Ontario Provincial Government",0.0,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
dwntwn_grouped.shape

(18, 90)

In [37]:
num_top_venues = 5

for hood in dwntwn_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = dwntwn_grouped[dwntwn_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0  Light Rail Station  0.11
1          Restaurant  0.05
2       Burrito Place  0.05
3      Farmers Market  0.05
4    Recording Studio  0.05


----Central Bay Street----
           venue  freq
0    Pizza Place  0.17
1       Pharmacy  0.17
2  Grocery Store  0.17
3           Bank  0.17
4    Coffee Shop  0.17


----Christie----
               venue  freq
0  Food & Drink Shop   0.5
1               Park   0.5
2        Pizza Place   0.0
3          Pet Store   0.0
4  Other Repair Shop   0.0


----Church and Wellesley----
                venue  freq
0         Pizza Place  0.25
1      Discount Store  0.12
2         Coffee Shop  0.12
3  Chinese Restaurant  0.12
4        Intersection  0.12


----Commerce Court, Victoria Hotel----
                venue  freq
0               Trail   1.0
1             Airport   0.0
2  Light Rail Station   0.0
3           Pet S

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [39]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = dwntwn_grouped['Neighbourhood']

for ind in np.arange(dwntwn_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dwntwn_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"CN Tower, King and Spadina, Railway Lands, Har...",Light Rail Station,Garden,Brewery,Park,Garden Center,Pizza Place,Butcher,Recording Studio,Restaurant,Burrito Place
1,Central Bay Street,Pharmacy,Bank,Pizza Place,Coffee Shop,Butcher,Grocery Store,Bar,Baseball Field,Dance Studio,Dessert Shop
2,Christie,Park,Food & Drink Shop,Yoga Studio,Farmers Market,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store
3,Church and Wellesley,Pizza Place,Coffee Shop,Chinese Restaurant,Middle Eastern Restaurant,Sandwich Place,Intersection,Discount Store,Electronics Store,Curling Ice,Dance Studio
4,"Commerce Court, Victoria Hotel",Trail,Yoga Studio,Fast Food Restaurant,Convenience Store,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store


In [40]:
# set number of clusters
kclusters = 5

dwntwn_grouped_clustering = dwntwn_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dwntwn_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 4, 0, 2, 1, 0, 0, 0, 0], dtype=int32)

In [41]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dwntwn_merged = DwnTwn_df

dwntwn_merged = dwntwn_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

dwntwn_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,"Regent Park, Harbourfront",43.763573,-79.188711,0.0,Mexican Restaurant,Medical Center,Bank,Rental Car Location,Restaurant,Breakfast Spot,Electronics Store,Intersection,Yoga Studio,Falafel Restaurant
1,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.773136,-79.239476,0.0,Athletics & Sports,Gas Station,Bakery,Bank,Thai Restaurant,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Yoga Studio,Farmers Market
2,Downtown Toronto,"Garden District, Ryerson",43.692657,-79.264848,0.0,College Stadium,General Entertainment,Café,Skating Rink,Falafel Restaurant,Convenience Store,Curling Ice,Dance Studio,Dessert Shop,Diner
3,Downtown Toronto,St. James Town,43.799525,-79.318389,0.0,Grocery Store,Fast Food Restaurant,Chinese Restaurant,Breakfast Spot,Coffee Shop,Pizza Place,Camera Store,Sandwich Place,Pharmacy,Bank
4,Downtown Toronto,Berczy Park,43.75749,-79.374714,,,,,,,,,,,


In [53]:
dwntwn_merged = pd.DataFrame(columns=['Cluster Labels'])
dwntwn_merged['Cluster Labels'] = dwntwn_merged['Cluster Labels'].astype(float)
dwntwn_merged

AttributeError: module 'pandas' has no attribute 'dwntwn_merged'

In [51]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dwntwn_merged['Latitude'], dwntwn_merged['Longitude'], dwntwn_merged['Neighbourhood'], dwntwn_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

TypeError: list indices must be integers or slices, not float

In [45]:
dwntwn_merged.loc[dwntwn_merged['Cluster Labels'] == 0, dwntwn_merged.columns[[1] + list(range(5, dwntwn_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",Mexican Restaurant,Medical Center,Bank,Rental Car Location,Restaurant,Breakfast Spot,Electronics Store,Intersection,Yoga Studio,Falafel Restaurant
1,"Queen's Park, Ontario Provincial Government",Athletics & Sports,Gas Station,Bakery,Bank,Thai Restaurant,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Yoga Studio,Farmers Market
2,"Garden District, Ryerson",College Stadium,General Entertainment,Café,Skating Rink,Falafel Restaurant,Convenience Store,Curling Ice,Dance Studio,Dessert Shop,Diner
3,St. James Town,Grocery Store,Fast Food Restaurant,Chinese Restaurant,Breakfast Spot,Coffee Shop,Pizza Place,Camera Store,Sandwich Place,Pharmacy,Bank
5,Central Bay Street,Pharmacy,Bank,Pizza Place,Coffee Shop,Butcher,Grocery Store,Bar,Baseball Field,Dance Studio,Dessert Shop
7,"Richmond, Adelaide, King",Airport,Park,Other Repair Shop,Comic Shop,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store
8,"Harbourfront East, Union Station, Toronto Islands",Pharmacy,Beer Store,Curling Ice,Dance Studio,Bus Stop,Skating Rink,Spa,Park,Athletics & Sports,Food Service
9,"Toronto Dominion Centre, Design Exchange",Park,Burrito Place,Food & Drink Shop,Fish & Chips Shop,Fast Food Restaurant,Ice Cream Shop,Italian Restaurant,Liquor Store,Coffee Shop,Movie Theater
11,"University of Toronto, Harbord",Turkish Restaurant,Sandwich Place,Discount Store,Skating Rink,Yoga Studio,Falafel Restaurant,Convenience Store,Curling Ice,Dance Studio,Dessert Shop
12,"Kensington Market, Chinatown, Grange Park",Café,Coffee Shop,Pizza Place,Italian Restaurant,Sushi Restaurant,Pub,Fish & Chips Shop,Latin American Restaurant,Indie Movie Theater,Health Food Store


In [46]:
dwntwn_merged.loc[dwntwn_merged['Cluster Labels'] == 1, dwntwn_merged.columns[[1] + list(range(5, dwntwn_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Rosedale,Baseball Field,Yoga Studio,Fast Food Restaurant,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store,Falafel Restaurant
17,"First Canadian Place, Underground city",Baseball Field,Food Service,Yoga Studio,Fast Food Restaurant,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store


In [47]:
dwntwn_merged.loc[dwntwn_merged['Cluster Labels'] == 2, dwntwn_merged.columns[[1] + list(range(5, dwntwn_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,"Commerce Court, Victoria Hotel",Trail,Yoga Studio,Fast Food Restaurant,Convenience Store,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store


In [48]:
dwntwn_merged.loc[dwntwn_merged['Cluster Labels'] == 4, dwntwn_merged.columns[[1] + list(range(5, dwntwn_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Christie,Park,Food & Drink Shop,Yoga Studio,Farmers Market,Curling Ice,Dance Studio,Dessert Shop,Diner,Discount Store,Electronics Store
