In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0           conda-forge
    geopy:          

## I imported the data and transformed the wikitable to a pandas data frame and then removed all 'Not assigned" Boroughs

In [10]:
import pandas as pd

tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header=0)

headings = ['Postal Code', 'Borough', 'Neighborhood']
for table in tables:
    current_headings = table.columns.values[:3]
    if list(current_headings) == headings:
        break
    
table= table[table.Borough != 'Not assigned']

## This is the result

In [11]:
table.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## The index needs to be reset and the old one removed

In [12]:
table.reset_index(drop=True)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [13]:
table.shape

(103, 3)

## Imported CSV with latitude and longitude data

In [14]:
latlon = pd.read_csv("http://cocl.us/Geospatial_data") 

In [16]:
latlon.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Now to merge the two data frames

In [17]:
table2= pd.merge(table, latlon, on='Postal Code')
table2.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [18]:
table2.shape

(103, 5)

## I am only going to look at Boroughs with the word Toronto in them

In [19]:
Toronto_data= table2[table2['Borough'].str.contains("Toronto")].reset_index(drop=True)
Toronto_data

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


## Import FourSquare Credentials

In [21]:
CLIENT_ID = 'YIFS4JFAEOP2OXJWHWQONYL10MAELMJH1550YKJ3WYGOANX0' # your Foursquare ID
CLIENT_SECRET = '15YKARKAMWSXYH4WLRXSYACWMYZMGNL0ZTNLYABIYX1VTAG4' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YIFS4JFAEOP2OXJWHWQONYL10MAELMJH1550YKJ3WYGOANX0
CLIENT_SECRET:15YKARKAMWSXYH4WLRXSYACWMYZMGNL0ZTNLYABIYX1VTAG4


## Just Experimenting with One Burough, The first Postal Code, I am going to Analyze the "Boroughs" instead of "Neighborhood" becuase I think it makes more sense 

In [27]:
Toronto_data.loc[0,'Borough']

'Downtown Toronto'

In [29]:
Borough_latitude = Toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
Borough_longitude = Toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

Borough_name = Toronto_data.loc[0, 'Borough'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(Borough_name, 
                                                               Borough_latitude, 
                                                               Borough_longitude))

Latitude and longitude values of Downtown Toronto are 43.6542599, -79.3606359.


## I set the limit to 10 because I think 100 is not realistic for someone acutally looking to visit a place

In [32]:
LIMIT= 10

radius=500

url1='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={}.,.{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,VERSION, Borough_latitude, Borough_longitude, radius, LIMIT)
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    Borough_latitude, 
    Borough_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=YIFS4JFAEOP2OXJWHWQONYL10MAELMJH1550YKJ3WYGOANX0&client_secret=15YKARKAMWSXYH4WLRXSYACWMYZMGNL0ZTNLYABIYX1VTAG4&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=10'

In [33]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ef0e5553907e70023a67b0d'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 44,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

## Getting the Venue info

In [34]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [36]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Dominion Pub and Kitchen,Pub,43.656919,-79.358967
5,Corktown Common,Park,43.655618,-79.356211
6,Impact Kitchen,Restaurant,43.656369,-79.35698
7,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149
8,The Extension Room,Gym / Fitness Center,43.653313,-79.359725
9,The Distillery Historic District,Historic Site,43.650244,-79.359323


In [37]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

10 venues were returned by Foursquare.


In [38]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [40]:
Toronto_venues=getNearbyVenues(names=Toronto_data['Borough'],
                                   latitudes=Toronto_data['Latitude'],
                                   longitudes=Toronto_data['Longitude']
                                  )

Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
West Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
East Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Downtown Toronto
West Toronto
Central Toronto
Downtown Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto


In [41]:
Toronto_venues.groupby('Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,58,58,58,58,58,58
Downtown Toronto,184,184,184,184,184,184
East Toronto,44,44,44,44,44,44
West Toronto,60,60,60,60,60,60


In [42]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 119 uniques categories.


## Looking at Venue info by Borough

In [43]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Borough'] = Toronto_venues['Borough'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Borough,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Bakery,Bank,Bar,Beer Bar,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comic Shop,Concert Hall,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Department Store,Dessert Shop,Diner,Distribution Center,Dog Run,Eastern European Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,Gift Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,History Museum,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Korean Restaurant,Lake,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Organic Grocery,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Pub,Ramen Restaurant,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Skate Park,Skating Rink,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Swim School,Tea Room,Tennis Court,Thai Restaurant,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Wine Bar,Yoga Studio
0,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [44]:
Toronto_onehot.shape

(346, 120)

In [45]:
Toronto_grouped = Toronto_onehot.groupby('Borough').mean().reset_index()
Toronto_grouped

Unnamed: 0,Borough,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Bakery,Bank,Bar,Beer Bar,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comic Shop,Concert Hall,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Department Store,Dessert Shop,Diner,Distribution Center,Dog Run,Eastern European Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,Gift Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,History Museum,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Korean Restaurant,Lake,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Organic Grocery,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Pub,Ramen Restaurant,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Skate Park,Skating Rink,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Swim School,Tea Room,Tennis Court,Thai Restaurant,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Wine Bar,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.017241,0.0,0.017241,0.051724,0.0,0.0,0.017241,0.017241,0.0,0.086207,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.034483,0.017241,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.017241,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.017241,0.0,0.0,0.0,0.017241,0.017241,0.0,0.034483,0.017241,0.0,0.017241,0.0,0.0,0.017241,0.017241,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086207,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.017241,0.0,0.034483,0.0,0.017241,0.017241,0.017241,0.0,0.0,0.017241,0.0,0.0,0.017241,0.0,0.017241,0.051724,0.017241,0.0,0.017241,0.0,0.0,0.034483,0.017241,0.0,0.017241
1,Downtown Toronto,0.005435,0.005435,0.005435,0.01087,0.005435,0.0,0.0,0.0,0.005435,0.0,0.0,0.0,0.038043,0.0,0.01087,0.021739,0.01087,0.016304,0.0,0.01087,0.0,0.005435,0.0,0.070652,0.005435,0.005435,0.0,0.005435,0.01087,0.081522,0.005435,0.016304,0.005435,0.01087,0.0,0.005435,0.0,0.016304,0.01087,0.01087,0.0,0.0,0.005435,0.0,0.0,0.0,0.0,0.0,0.01087,0.005435,0.01087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005435,0.0,0.0,0.01087,0.021739,0.027174,0.005435,0.0,0.005435,0.0,0.016304,0.0,0.005435,0.032609,0.021739,0.005435,0.0,0.005435,0.005435,0.01087,0.01087,0.005435,0.0,0.016304,0.005435,0.01087,0.0,0.005435,0.043478,0.005435,0.0,0.0,0.016304,0.005435,0.005435,0.016304,0.016304,0.01087,0.070652,0.005435,0.0,0.0,0.0,0.0,0.005435,0.01087,0.005435,0.005435,0.0,0.01087,0.0,0.016304,0.0,0.016304,0.0,0.01087,0.005435,0.005435,0.016304,0.005435,0.01087
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.022727,0.0,0.068182,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.022727,0.022727,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.045455,0.022727,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,0.068182,0.0,0.022727,0.0,0.0,0.022727,0.0,0.0,0.0,0.090909,0.0,0.045455,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.022727,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.045455,0.0,0.022727,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.022727
3,West Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.016667,0.016667,0.016667,0.0,0.0,0.05,0.016667,0.05,0.0,0.016667,0.016667,0.033333,0.0,0.0,0.016667,0.0,0.05,0.0,0.0,0.0,0.0,0.016667,0.066667,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.016667,0.0,0.0,0.016667,0.016667,0.0,0.0,0.016667,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.016667,0.0,0.0,0.033333,0.0,0.016667,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.066667,0.0,0.0,0.016667,0.0,0.0,0.0,0.016667,0.0,0.016667,0.0,0.033333,0.0,0.016667,0.0,0.016667,0.0,0.016667,0.016667,0.016667,0.0,0.0,0.0,0.016667,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.033333,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.016667,0.0


In [46]:
Toronto_grouped.shape

(4, 120)

## Looking at Top 5 Venues for Each Borough

In [47]:
num_top_venues = 5

for hood in Toronto_grouped['Borough']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Borough'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
              venue  freq
0              Park  0.09
1       Coffee Shop  0.09
2              Café  0.05
3  Sushi Restaurant  0.05
4             Trail  0.03


----Downtown Toronto----
         venue  freq
0  Coffee Shop  0.08
1         Café  0.07
2   Restaurant  0.07
3         Park  0.04
4       Bakery  0.04


----East Toronto----
                  venue  freq
0        Ice Cream Shop  0.09
1      Greek Restaurant  0.07
2               Brewery  0.07
3                   Pub  0.05
4  Fast Food Restaurant  0.05


----West Toronto----
                venue  freq
0  Italian Restaurant  0.07
1         Coffee Shop  0.07
2              Bakery  0.05
3                Café  0.05
4                 Bar  0.05




In [48]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [50]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Borough_venues_sorted = pd.DataFrame(columns=columns)
Borough_venues_sorted['Borough'] = Toronto_grouped['Borough']

for ind in np.arange(Toronto_grouped.shape[0]):
    Borough_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

Borough_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Park,Coffee Shop,Café,Sushi Restaurant,Indian Restaurant,Trail,Dessert Shop,Restaurant,Breakfast Spot,Bus Line
1,Downtown Toronto,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
2,East Toronto,Ice Cream Shop,Brewery,Greek Restaurant,Fast Food Restaurant,Pub,Italian Restaurant,Neighborhood,Yoga Studio,Health Food Store,Pizza Place
3,West Toronto,Italian Restaurant,Coffee Shop,Bakery,Bar,Café,Gift Shop,Cuban Restaurant,Brewery,Furniture / Home Store,Sushi Restaurant


## Since I am analyzing based on Borough I am only going to create 3 clusters since there are only 4 Boroughs

In [70]:
# set number of clusters
kclusters = 3

Toronto_grouped_clustering = Toronto_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 1, 2], dtype=int32)

In [71]:
# add clustering labels
Borough_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Toronto_merged = Toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(Borough_venues_sorted.set_index('Borough'), on='Borough')

Toronto_merged.head() # check the last columns!

ValueError: cannot insert Cluster Labels, already exists

## Map of Toronto with the Clusters from the Toronto Boroughs

In [72]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Borough'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Cluster 1

In [74]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(3, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,43.65426,-79.360636,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
1,Downtown Toronto,43.662301,-79.389494,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
2,Downtown Toronto,43.657162,-79.378937,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
3,Downtown Toronto,43.651494,-79.375418,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
5,Downtown Toronto,43.644771,-79.373306,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
6,Downtown Toronto,43.657952,-79.387383,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
7,Downtown Toronto,43.669542,-79.422564,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
8,Downtown Toronto,43.650571,-79.384568,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
10,Downtown Toronto,43.640816,-79.381752,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant
13,Downtown Toronto,43.647177,-79.381576,0,Coffee Shop,Restaurant,Café,Park,Bakery,Italian Restaurant,Gym / Fitness Center,Beer Bar,Gym,Japanese Restaurant


## Cluster 2 

In [75]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(3, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,43.676357,-79.293031,1,Ice Cream Shop,Brewery,Greek Restaurant,Fast Food Restaurant,Pub,Italian Restaurant,Neighborhood,Yoga Studio,Health Food Store,Pizza Place
12,East Toronto,43.679557,-79.352188,1,Ice Cream Shop,Brewery,Greek Restaurant,Fast Food Restaurant,Pub,Italian Restaurant,Neighborhood,Yoga Studio,Health Food Store,Pizza Place
15,East Toronto,43.668999,-79.315572,1,Ice Cream Shop,Brewery,Greek Restaurant,Fast Food Restaurant,Pub,Italian Restaurant,Neighborhood,Yoga Studio,Health Food Store,Pizza Place
17,East Toronto,43.659526,-79.340923,1,Ice Cream Shop,Brewery,Greek Restaurant,Fast Food Restaurant,Pub,Italian Restaurant,Neighborhood,Yoga Studio,Health Food Store,Pizza Place
38,East Toronto,43.662744,-79.321558,1,Ice Cream Shop,Brewery,Greek Restaurant,Fast Food Restaurant,Pub,Italian Restaurant,Neighborhood,Yoga Studio,Health Food Store,Pizza Place


## Cluster 3 

In [76]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(3, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,West Toronto,43.669005,-79.442259,2,Italian Restaurant,Coffee Shop,Bakery,Bar,Café,Gift Shop,Cuban Restaurant,Brewery,Furniture / Home Store,Sushi Restaurant
11,West Toronto,43.647927,-79.41975,2,Italian Restaurant,Coffee Shop,Bakery,Bar,Café,Gift Shop,Cuban Restaurant,Brewery,Furniture / Home Store,Sushi Restaurant
14,West Toronto,43.636847,-79.428191,2,Italian Restaurant,Coffee Shop,Bakery,Bar,Café,Gift Shop,Cuban Restaurant,Brewery,Furniture / Home Store,Sushi Restaurant
22,West Toronto,43.661608,-79.464763,2,Italian Restaurant,Coffee Shop,Bakery,Bar,Café,Gift Shop,Cuban Restaurant,Brewery,Furniture / Home Store,Sushi Restaurant
25,West Toronto,43.64896,-79.456325,2,Italian Restaurant,Coffee Shop,Bakery,Bar,Café,Gift Shop,Cuban Restaurant,Brewery,Furniture / Home Store,Sushi Restaurant
28,West Toronto,43.651571,-79.48445,2,Italian Restaurant,Coffee Shop,Bakery,Bar,Café,Gift Shop,Cuban Restaurant,Brewery,Furniture / Home Store,Sushi Restaurant
