## Note: All sections of the assignment are in this one notebook. Scroll down as appropriate 
# Part 1, scraping data and setting up data frame

In [1]:
#import the necessary stuff
import pandas as pd
import numpy as np
import requests 

!conda install html5lib beautifulsoup4 lxml --yes


Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - beautifulsoup4
    - html5lib
    - lxml


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    beautifulsoup4-4.9.1       |           py36_0         172 KB
    ca-certificates-2020.6.24  |                0         125 KB
    certifi-2020.6.20          |           py36_0         156 KB
    libxml2-2.9.10             |       he19cac6_1         1.2 MB
    libxslt-1.1.34             |       hc22bd24_0         432 KB
    lxml-4.5.2                 |   py36hefd8a0e_0         1.2 MB
    openssl-1.1.1g             |       h7b6447c_0         2.5 MB
    soupsieve-2.0.1            |             py_0          33 KB
    ------------------------------------------------------------
                                

In [2]:
# Scrape the data from the webpage.
# pd.read_html returns a list of DataFrames, one per HTML table it parses on the page.
# NOTE(review): the page is fetched live, so the tables it contains may change between runs.

url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

dfs = pd.read_html(url)


In [3]:
# Clean the data

# The postal code table is the first table parsed from the page
df = dfs[0]

# Drop the rows whose borough is "Not assigned" and renumber the remaining rows from 0.
# The filter and the re-index are chained so df1 is built in a single step from `df`
# (no in-place mutation of a filtered slice), which keeps the cell safe to re-run.
df1 = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)
df1.head(12)


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [4]:
# Shape (rows, columns) of the cleaned table, as requested by the assignment
df1.shape

(103, 3)

## End first section of assignment

# Part 2, Add Geolocation to DataFrame

In [5]:
# Get the lat / long data from the CSV file provided in the assignment.
# The bare `geo` on the last line displays the loaded table.
geo = pd.read_csv('http://cocl.us/Geospatial_data')
geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [6]:
# Attach Latitude / Longitude to every postal code row.
# A left join keeps every row of df1, whether or not geo has a matching postal code.
toronto = df1.merge(geo, how='left', on='Postal Code')
toronto

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [7]:
# Shape (rows, columns) of the merged table
toronto.shape

(103, 5)

## End of second section of assignment
# Part 3 Clustering and Mapping

In [8]:
# Import Libraries

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-2.0.0-pyh9f0ad1d_0

The following packages will be SUPERSEDED by a higher-priority channel:

  ca-certificates    pkgs/main::ca-certificates-2020.6.24-0 --> conda-forge::ca-certificates-2020.6.20-hecda079_0

In [9]:
# Get the lat / long for Toronto
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() gives back None when the service finds no match for the address;
# stop here with a clear message rather than an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


## Since there are multiple Neighborhoods in some Postal Code Areas, it seems better to map / classify by Postal Code and Borough than by Neighborhood

In [10]:
# Map Toronto postal codes: one blue circle marker per row of `toronto`

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# columns read for each marker, in the order they are unpacked below
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Postal Code']

for lat, lng, borough, postal_code in zip(*(toronto[column] for column in marker_columns)):
    # popup text is "<postal code>, <borough>"
    popup = folium.Popup('{}, {}'.format(postal_code, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

The analysis plan is to get the food venues for each postal code and try to categorize on that. Since many of the downtown codes are close together, there will probably be some overlap when using a radius appropriate for the outlying areas. Since downtown will probably cluster together anyway, I don't expect this to be a problem.

In [12]:
# Parameters for a single trial venue search
search_query = 'Food'
radius = 1500
limit = 50

# Use the first row of `toronto` as the search location.
# NOTE(review): this rebinds the notebook-level `latitude` / `longitude` that the
# geocoding cell set to the Toronto coordinates; any later cell that reads these
# names gets the first postal code's coordinates instead.
latitude = toronto.loc[0, 'Latitude']
longitude = toronto.loc[0, 'Longitude']

In [13]:
# Build the Foursquare venue-search request URL for the trial location.
# NOTE(review): CLIENT_ID, CLIENT_SECRET and VERSION are not defined in any cell shown
# in this notebook (execution count 11 is missing) — presumably a removed credentials
# cell; they must be defined before this cell can run.
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, limit)

In [14]:
# Send the request and decode the JSON body of the response
results = requests.get(url).json()
# (uncomment the next line to inspect the raw response)
#results

In [15]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue, or None.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        A venue record. The category list is read from the 'categories' key,
        falling back to 'venue.categories' when 'categories' is absent.

    Returns
    -------
    The 'name' entry of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate instead of being swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [16]:
# Build the venue table for one postal code (the trial search above)
venues = results['response']['venues']

# columns kept from the flattened JSON
filtered_columns = ['name', 'categories', 'location.lat', 'location.lng']

nearby_venues = (
    pd.json_normalize(venues)                    # flatten JSON into dotted column names
    .loc[:, filtered_columns]                    # keep name, categories and coordinates
    .assign(categories=lambda frame: frame.apply(get_category_type, axis=1))  # first category name per row
    .rename(columns=lambda col: col.split(".")[-1])  # 'location.lat' -> 'lat'
)

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Food Basics,Supermarket,43.760549,-79.326045
1,Family Food Fair Convenience,Convenience Store,43.76062,-79.324459
2,Bruno's Fine Foods,Grocery Store,43.745608,-79.336772
3,Careeb Foods,Caribbean Restaurant,43.757297,-79.310996
4,Viking Foods,Market,43.739869,-79.320581
5,Arz fine foods,Grocery Store,43.74805,-79.30797
6,Global Pet Foods,Pet Store,43.759556,-79.309715
7,Luxmy foods,,43.74141,-79.314378
8,Island Foods,Caribbean Restaurant,43.745866,-79.346035
9,Kraft Foods,Office,43.759407,-79.35073


Function to get the food places as we loop through all the postal codes

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500, search_query='Food', limit=100):
    """Query the Foursquare venue search once per location and collect the results.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each search location; they are walked in parallel.
    radius, search_query, limit : optional
        Values sent as the `radius`, `query` and `limit` request parameters.
        The defaults are the values this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per venue that has at least one category, with the columns
        'Postal Code', 'PC Latitude', 'PC Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude', 'Venue Category'. The frame is empty (but still has
        these columns) when no venue was collected.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.

    Notes
    -----
    Reads the notebook-level CLIENT_ID, CLIENT_SECRET and VERSION names.
    """
    venue_columns = ['Postal Code',
                     'PC Latitude',
                     'PC Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    venues_list = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'll': '{},{}'.format(lat, lng),
            'v': VERSION,
            'query': search_query,
            'radius': radius,
            'limit': limit,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/search', params=params)
        # surface HTTP errors directly instead of failing later on a missing key
        response.raise_for_status()
        results = response.json()["response"]['venues']

        for v in results:
            # skip venues whose category list is missing or empty
            categories = v.get('categories')
            if not categories:
                continue
            venues_list.append([
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                categories[0]['name']])

    # Passing the column names to the constructor also works when venues_list is
    # empty; assigning .columns afterwards raised a length-mismatch error in that case.
    return pd.DataFrame(venues_list, columns=venue_columns)

In [18]:
# Run the venue search for every postal code in `toronto`
toronto_venues = getNearbyVenues(toronto['Postal Code'], toronto['Latitude'], toronto['Longitude'])

Now we have all of the food venues by postal code. 

In [19]:
# Size of the combined venue table, followed by its first 20 rows
print(toronto_venues.shape)
toronto_venues.head(20)

(1961, 7)


Unnamed: 0,Postal Code,PC Latitude,PC Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M3A,43.753259,-79.329656,Food Basics,43.760549,-79.326045,Supermarket
1,M3A,43.753259,-79.329656,Family Food Fair Convenience,43.76062,-79.324459,Convenience Store
2,M3A,43.753259,-79.329656,Bruno's Fine Foods,43.745608,-79.336772,Grocery Store
3,M3A,43.753259,-79.329656,Careeb Foods,43.757297,-79.310996,Caribbean Restaurant
4,M3A,43.753259,-79.329656,Viking Foods,43.739869,-79.320581,Market
5,M3A,43.753259,-79.329656,Arz fine foods,43.74805,-79.30797,Grocery Store
6,M3A,43.753259,-79.329656,Global Pet Foods,43.759556,-79.309715,Pet Store
7,M3A,43.753259,-79.329656,Island Foods,43.745866,-79.346035,Caribbean Restaurant
8,M3A,43.753259,-79.329656,Kraft Foods,43.759407,-79.35073,Office
9,M4A,43.725882,-79.315572,Latvian Centre Food Market,43.725677,-79.318248,Deli / Bodega


## This section consolidates and sorts the food venues in preparation for clustering

Food venues grouped by postal code

In [20]:
# Count of non-null values in each column, per postal code
toronto_venues.groupby('Postal Code').count()

Unnamed: 0_level_0,PC Latitude,PC Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,4,4,4,4,4,4
M1C,2,2,2,2,2,2
M1E,7,7,7,7,7,7
M1G,6,6,6,6,6,6
M1H,12,12,12,12,12,12
M1J,5,5,5,5,5,5
M1K,7,7,7,7,7,7
M1L,6,6,6,6,6,6
M1M,1,1,1,1,1,1
M1N,4,4,4,4,4,4


Count the unique categories

In [21]:
# Number of distinct values in the 'Venue Category' column
print('There are {} unique categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 132 unique categories.


Normalize the data

In [22]:
# one hot encoding: one indicator column per venue category, one row per venue
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add the postal code of each venue back to the dataframe (appended as the last column)
toronto_onehot['Postal Code'] = toronto_venues['Postal Code']

# move that last column to the front, keeping the other columns in their order
*category_columns, key_column = toronto_onehot.columns
fixed_columns = [key_column, *category_columns]
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Postal Code,Adult Education Center,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bar,Breakfast Spot,Building,Burger Joint,Business Center,Business Service,Butcher,Cafeteria,Café,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Coffee Shop,College Cafeteria,Comfort Food Restaurant,Convenience Store,Coworking Space,Deli / Bodega,Dessert Shop,Diner,Distribution Center,Doctor's Office,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Event Space,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,Fruit & Vegetable Store,Furniture / Home Store,Gas Station,Gastropub,General Entertainment,Gluten-free Restaurant,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Halal Restaurant,Health Food Store,Herbs & Spices Store,Hong Kong Restaurant,Hospital,Hot Dog Joint,Housing Development,Indian Restaurant,Industrial Estate,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Liquor Store,Market,Medical Center,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Non-Profit,Office,Organic Grocery,Park,Pet Service,Pet Store,Pharmacy,Photography Studio,Pizza Place,Portuguese Restaurant,Poutine Place,Professional & Other Places,Pub,Public Bathroom,Ramen Restaurant,Research Station,Residential Building (Apartment / Condo),Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Smoothie Shop,Snack Place,Soccer Stadium,South Indian Restaurant,Southern / Soul Food Restaurant,Storage Facility,Street Fair,Street Food Gathering,Supermarket,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tech Startup,Thai Restaurant,Theme Park Ride / Attraction,Theme 
Restaurant,Tour Provider,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Warehouse Store,Weight Loss Center
0,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M3A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Group by Postal Code

In [23]:
# Average the one-hot rows per postal code: each value becomes the share of that
# postal code's venues that fall in the category
toronto_grouped = toronto_onehot.groupby('Postal Code').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Postal Code,Adult Education Center,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bar,Breakfast Spot,Building,Burger Joint,Business Center,Business Service,Butcher,Cafeteria,Café,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Coffee Shop,College Cafeteria,Comfort Food Restaurant,Convenience Store,Coworking Space,Deli / Bodega,Dessert Shop,Diner,Distribution Center,Doctor's Office,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Event Space,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,Fruit & Vegetable Store,Furniture / Home Store,Gas Station,Gastropub,General Entertainment,Gluten-free Restaurant,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Halal Restaurant,Health Food Store,Herbs & Spices Store,Hong Kong Restaurant,Hospital,Hot Dog Joint,Housing Development,Indian Restaurant,Industrial Estate,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Liquor Store,Market,Medical Center,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Non-Profit,Office,Organic Grocery,Park,Pet Service,Pet Store,Pharmacy,Photography Studio,Pizza Place,Portuguese Restaurant,Poutine Place,Professional & Other Places,Pub,Public Bathroom,Ramen Restaurant,Research Station,Residential Building (Apartment / Condo),Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Smoothie Shop,Snack Place,Soccer Stadium,South Indian Restaurant,Southern / Soul Food Restaurant,Storage Facility,Street Fair,Street Food Gathering,Supermarket,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tech Startup,Thai Restaurant,Theme Park Ride / Attraction,Theme 
Restaurant,Tour Provider,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Warehouse Store,Weight Loss Center
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Make the dataframe of most common categories

In [24]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in a row, largest first.

    The first element of `row` is skipped (in this notebook it holds the postal
    code); the remaining entries are sorted in descending order and the index
    labels of the first `num_top_venues` of them are returned as an array.
    """
    ranked_categories = row.iloc[1:].sort_values(ascending=False).index
    return ranked_categories.values[:num_top_venues]

In [25]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: 1st, 2nd, 3rd, then 4th, 5th, ...
columns = ['Postal Code']
for ind in range(num_top_venues):
    # explicit bounds check instead of catching the IndexError with a bare except
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# rank the categories of every postal code, one list of labels per row of toronto_grouped
top_venue_rows = [
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(toronto_grouped.shape[0])
]

# build the dataframe in one go rather than assigning row by row
venues_sorted = pd.DataFrame(top_venue_rows, columns=columns[1:])
venues_sorted.insert(0, 'Postal Code', toronto_grouped['Postal Code'].values)

venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Business Service,Caribbean Restaurant,Indian Restaurant,Pet Store,Weight Loss Center,Fish & Chips Shop,Food,Flower Shop,Flea Market,Fish Market
1,M1C,Pet Service,Miscellaneous Shop,Weight Loss Center,Food & Drink Shop,Eastern European Restaurant,Event Space,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant
2,M1E,Grocery Store,Market,Pharmacy,Caribbean Restaurant,Theme Restaurant,Convenience Store,Supermarket,Weight Loss Center,Filipino Restaurant,Flea Market
3,M1G,Grocery Store,Supermarket,Market,Weight Loss Center,Filipino Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
4,M1H,Grocery Store,Bakery,Public Bathroom,Pet Store,Restaurant,Fast Food Restaurant,Shopping Mall,Food Court,Butcher,Market


Clustering and mapping. After trying several cluster numbers, using 4 seemed to return reasonable groupings.

In [26]:
# set number of clusters
kclusters = 4

# The features are the per-category values; the postal code is only a label.
# `columns=` is used because passing the axis positionally (drop('Postal Code', 1))
# is not accepted by newer pandas releases.
toronto_grouped_clustering = toronto_grouped.drop(columns='Postal Code')

# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 2, 0, 0, 0, 0, 2, 0], dtype=int32)

In [27]:
# add clustering labels
# insert() raises if the column already exists, so drop a copy left by a previous
# run of this cell first; this keeps the cell safe to re-run.
if 'Cluster Labels' in venues_sorted.columns:
    venues_sorted = venues_sorted.drop(columns='Cluster Labels')
venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join venues_sorted onto toronto so each postal code row carries its cluster label
# and most common venues next to its borough, neighbourhood and coordinates
toronto_merged = toronto.join(venues_sorted.set_index('Postal Code'), on='Postal Code')

toronto_merged = toronto_merged.dropna() #remove any postal codes that have no food venues
# the join leaves the labels as floats where rows were unmatched; cast back to int
toronto_merged = toronto_merged.astype({"Cluster Labels": int})
toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Grocery Store,Caribbean Restaurant,Office,Market,Pet Store,Supermarket,Convenience Store,Weight Loss Center,Flea Market,Fish Market
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Grocery Store,Food,Deli / Bodega,Pub,Food Truck,Food Court,Restaurant,Market,Coffee Shop,Fish & Chips Shop
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Grocery Store,Food & Drink Shop,Food Truck,Miscellaneous Shop,Middle Eastern Restaurant,Food Court,Chinese Restaurant,Market,Café,Sandwich Place
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Food Court,Café,Asian Restaurant,Food Service,Chinese Restaurant,Convenience Store,Filipino Restaurant,Salad Place,Grocery Store,Miscellaneous Shop
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1,Food Truck,Food Court,Grocery Store,Food & Drink Shop,Chinese Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Restaurant,Convenience Store,Supermarket


Cluster Map. Some of the colors don't show well

In [28]:
# create map centred on Toronto
# `location` is the geocoding result for 'Toronto, Canada'. The plain `latitude` /
# `longitude` names were re-assigned to the first postal code's coordinates by the
# trial-search cell, so they no longer hold the city centre.
map_clusters = folium.Map(location=[location.latitude, location.longitude], zoom_start=11)

# set color scheme for the clusters: one colour per cluster, evenly spaced on the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postal Code'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is kept on purpose: cluster 0 wraps round to the last colour, and the
    # colour names in the cluster descriptions below rely on this mapping
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Here is a list of each of the clusters, with a description of what they could be called

Cluster 0  
Red markers. This is the most numerous cluster. It is a general city location that doesn't have more specific defining features like the other clusters do. It is a good mix of groceries, specialty stores, and restaurants.

In [29]:
# Postal code (column 0) plus the cluster label and top-venue columns (column 5 onwards)
cluster_columns = toronto_merged.columns[[0] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, cluster_columns]

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,0,Grocery Store,Caribbean Restaurant,Office,Market,Pet Store,Supermarket,Convenience Store,Weight Loss Center,Flea Market,Fish Market
1,M4A,0,Grocery Store,Food,Deli / Bodega,Pub,Food Truck,Food Court,Restaurant,Market,Coffee Shop,Fish & Chips Shop
2,M5A,0,Grocery Store,Food & Drink Shop,Food Truck,Miscellaneous Shop,Middle Eastern Restaurant,Food Court,Chinese Restaurant,Market,Café,Sandwich Place
6,M1B,0,Business Service,Caribbean Restaurant,Indian Restaurant,Pet Store,Weight Loss Center,Fish & Chips Shop,Food,Flower Shop,Flea Market,Fish Market
7,M3B,0,Grocery Store,Food Court,Caribbean Restaurant,Deli / Bodega,Pet Store,Middle Eastern Restaurant,Office,Weight Loss Center,Fish & Chips Shop,Flea Market
8,M4B,0,Food & Drink Shop,Deli / Bodega,Cantonese Restaurant,Food Court,Filipino Restaurant,Restaurant,Convenience Store,Grocery Store,Food,Factory
12,M1C,0,Pet Service,Miscellaneous Shop,Weight Loss Center,Food & Drink Shop,Eastern European Restaurant,Event Space,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant
14,M4C,0,Cantonese Restaurant,Grocery Store,Restaurant,Food & Drink Shop,Miscellaneous Shop,Chinese Restaurant,Organic Grocery,Caribbean Restaurant,Indian Restaurant,Health Food Store
16,M6C,0,Food & Drink Shop,Grocery Store,Caribbean Restaurant,Chinese Restaurant,Restaurant,Deli / Bodega,Pharmacy,Organic Grocery,Filipino Restaurant,Taco Place
18,M1E,0,Grocery Store,Market,Pharmacy,Caribbean Restaurant,Theme Restaurant,Convenience Store,Supermarket,Weight Loss Center,Filipino Restaurant,Flea Market


Cluster 1  
Purple markers. Business or shopping centers.  It is characterized by food courts, food trucks, and fast food.


In [30]:
# Postal code (column 0) plus the cluster label and top-venue columns (column 5 onwards)
cluster_columns = toronto_merged.columns[[0] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, cluster_columns]

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,M6A,1,Food Court,Café,Asian Restaurant,Food Service,Chinese Restaurant,Convenience Store,Filipino Restaurant,Salad Place,Grocery Store,Miscellaneous Shop
4,M7A,1,Food Truck,Food Court,Grocery Store,Food & Drink Shop,Chinese Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Restaurant,Convenience Store,Supermarket
9,M5B,1,Food Court,Food Truck,Food & Drink Shop,Grocery Store,Mediterranean Restaurant,Fast Food Restaurant,Miscellaneous Shop,Convenience Store,Salad Place,College Cafeteria
10,M6B,1,Food Court,Grocery Store,Food & Drink Shop,Office,Miscellaneous Shop,Chinese Restaurant,Building,Café,Vegetarian / Vegan Restaurant,Fast Food Restaurant
13,M3C,1,Food Truck,Grocery Store,Asian Restaurant,Middle Eastern Restaurant,Weight Loss Center,Fish & Chips Shop,Food,Flower Shop,Flea Market,Fish Market
15,M5C,1,Food Court,Food Truck,Grocery Store,Food & Drink Shop,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Salad Place,Southern / Soul Food Restaurant,Miscellaneous Shop,Convenience Store
20,M5E,1,Food Court,Food Truck,Salad Place,Food & Drink Shop,Fast Food Restaurant,Grocery Store,Vegetarian / Vegan Restaurant,Juice Bar,Coffee Shop,Mediterranean Restaurant
24,M5G,1,Food Court,Food Truck,Grocery Store,Food & Drink Shop,Chinese Restaurant,Mediterranean Restaurant,Miscellaneous Shop,Deli / Bodega,Convenience Store,Salad Place
30,M5H,1,Food Court,Food Truck,Grocery Store,Fast Food Restaurant,Chinese Restaurant,Salad Place,Korean Restaurant,Farmers Market,Convenience Store,Mediterranean Restaurant
36,M5J,1,Food Court,Food Truck,Grocery Store,Salad Place,Food & Drink Shop,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Tour Provider,Office,Café


Cluster 2  
Aqua markers. Residential areas that are heavy on grocery stores and convenience stores, with other markets and restaurants mixed in.

In [31]:
# Postal code (column 0) plus the cluster label and top-venue columns (column 5 onwards)
cluster_columns = toronto_merged.columns[[0] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, cluster_columns]

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,M9A,2,Grocery Store,Pet Store,Restaurant,BBQ Joint,Fast Food Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Filipino Restaurant
11,M9B,2,Grocery Store,Chinese Restaurant,Pet Store,Café,Office,Weight Loss Center,Fish & Chips Shop,Flower Shop,Flea Market,Fish Market
17,M9C,2,Grocery Store,Convenience Store,Pet Store,Office,Flea Market,Fish Market,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant,Weight Loss Center
22,M1G,2,Grocery Store,Supermarket,Market,Weight Loss Center,Filipino Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
27,M2H,2,Grocery Store,Convenience Store,Bakery,Supermarket,Fast Food Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Filipino Restaurant
28,M3H,2,Grocery Store,Weight Loss Center,Fast Food Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Filipino Restaurant,Farmers Market
51,M1M,2,Pet Store,Weight Loss Center,Food & Drink Shop,Dumpling Restaurant,Eastern European Restaurant,Event Space,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant
71,M1R,2,Grocery Store,Asian Restaurant,Supermarket,Restaurant,Caribbean Restaurant,American Restaurant,Pet Store,Chinese Restaurant,Filipino Restaurant,Flea Market
72,M2R,2,Grocery Store,Convenience Store,Weight Loss Center,Filipino Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Farmers Market
89,M9V,2,Grocery Store,Convenience Store,Distribution Center,Greek Restaurant,Chinese Restaurant,Miscellaneous Shop,Filipino Restaurant,Flower Shop,Flea Market,Fish Market


Cluster 3  
Yellow marker. It is a cluster of one that seems to feature a new development, with no nearby grocery or convenience stores.

In [32]:
# Postal code (column 0) plus the cluster label and top-venue columns (column 5 onwards)
cluster_columns = toronto_merged.columns[[0] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, cluster_columns]

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
39,M2K,3,Housing Development,Weight Loss Center,Fast Food Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Filipino Restaurant,Farmers Market
