# IBM Data Science Capstone Project - Toronto Neighbourhoods

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

!pip install bs4
import bs4
from bs4 import BeautifulSoup

import csv

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Libraries imported.


### Scrape Data from Wikipedia - List of postal codes of Canada: M

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
data = requests.get(url).text # send GET request and store as text data
my_soup = BeautifulSoup(data, 'html5lib') # parse the data with beautifulsoup

# search for target table
tables = my_soup.find_all('table')

for index, table in enumerate(tables):
    if 'Not assigned' in str(table): # find the string in our target table that is unique to the other tables
        target_table_index = index
print('There are {} tables found.\nTarget Table Index : {}'.format(index + 1, target_table_index))

There are 3 tables found.
Target Table Index : 0


### Create Pandas dataframe to store the result

In [3]:
postalcode_df = pd.DataFrame(columns = ['Postalcode', 'Borough', 'Neighbourhood'])

n = 0 # number of assigned postalcodes
for count, box in enumerate(tables[target_table_index].tbody.find_all('td')): # extract content in each cell(box)

    if 'Not assigned' not in box.find('span').getText(): # excluding postalcodes that are not assigned to any neighbourhood
        n += 1
        
        ## get assigned postalcode
        code = box.find('p').contents[0].strip()
        
        ## get borough : check if borough is a Navigatable String or a Tag(in this case link object <a>)
        check_borough = box.find('span').contents[0] 
        if isinstance(check_borough, bs4.element.NavigableString):
            borough = check_borough
        else:
            borough = check_borough.getText()
        
        ## get neighbourhood(s)
        neighbourhood = box.find('span').getText() # find the string in the tag <span>
        start = neighbourhood.index('(') + 1 # find the names between brackets '(...)'
        end = neighbourhood.index(')')
        neighbourhood = neighbourhood[start:end].replace(' /', ',') # replace slash with comma
        
        # print(code, borough, '\n\t', neighbourhood, '\n')
        # print(type(code), type(borough), type(neighbourhood))
        
        ## add them to pandas dataframe
        postalcode_df = postalcode_df.append({'Postalcode': code, 'Borough': borough, 'Neighbourhood': neighbourhood}, ignore_index = True)
            
postalcode_df.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [4]:
print('There are {} unique postalcodes, and {} of them have been assigned.'.format(count + 1, n))
print('The shape of the dataframe is {}.'.format(postalcode_df.shape))

There are 180 unique postalcodes, and 103 of them have been assigned.
The shape of the dataframe is (103, 3).


### Add coordinates to the dataframe

In [5]:
# get coordinates json file
!wget -q -O 'toronto_coordinates.csv' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv
print('Data downloaded!')

toronto_coordinates = pd.read_csv('toronto_coordinates.csv')
toronto_coordinates = toronto_coordinates.rename(columns = {'Postal Code': 'Postalcode'})
print('The shape of the dataframe is {}.'.format(toronto_coordinates.shape))
toronto_coordinates.head()

Data downloaded!
The shape of the dataframe is (103, 3).


Unnamed: 0,Postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [6]:
# merge the two dataframe
result = pd.merge(postalcode_df, toronto_coordinates, on = 'Postalcode')
result.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


### Use geopy library to get the coordinates of Toronto

In [7]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are ({}, {}).'.format(latitude, longitude))

The geograpical coordinates of Toronto are (43.6534817, -79.3839347).


### Create neighbourhoods the map of Toronto

In [8]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(result['Latitude'], result['Longitude'], result['Borough'], result['Neighbourhood']):
    label = '[ {} ] of {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.5,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Foursquare API

In [11]:
CLIENT_ID = '<your_id>' # your Foursquare ID
CLIENT_SECRET = 'your_secret' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 50 # A default Foursquare API limit value

# function for getting venues
def getNearbyVenues(names, latitudes, longitudes, radius=750):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Create a dataframe of Toronto Venues

In [12]:
toronto_venues = getNearbyVenues(names = result['Neighbourhood'],
                                 latitudes = result['Latitude'],
                                 longitudes = result['Longitude'])

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
The Danforth West, Riverdale


### A brief summary for the venue data

In [13]:
print('The shape of venues dataframe is {}.'.format(toronto_venues.shape))
print('There are {} unique venue categories.'.format(len(toronto_venues['Venue Category'].unique())))
toronto_venues.head()

The shape of venues dataframe is (1935, 7).
There are 273 unique venue categories.


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,DVP at York Mills,43.758899,-79.334099,Intersection
3,Parkwoods,43.753259,-79.329656,TTC Stop #09083,43.759655,-79.332223,Bus Stop
4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena


### Analyze the neighbourhoods with the method from lab

In [14]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighbourhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print('The shape of the new dataframe is {}.'.format(toronto_onehot.shape))
toronto_onehot.head()

The shape of the new dataframe is (1935, 274).


Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Baby Store,Badminton Court,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Castle,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Stadium,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Financial or Legal Service,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hostel,Hotel,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Movie Theater,Moving Target,Museum,Music School,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Newsagent,Nightclub,Noodle House,Optical Shop,Organic Grocery,Other Great Outdoors,Paper / Office Supplies Store,Park,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pier,Pizza Place,Plane,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Restaurant,River,Rock Climbing Spot,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tech Startup,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Tunnel,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Show the top 5 most common venues

In [15]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()

num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                      venue  freq
0                 Newsagent  0.07
1  Mediterranean Restaurant  0.07
2           Badminton Court  0.07
3        Seafood Restaurant  0.07
4            Breakfast Spot  0.07


----Alderwood, Long Branch----
               venue  freq
0  Convenience Store  0.15
1        Pizza Place  0.15
2        Gas Station  0.08
3                Pub  0.08
4        Coffee Shop  0.08


----Bathurst Manor, Wilson Heights, Downsview North----
         venue  freq
0         Park  0.08
1  Coffee Shop  0.08
2         Bank  0.08
3  Pizza Place  0.08
4     Pharmacy  0.04


----Bayview Village----
                 venue  freq
0                 Bank   0.2
1  Japanese Restaurant   0.2
2         Skating Rink   0.1
3        Grocery Store   0.1
4   Chinese Restaurant   0.1


----Bedford Park, Lawrence Manor East----
                venue  freq
0         Coffee Shop  0.10
1  Italian Restaurant  0.10
2      Sandwich Place  0.07
3         Sports Club  0.03
4    Greek Rest

                venue  freq
0         Pizza Place  0.10
1  Chinese Restaurant  0.10
2          Hobby Shop  0.05
3        Intersection  0.05
4      Shopping Plaza  0.05


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
                  venue  freq
0  Gym / Fitness Center  0.10
1    Italian Restaurant  0.07
2           Yoga Studio  0.03
3           Coffee Shop  0.03
4         Tanning Salon  0.03


----Moore Park, Summerhill East----
             venue  freq
0             Park  0.23
1    Grocery Store  0.15
2   Sandwich Place  0.08
3     Tennis Court  0.08
4  Thai Restaurant  0.08


----New Toronto, Mimico South, Humber Bay Shores----
                venue  freq
0  Mexican Restaurant  0.13
1                 Gym  0.07
2          Restaurant  0.07
3              Bakery  0.07
4        Liquor Store  0.07


----North Park, Maple Leaf Park, Upwood Park----
              venue  freq
0  Basketball Court  0.17
1              Park  0.17
2  Busi

### Create a dataframe with the top 10 venues for each neighbourhood 

In [16]:
# function for sorting the venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# set display to 10
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Discount Store,Newsagent,Breakfast Spot,Lounge,Latin American Restaurant,Supermarket,Shopping Mall,Sushi Restaurant,Seafood Restaurant,Badminton Court
1,"Alderwood, Long Branch",Pizza Place,Convenience Store,Gas Station,Pool,Pub,Park,Dance Studio,Sandwich Place,Donut Shop,Coffee Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Pizza Place,Park,Bank,Middle Eastern Restaurant,Sushi Restaurant,Supermarket,Bridal Shop,Shopping Mall,Sandwich Place
3,Bayview Village,Japanese Restaurant,Bank,Playground,Skating Rink,Intersection,Chinese Restaurant,Café,Grocery Store,Electronics Store,Eastern European Restaurant
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Greek Restaurant,Butcher,Bakery,Bank,Sushi Restaurant,Sports Club,Juice Bar


### K-mean clustering

In [17]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

print(kmeans.labels_[0:15])

# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = result

# merge toronto_grouped with toronto data(result) to add latitude/longitude for each neighbourhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

# drop rows with NaN cluster labels
toronto_merged.dropna(subset = ['Cluster Labels'], inplace = True)

# convert the type of cluster labels to int
toronto_merged = toronto_merged.astype({'Cluster Labels': int})

toronto_merged.head()

[3 0 0 0 3 3 3 3 3 1 3 3 3 3 4]


Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1,Park,Food & Drink Shop,Intersection,Bus Stop,Ethiopian Restaurant,Elementary School,Electronics Store,Eastern European Restaurant,Event Space,Discount Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,1,Playground,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Park,Dry Cleaner,Discount Store,Distribution Center,Dog Run
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,Coffee Shop,Park,Bakery,Breakfast Spot,French Restaurant,Italian Restaurant,Farmers Market,Mediterranean Restaurant,Mexican Restaurant,Distribution Center
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3,Clothing Store,Vietnamese Restaurant,Coffee Shop,Dessert Shop,Restaurant,Accessories Store,Furniture / Home Store,Bowling Alley,Boutique,Fast Food Restaurant
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,3,Coffee Shop,Park,Sushi Restaurant,Yoga Studio,Sandwich Place,Beer Bar,Mexican Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Burrito Place


### Visualize the clusters

In [18]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.5).add_to(map_clusters)
       
map_clusters

## Examination
### Cluster 1

In [19]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Etobicoke,0,Pharmacy,Café,Playground,Grocery Store,Park,Bank,Shopping Mall,Skating Rink,Drugstore,Distribution Center
8,East York,0,Pizza Place,Pet Store,Gastropub,Soccer Stadium,Fast Food Restaurant,Flea Market,Café,Bank,Rock Climbing Spot,Breakfast Spot
10,North York,0,Gas Station,Grocery Store,Playground,Metro Station,Pizza Place,Latin American Restaurant,Bakery,Japanese Restaurant,Asian Restaurant,Ice Cream Shop
11,Etobicoke,0,Pizza Place,Convenience Store,Mexican Restaurant,Bank,Restaurant,Hotel,Theater,Gym,Cosmetics Shop,Coworking Space
14,East York,0,Curling Ice,Restaurant,Intersection,Bus Line,Athletics & Sports,Park,Video Store,Beer Store,Skating Rink,Dance Studio
16,York,0,Playground,Tennis Court,Hockey Arena,Gastropub,Italian Restaurant,Korean Restaurant,Field,Middle Eastern Restaurant,Convenience Store,Park
17,Etobicoke,0,Café,Park,Shopping Plaza,Gas Station,Liquor Store,Electronics Store,Beer Store,Convenience Store,Pizza Place,Coffee Shop
18,Scarborough,0,Pizza Place,Fast Food Restaurant,Fried Chicken Joint,Restaurant,Beer Store,Bank,Sports Bar,Greek Restaurant,Gourmet Shop,Dessert Shop
27,North York,0,Pharmacy,Chinese Restaurant,Fast Food Restaurant,Shopping Mall,Bank,Bakery,Sandwich Place,Restaurant,Recreation Center,Ice Cream Shop
28,North York,0,Coffee Shop,Pizza Place,Park,Bank,Middle Eastern Restaurant,Sushi Restaurant,Supermarket,Bridal Shop,Shopping Mall,Sandwich Place


### Cluster 2

In [20]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1,Park,Food & Drink Shop,Intersection,Bus Stop,Ethiopian Restaurant,Elementary School,Electronics Store,Eastern European Restaurant,Event Space,Discount Store
1,North York,1,Playground,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Park,Dry Cleaner,Discount Store,Distribution Center,Dog Run
21,York,1,Park,Japanese Restaurant,Gym,Women's Store,Grocery Store,Bakery,Mexican Restaurant,Sporting Goods Shop,Bank,Intersection
22,Scarborough,1,Park,Coffee Shop,Business Service,Eastern European Restaurant,Distribution Center,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant
49,North York,1,Basketball Court,Playground,Garden Center,Bakery,Park,Business Service,Dumpling Restaurant,Dog Run,Donut Shop,Drugstore
52,North York,1,Park,Sandwich Place,Bank,Pizza Place,Coffee Shop,Indian Restaurant,Trail,Discount Store,Distribution Center,Dog Run
61,Central Toronto,1,Fast Food Restaurant,Park,Bus Line,Swim School,Coffee Shop,Business Service,Event Space,Ethiopian Restaurant,Elementary School,Electronics Store
66,North York,1,Park,Convenience Store,Bowling Alley,Pet Store,Eastern European Restaurant,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant
83,Central Toronto,1,Park,Grocery Store,Thai Restaurant,Café,Tennis Court,Sandwich Place,Candy Store,Japanese Restaurant,Gym,Gym / Fitness Center
91,Downtown Toronto,1,Park,Trail,Playground,Candy Store,Dumpling Restaurant,Distribution Center,Dog Run,Donut Shop,Drugstore,Dry Cleaner


### Cluster 3

In [21]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,North York,2,Pool,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Electronics Store


### Cluster 4

In [22]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,3,Coffee Shop,Park,Bakery,Breakfast Spot,French Restaurant,Italian Restaurant,Farmers Market,Mediterranean Restaurant,Mexican Restaurant,Distribution Center
3,North York,3,Clothing Store,Vietnamese Restaurant,Coffee Shop,Dessert Shop,Restaurant,Accessories Store,Furniture / Home Store,Bowling Alley,Boutique,Fast Food Restaurant
4,Queen's Park,3,Coffee Shop,Park,Sushi Restaurant,Yoga Studio,Sandwich Place,Beer Bar,Mexican Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Burrito Place
6,Scarborough,3,Fast Food Restaurant,Trail,African Restaurant,Coffee Shop,Spa,Hobby Shop,Paper / Office Supplies Store,Dog Run,Donut Shop,Drugstore
7,North York,3,Gym,Japanese Restaurant,Restaurant,Sporting Goods Shop,Coffee Shop,Café,Smoke Shop,Italian Restaurant,Discount Store,Grocery Store
9,Downtown Toronto,3,Clothing Store,Theater,Pizza Place,Miscellaneous Shop,Fast Food Restaurant,Steakhouse,Bookstore,Shopping Mall,Mexican Restaurant,Burger Joint
12,Scarborough,3,Breakfast Spot,Bar,Burger Joint,Italian Restaurant,Falafel Restaurant,Farm,Event Space,Ethiopian Restaurant,Elementary School,Dog Run
13,North York,3,Gym,Japanese Restaurant,Restaurant,Sporting Goods Shop,Coffee Shop,Café,Smoke Shop,Italian Restaurant,Discount Store,Grocery Store
15,Downtown Toronto,3,Café,Gastropub,Farmers Market,Coffee Shop,Cosmetics Shop,Camera Store,Restaurant,Poke Place,Gym,Park
19,East Toronto,3,Pub,Breakfast Spot,Health Food Store,Bagel Shop,French Restaurant,Tea Room,Indian Restaurant,Indie Movie Theater,Ramen Restaurant,Bakery


### Cluster 5

In [23]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
94,Etobicoke,4,Rental Car Location,Drugstore,Lounge,Eastern European Restaurant,Distribution Center,Dog Run,Donut Shop,Dry Cleaner,Dumpling Restaurant,Yoga Studio
