## The Battle of Neighborhoods: Where to buy a house in Scarborough, Toronto

### Import libraries

In [2]:
%matplotlib inline
!pip install geopandas
!pip install geocoder
!pip install folium 
import numpy as np
import pandas as pd
import requests
import io
import geocoder
from shapely.geometry import Point
import geopandas as gpd
import matplotlib.pyplot as plt
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium # map rendering library
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
print('Libraries imported.')

Collecting geopandas
  Downloading https://files.pythonhosted.org/packages/f0/5d/916b7268ef551fa9647ce12220e9a68006374f1ce5b0ddaf1cd0df2880b7/geopandas-0.4.1-py2.py3-none-any.whl (922kB)
[K    100% |████████████████████████████████| 931kB 1.1MB/s eta 0:00:01
[?25hRequirement not upgraded as not directly required: pandas in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geopandas)
Collecting pyproj (from geopandas)
  Downloading https://files.pythonhosted.org/packages/94/a6/a8d67fe3c6097ab390da706049e27ab50bf42fc063afa49a0b484ae2203a/pyproj-2.1.3-cp35-cp35m-manylinux1_x86_64.whl (10.8MB)
[K    100% |████████████████████████████████| 10.8MB 93kB/s  eta 0:00:01
[?25hCollecting fiona (from geopandas)
  Downloading https://files.pythonhosted.org/packages/4f/e2/841f27ccf7a00095c251554642025f64c44fbe2a121b2f33a76e9fbff2b2/Fiona-1.8.6-cp35-cp35m-manylinux1_x86_64.whl (17.8MB)
[K    100% |████████████████████████████████| 17.9MB 55kB/s  eta 0:00:01
[?25hCollecting shapely 

## Prepare for data

#### To do this analysis, I will import the list of Postal Codes of Toronto from Wikipedia: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [5]:
source = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df1 = pd.read_html(source, header=0, flavor = 'bs4')[0]
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Data cleaning & processing:  remove all rows that contain a 'Not assigned' Borough

In [9]:
df2 = df1[df1.Borough !='Not assigned']
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


#### To get latitude and longitude values, we import Canada's geospatial data

In [10]:
geo=pd.read_csv("https://cocl.us/Geospatial_data")
geo.rename(columns={'Postal Code':'Postcode'}, inplace = True)
data = pd.merge(df2, geo, on='Postcode')
data = data.reset_index(drop=True)

#### I only select Scarborough borough to do the analysis.

In [11]:
Toronto_data = data[data['Borough'] == 'Scarborough'].reset_index(drop=True)
Toronto_data.rename(columns={'Neighbourhood':'Neighborhood'}, inplace = True)

Toronto_data

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.806686,-79.194353
1,M1B,Scarborough,Malvern,43.806686,-79.194353
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
4,M1C,Scarborough,Port Union,43.784535,-79.160497
5,M1E,Scarborough,Guildwood,43.763573,-79.188711
6,M1E,Scarborough,Morningside,43.763573,-79.188711
7,M1E,Scarborough,West Hill,43.763573,-79.188711
8,M1G,Scarborough,Woburn,43.770992,-79.216917
9,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
print('Final data has {} neighborhoods.'.format(
        Toronto_data.shape[0]
    )
)

Final data has 37 neighborhoods.


### visualize neighbourhoods distribution.

#### Use geopy library to get the latitude and longitude values of Scarborough.

In [13]:
from geopy.geocoders import Nominatim
address = 'Scarborough, CA'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.773077, -79.257774.


In [14]:
# create map of Toroto using latitude and longitude values

Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], 
                                           Toronto_data['Borough'], Toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='green',
        fill_opacity=0.7).add_to(Toronto_map)  
    
Toronto_map

## Define Foursquare Credentials and Version

In [15]:
CLIENT_ID = '2H1VW2PPRXM2AHRPXETAL3PYEOXRBC3F4DXQOTOQEDUYOAPC' # your Foursquare ID
CLIENT_SECRET = '1KGSIJRGUENFBJ3AWEON3TOCMCXQOBJTZJTWXGPS3410PGT3' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2H1VW2PPRXM2AHRPXETAL3PYEOXRBC3F4DXQOTOQEDUYOAPC
CLIENT_SECRET:1KGSIJRGUENFBJ3AWEON3TOCMCXQOBJTZJTWXGPS3410PGT3


#### Now, let's get the top 100 venues that are in within a radius of 500 meters.

In [16]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Explore Neighborhoods in Toronto:

In [17]:
LIMIT = 100
radius = 500
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
# type your answer here

Toronto_venues = getNearbyVenues(names=Toronto_data['Neighborhood'],
                                   latitudes=Toronto_data['Latitude'],
                                   longitudes=Toronto_data['Longitude'])

Rouge
Malvern
Highland Creek
Rouge Hill
Port Union
Guildwood
Morningside
West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park
Ionview
Kennedy Park
Clairlea
Golden Mile
Oakridge
Cliffcrest
Cliffside
Scarborough Village West
Birch Cliff
Cliffside West
Dorset Park
Scarborough Town Centre
Wexford Heights
Maryvale
Wexford
Agincourt
Clarks Corners
Sullivan
Tam O'Shanter
Agincourt North
L'Amoreaux East
Milliken
Steeles East
L'Amoreaux West
Upper Rouge


### Let's check the size of the resulting dataframe

In [19]:
print(Toronto_venues.shape)
Toronto_venues.head()

(190, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rouge,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,Malvern,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,Highland Creek,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,Highland Creek,43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
4,Rouge Hill,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar


In [20]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
Agincourt North,2,2,2,2,2,2
Birch Cliff,4,4,4,4,4,4
Cedarbrae,7,7,7,7,7,7
Clairlea,9,9,9,9,9,9
Clarks Corners,10,10,10,10,10,10
Cliffcrest,2,2,2,2,2,2
Cliffside,2,2,2,2,2,2
Cliffside West,4,4,4,4,4,4
Dorset Park,7,7,7,7,7,7


In [21]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 52 uniques categories.


## Methodology 

####  First I will collect data: location and type (category) of every venue for each neighborhood.  Then get calculation and exploration of 'venues frequencies' across the neighboorhods  Last I will use k-means clustering of those locations to create clusters and identify neighborhoods that are potential places.

## Explore Neighborhoods in Scarborough	

In [22]:
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Grocery Store,Gym Pool,Hakka Restaurant,History Museum,Hobby Shop,Indian Restaurant,Intersection,Italian Restaurant,Korean Restaurant,Latin American Restaurant,Light Rail Station,Lounge,Medical Center,Metro Station,Mexican Restaurant,Motel,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Spa,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant
0,Rouge,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Malvern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Highland Creek,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Highland Creek,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Rouge Hill,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [56]:
Toronto_onehot.shape

(190, 53)

In [23]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Grocery Store,Gym Pool,Hakka Restaurant,History Museum,Hobby Shop,Indian Restaurant,Intersection,Italian Restaurant,Korean Restaurant,Latin American Restaurant,Light Rail Station,Lounge,Medical Center,Metro Station,Mexican Restaurant,Motel,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Spa,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Birch Cliff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
4,Clairlea,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.222222,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0
5,Clarks Corners,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.1,0.2,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0
6,Cliffcrest,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Cliffside,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Cliffside West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
9,Dorset Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857


In [114]:
Toronto_grouped.shape

(36, 53)

#### Let's print each neighborhood along with the top 5 most common venues

In [24]:
num_top_venues = 5

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                 venue  freq
0               Lounge  0.25
1       Breakfast Spot  0.25
2       Sandwich Place  0.25
3       Clothing Store  0.25
4  American Restaurant  0.00


----Agincourt North----
                 venue  freq
0                 Park   0.5
1           Playground   0.5
2  American Restaurant   0.0
3   Italian Restaurant   0.0
4    Korean Restaurant   0.0


----Birch Cliff----
                   venue  freq
0           Skating Rink  0.25
1  General Entertainment  0.25
2                   Café  0.25
3        College Stadium  0.25
4    American Restaurant  0.00


----Cedarbrae----
                venue  freq
0              Bakery  0.14
1                Bank  0.14
2     Thai Restaurant  0.14
3  Athletics & Sports  0.14
4    Hakka Restaurant  0.14


----Clairlea----
           venue  freq
0         Bakery  0.22
1       Bus Line  0.22
2   Soccer Field  0.11
3    Bus Station  0.11
4  Metro Station  0.11


----Clarks Corners----
                 venue  freq
0

#### Let's put that into a pandas dataframe

In [25]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Sandwich Place,Breakfast Spot,Lounge,Clothing Store,Vietnamese Restaurant,Coffee Shop,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint
1,Agincourt North,Park,Playground,Chinese Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
2,Birch Cliff,General Entertainment,Skating Rink,Café,College Stadium,Vietnamese Restaurant,Clothing Store,Gym Pool,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
3,Cedarbrae,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Bakery,Bank,Fried Chicken Joint,Caribbean Restaurant,College Stadium,Gym Pool,Grocery Store
4,Clairlea,Bakery,Bus Line,Park,Soccer Field,Fast Food Restaurant,Bus Station,Metro Station,Coffee Shop,Grocery Store,General Entertainment
5,Clarks Corners,Pizza Place,Noodle House,Chinese Restaurant,Thai Restaurant,Fried Chicken Joint,Fast Food Restaurant,Italian Restaurant,Rental Car Location,Pharmacy,General Entertainment
6,Cliffcrest,American Restaurant,Motel,History Museum,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
7,Cliffside,American Restaurant,Motel,History Museum,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
8,Cliffside West,General Entertainment,Skating Rink,Café,College Stadium,Vietnamese Restaurant,Clothing Store,Gym Pool,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
9,Dorset Park,Indian Restaurant,Vietnamese Restaurant,Latin American Restaurant,Light Rail Station,Pet Store,Chinese Restaurant,Coffee Shop,Grocery Store,General Entertainment,Fried Chicken Joint


## Cluster Neighborhoods

In [33]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 6

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_


array([0, 1, 0, 0, 0, 0, 5, 5, 0, 0, 4, 0, 0, 3, 4, 4, 1, 0, 2, 0, 1, 0, 0,
       3, 2, 3, 0, 1, 5, 1, 0, 0, 0, 0, 0, 4], dtype=int32)

In [120]:
neighborhoods_venues_sorted
neighborhoods_venues_sorted.shape

(36, 11)

In [34]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [35]:
neighborhoods_venues_sorted.shape
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Agincourt,Sandwich Place,Breakfast Spot,Lounge,Clothing Store,Vietnamese Restaurant,Coffee Shop,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint
1,1,Agincourt North,Park,Playground,Chinese Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
2,0,Birch Cliff,General Entertainment,Skating Rink,Café,College Stadium,Vietnamese Restaurant,Clothing Store,Gym Pool,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
3,0,Cedarbrae,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Bakery,Bank,Fried Chicken Joint,Caribbean Restaurant,College Stadium,Gym Pool,Grocery Store
4,0,Clairlea,Bakery,Bus Line,Park,Soccer Field,Fast Food Restaurant,Bus Station,Metro Station,Coffee Shop,Grocery Store,General Entertainment


In [36]:
Toronto_merged = Toronto_data
#Toronto_merged
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

#Toronto_merged[['Cluster Labels']]=Toronto_merged[['Cluster Labels']].astype(int)

Toronto_merged # check the last columns!
Toronto_merged = Toronto_merged.dropna()


In [37]:
kmeans.labels_.shape

(36,)

In [38]:
Toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,Rouge,43.806686,-79.194353,2.0,Fast Food Restaurant,Vietnamese Restaurant,Thrift / Vintage Store,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Electronics Store,Discount Store
1,M1B,Scarborough,Malvern,43.806686,-79.194353,2.0,Fast Food Restaurant,Vietnamese Restaurant,Thrift / Vintage Store,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Electronics Store,Discount Store
2,M1C,Scarborough,Highland Creek,43.784535,-79.160497,3.0,History Museum,Bar,Coffee Shop,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497,3.0,History Museum,Bar,Coffee Shop,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
4,M1C,Scarborough,Port Union,43.784535,-79.160497,3.0,History Museum,Bar,Coffee Shop,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store


In [39]:
Toronto_merged[['Cluster Labels']]=Toronto_merged[['Cluster Labels']].astype(int)

In [32]:
Toronto_merged.dtypes

Postcode                   object
Borough                    object
Neighborhood               object
Latitude                  float64
Longitude                 float64
1st Most Common Venue      object
2nd Most Common Venue      object
3rd Most Common Venue      object
4th Most Common Venue      object
5th Most Common Venue      object
6th Most Common Venue      object
7th Most Common Venue      object
8th Most Common Venue      object
9th Most Common Venue      object
10th Most Common Venue     object
dtype: object

In [40]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], 
                                  Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

####  We have 5 different clusters as below. We  can define their cluster names according to their neighbourhood

#### Cluster 1 Restaurant & Enterainment zone

In [46]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0,
                     Toronto_merged.columns[[2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Guildwood,0,Rental Car Location,Intersection,Medical Center,Pizza Place,Breakfast Spot,Electronics Store,Mexican Restaurant,Spa,Fast Food Restaurant,Clothing Store
6,Morningside,0,Rental Car Location,Intersection,Medical Center,Pizza Place,Breakfast Spot,Electronics Store,Mexican Restaurant,Spa,Fast Food Restaurant,Clothing Store
7,West Hill,0,Rental Car Location,Intersection,Medical Center,Pizza Place,Breakfast Spot,Electronics Store,Mexican Restaurant,Spa,Fast Food Restaurant,Clothing Store
9,Cedarbrae,0,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Bakery,Bank,Fried Chicken Joint,Caribbean Restaurant,College Stadium,Gym Pool,Grocery Store
14,Clairlea,0,Bakery,Bus Line,Park,Soccer Field,Fast Food Restaurant,Bus Station,Metro Station,Coffee Shop,Grocery Store,General Entertainment
15,Golden Mile,0,Bakery,Bus Line,Park,Soccer Field,Fast Food Restaurant,Bus Station,Metro Station,Coffee Shop,Grocery Store,General Entertainment
16,Oakridge,0,Bakery,Bus Line,Park,Soccer Field,Fast Food Restaurant,Bus Station,Metro Station,Coffee Shop,Grocery Store,General Entertainment
20,Birch Cliff,0,General Entertainment,Skating Rink,Café,College Stadium,Vietnamese Restaurant,Clothing Store,Gym Pool,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
21,Cliffside West,0,General Entertainment,Skating Rink,Café,College Stadium,Vietnamese Restaurant,Clothing Store,Gym Pool,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
22,Dorset Park,0,Indian Restaurant,Vietnamese Restaurant,Latin American Restaurant,Light Rail Station,Pet Store,Chinese Restaurant,Coffee Shop,Grocery Store,General Entertainment,Fried Chicken Joint


#### Cluster 2 Park/Playgroud/Gym/Grocery store

In [47]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1,
                     Toronto_merged.columns[[2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Scarborough Village,1,Playground,Vietnamese Restaurant,Clothing Store,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
31,Agincourt North,1,Park,Playground,Chinese Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
32,L'Amoreaux East,1,Park,Playground,Chinese Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
33,Milliken,1,Park,Playground,Chinese Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
34,Steeles East,1,Park,Playground,Chinese Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store


#### Cluster 3 Park

In [43]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2,
                     Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Thrift / Vintage Store,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Electronics Store,Discount Store
1,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Thrift / Vintage Store,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Electronics Store,Discount Store


#### Cluster 4 Museum

In [129]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3,
                     Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,3,History Museum,Bar,Coffee Shop,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
3,Scarborough,3,History Museum,Bar,Coffee Shop,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
4,Scarborough,3,History Museum,Bar,Coffee Shop,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store


#### Cluster 5 Shopping center

In [44]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4,
                     Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Scarborough,4,Coffee Shop,Korean Restaurant,Vietnamese Restaurant,History Museum,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
11,Scarborough,4,Discount Store,Hobby Shop,Coffee Shop,Bus Station,Department Store,Chinese Restaurant,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment
12,Scarborough,4,Discount Store,Hobby Shop,Coffee Shop,Bus Station,Department Store,Chinese Restaurant,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment
13,Scarborough,4,Discount Store,Hobby Shop,Coffee Shop,Bus Station,Department Store,Chinese Restaurant,Hakka Restaurant,Gym Pool,Grocery Store,General Entertainment


### Restruant, motel.museum

In [45]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 5,
                     Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Scarborough,5,American Restaurant,Motel,History Museum,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
18,Scarborough,5,American Restaurant,Motel,History Museum,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
19,Scarborough,5,American Restaurant,Motel,History Museum,Gym Pool,Grocery Store,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store


### End of page. Thank you!