# Applied Data Science Capstone, Week 2 Assignment - Part 3

## load data from csv-file

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('neighborhood_toronto.csv', index_col=0)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Explore and Clustering neighborhood

View the total Borough

In [3]:
df['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

View neighborhood counts for each Borough

In [4]:
df.groupby('Borough').count()

Unnamed: 0_level_0,Postcode,Neighbourhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,18,18,18,18
East Toronto,5,5,5,5
East York,5,5,5,5
Etobicoke,12,12,12,12
Mississauga,1,1,1,1
North York,24,24,24,24
Queen's Park,1,1,1,1
Scarborough,17,17,17,17
West Toronto,6,6,6,6


I decide to choose Borough == 'Downtown Toronto' to continue, because:

- it has the second largest number of neighborhood
- it is __DOWNTOWN__

In [5]:
toronto = df[df['Borough']=='Downtown Toronto']
toronto.shape

(18, 5)

#### get latlng of Downtown Toronto

In [6]:
from geopy.geocoders import Nominatim

In [7]:
geolocator = Nominatim(user_agent="toronto")
loc = geolocator.geocode("Downtown Toronto, ON")
loc

Location(Downtown Jewellery Mart, 257, Yonge Street, Downtown Yonge, Toronto Centre, Old Toronto, Toronto, Ontario, M5B 1N8, Canada, (43.655115, -79.380219, 0.0))

#### view neighborhood in Downtown Toronto

In [8]:
import folium

In [9]:
map_toronto = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=13)
for lat, lng, borough, neighbor in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighbourhood']):
    label = "{}, {}".format(neighbor, borough)
    abel = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
map_toronto

#### get top 100 venues within 500m range of each neighborhood

In [10]:
from Foursquare.regularapi import venues

In [11]:
def getNearbyVenues(neighbors, lats, lngs, radius=500, limit=100):
    venue_list = []
    for neighbor, lat, lng in zip(neighbors, lats, lngs):
        print(neighbor)
        param = {
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit
        }
        data = venues('explore', **param)
        if data:
            for v in data:
                venue_list.append([neighbor]+v)
    return venue_list

In [14]:
venue_list = getNearbyVenues(
    neighbors=toronto['Neighbourhood'], lats=toronto['Latitude'], lngs=toronto['Longitude']
)

Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie


In [16]:
len(venue_list)

1287

In [17]:
columns = ['Neighbourhood', 'venue_name', 'venue_lat', 'venue_lng', 'venue_category']
toronto_venue = pd.DataFrame(data=venue_list, columns=columns)
toronto_venue.head(10)

Unnamed: 0,Neighbourhood,venue_name,venue_lat,venue_lng,venue_category
0,Rosedale,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,Alex Murray Parkette,43.6783,-79.382773,Park
3,Rosedale,Milkman's Lane,43.676352,-79.373842,Trail
4,"Cabbagetown, St. James Town",Cranberries,43.667843,-79.369407,Diner
5,"Cabbagetown, St. James Town",Butter Chicken Factory,43.667072,-79.369184,Indian Restaurant
6,"Cabbagetown, St. James Town",F'Amelia,43.667536,-79.368613,Italian Restaurant
7,"Cabbagetown, St. James Town",Kingyo Toronto,43.665895,-79.368415,Japanese Restaurant
8,"Cabbagetown, St. James Town",Merryberry Cafe + Bistro,43.66663,-79.368792,Café
9,"Cabbagetown, St. James Town",Murgatroid,43.667381,-79.369311,Restaurant


In [18]:
toronto_venue.groupby('Neighbourhood').count()

Unnamed: 0_level_0,venue_name,venue_lat,venue_lng,venue_category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Adelaide, King, Richmond",100,100,100,100
Berczy Park,57,57,57,57
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",15,15,15,15
"Cabbagetown, St. James Town",44,44,44,44
Central Bay Street,88,88,88,88
"Chinatown, Grange Park, Kensington Market",100,100,100,100
Christie,16,16,16,16
Church and Wellesley,88,88,88,88
"Commerce Court, Victoria Hotel",100,100,100,100
"Design Exchange, Toronto Dominion Centre",100,100,100,100


#### analyze venue categories

In [19]:
toronto_venue_ = pd.get_dummies(toronto_venue[['venue_category']])
toronto_venue_['Neighbourhood'] = toronto_venue['Neighbourhood']
fixed_columns = [toronto_venue_.columns[-1]] + list(toronto_venue_.columns[:-1])
toronto_venue_ = toronto_venue_[fixed_columns]
toronto_venue_.head()

Unnamed: 0,Neighbourhood,venue_category_Adult Boutique,venue_category_Afghan Restaurant,venue_category_Airport,venue_category_Airport Food Court,venue_category_Airport Gate,venue_category_Airport Lounge,venue_category_Airport Service,venue_category_Airport Terminal,venue_category_American Restaurant,...,venue_category_Thrift / Vintage Store,venue_category_Toy / Game Store,venue_category_Trail,venue_category_Train Station,venue_category_Vegetarian / Vegan Restaurant,venue_category_Video Game Store,venue_category_Vietnamese Restaurant,venue_category_Wine Bar,venue_category_Wings Joint,venue_category_Yoga Studio
0,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,"Cabbagetown, St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### cluster the toronto neighbourhood

In [21]:
toronto_grouped = toronto_venue_.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,venue_category_Adult Boutique,venue_category_Afghan Restaurant,venue_category_Airport,venue_category_Airport Food Court,venue_category_Airport Gate,venue_category_Airport Lounge,venue_category_Airport Service,venue_category_Airport Terminal,venue_category_American Restaurant,...,venue_category_Thrift / Vintage Store,venue_category_Toy / Game Store,venue_category_Trail,venue_category_Train Station,venue_category_Vegetarian / Vegan Restaurant,venue_category_Video Game Store,venue_category_Vietnamese Restaurant,venue_category_Wine Bar,venue_category_Wings Joint,venue_category_Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.066667,0.066667,0.066667,0.133333,0.2,0.133333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,...,0.0,0.0,0.0,0.0,0.011364,0.0,0.0,0.011364,0.0,0.011364
5,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.01,0.0,0.0,0.06,0.0,0.03,0.01,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.011364,0.011364,0.0,0.0,0.0,0.0,0.0,0.0,0.011364,...,0.0,0.0,0.0,0.0,0.0,0.011364,0.011364,0.0,0.011364,0.011364
8,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
9,"Design Exchange, Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0


In [24]:
from sklearn.cluster import KMeans

In [26]:
toronto_grouped_cluster = toronto_grouped.drop('Neighbourhood', 1)
kmeans = KMeans(n_clusters=5, random_state=4).fit(toronto_grouped_cluster)
kmeans.labels_[0:10] 

array([0, 0, 2, 0, 0, 4, 3, 0, 0, 0], dtype=int32)

#### show clustered neighbours on the map

In [27]:
toronto_plot = toronto[['Neighbourhood', 'Latitude', 'Longitude']]
toronto_plot.insert(0, 'cluster_label', kmeans.labels_)
toronto_plot

Unnamed: 0,cluster_label,Neighbourhood,Latitude,Longitude
50,0,Rosedale,43.679563,-79.377529
51,0,"Cabbagetown, St. James Town",43.667967,-79.367675
52,2,Church and Wellesley,43.66586,-79.38316
53,0,"Harbourfront, Regent Park",43.65426,-79.360636
54,0,"Ryerson, Garden District",43.657162,-79.378937
55,4,St. James Town,43.651494,-79.375418
56,3,Berczy Park,43.644771,-79.373306
57,0,Central Bay Street,43.657952,-79.387383
58,0,"Adelaide, King, Richmond",43.650571,-79.384568
59,0,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [32]:
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=13)

x = np.arange(5)
ys = [i + x + (i*x)**2 for i in range(5)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_plot['Latitude'], toronto_plot['Longitude'], toronto_plot['Neighbourhood'], toronto_plot['cluster_label']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### examine clusters

In [37]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Adelaide, King, Richmond",venue_category_Coffee Shop,venue_category_Café,venue_category_Steakhouse,venue_category_American Restaurant,venue_category_Thai Restaurant
1,Berczy Park,venue_category_Coffee Shop,venue_category_Cocktail Bar,venue_category_Beer Bar,venue_category_Restaurant,venue_category_Café
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",venue_category_Airport Service,venue_category_Airport Lounge,venue_category_Airport Terminal,venue_category_Boat or Ferry,venue_category_Plane
3,"Cabbagetown, St. James Town",venue_category_Coffee Shop,venue_category_Italian Restaurant,venue_category_Restaurant,venue_category_Pizza Place,venue_category_Pub
4,Central Bay Street,venue_category_Coffee Shop,venue_category_Café,venue_category_Italian Restaurant,venue_category_Burger Joint,venue_category_Salad Place


In [38]:
toronto_merged = toronto_plot
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
toronto_merged.head()

Unnamed: 0,cluster_label,Neighbourhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
50,0,Rosedale,43.679563,-79.377529,venue_category_Park,venue_category_Playground,venue_category_Trail,venue_category_Dance Studio,venue_category_Dumpling Restaurant
51,0,"Cabbagetown, St. James Town",43.667967,-79.367675,venue_category_Coffee Shop,venue_category_Italian Restaurant,venue_category_Restaurant,venue_category_Pizza Place,venue_category_Pub
52,2,Church and Wellesley,43.66586,-79.38316,venue_category_Coffee Shop,venue_category_Japanese Restaurant,venue_category_Gay Bar,venue_category_Sushi Restaurant,venue_category_Restaurant
53,0,"Harbourfront, Regent Park",43.65426,-79.360636,venue_category_Coffee Shop,venue_category_Pub,venue_category_Bakery,venue_category_Park,venue_category_Café
54,0,"Ryerson, Garden District",43.657162,-79.378937,venue_category_Coffee Shop,venue_category_Clothing Store,venue_category_Cosmetics Shop,venue_category_Café,venue_category_Middle Eastern Restaurant


#### cluster 1

In [40]:

toronto_merged.loc[toronto_merged['cluster_label'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
50,Rosedale,venue_category_Playground,venue_category_Trail,venue_category_Dance Studio,venue_category_Dumpling Restaurant
51,"Cabbagetown, St. James Town",venue_category_Italian Restaurant,venue_category_Restaurant,venue_category_Pizza Place,venue_category_Pub
53,"Harbourfront, Regent Park",venue_category_Pub,venue_category_Bakery,venue_category_Park,venue_category_Café
54,"Ryerson, Garden District",venue_category_Clothing Store,venue_category_Cosmetics Shop,venue_category_Café,venue_category_Middle Eastern Restaurant
57,Central Bay Street,venue_category_Café,venue_category_Italian Restaurant,venue_category_Burger Joint,venue_category_Salad Place
58,"Adelaide, King, Richmond",venue_category_Café,venue_category_Steakhouse,venue_category_American Restaurant,venue_category_Thai Restaurant
59,"Harbourfront East, Toronto Islands, Union Station",venue_category_Hotel,venue_category_Aquarium,venue_category_Café,venue_category_Italian Restaurant
60,"Design Exchange, Toronto Dominion Centre",venue_category_Café,venue_category_Hotel,venue_category_Restaurant,venue_category_Gastropub
66,"Harbord, University of Toronto",venue_category_Restaurant,venue_category_Japanese Restaurant,venue_category_Bar,venue_category_Bakery
67,"Chinatown, Grange Park, Kensington Market",venue_category_Vegetarian / Vegan Restaurant,venue_category_Bar,venue_category_Dumpling Restaurant,venue_category_Mexican Restaurant


#### cluster 2

In [41]:

toronto_merged.loc[toronto_merged['cluster_label'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
68,"CN Tower, Bathurst Quay, Island airport, Harbo...",venue_category_Airport Lounge,venue_category_Airport Terminal,venue_category_Boat or Ferry,venue_category_Plane


#### cluster 3

In [42]:

toronto_merged.loc[toronto_merged['cluster_label'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
52,Church and Wellesley,venue_category_Japanese Restaurant,venue_category_Gay Bar,venue_category_Sushi Restaurant,venue_category_Restaurant


#### cluster 4

In [43]:

toronto_merged.loc[toronto_merged['cluster_label'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
56,Berczy Park,venue_category_Cocktail Bar,venue_category_Beer Bar,venue_category_Restaurant,venue_category_Café


#### cluster 5

In [44]:

toronto_merged.loc[toronto_merged['cluster_label'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0,Neighbourhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
55,St. James Town,venue_category_Café,venue_category_Restaurant,venue_category_Hotel,venue_category_Cosmetics Shop
61,"Commerce Court, Victoria Hotel",venue_category_Café,venue_category_Hotel,venue_category_Restaurant,venue_category_American Restaurant
