### Methodology
---

In [2]:
# importing libraries
import json
import os

import folium
import geopy.geocoders
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

# Disable truncation so every column and row is shown when a DataFrame is displayed.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

print('Libraries are imported.')

Libraries are imported.


#### 3.1 Create a dataset of postal codes in Toronto

In [7]:
# Build the Toronto postal-code dataset (same dataset as the one used in week 3):
# scrape the postal-code table from Wikipedia, then attach the coordinates
# stored in the local Geospatial_Coordinates.csv file.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an error page
source = response.text
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table')

Postalcode = []
Borough = []
Neighbourhood = []

for tr_cell in table.find_all('tr'):
    for td_cell in tr_cell.find_all('td'):
        # The parsing below reads the <b> (code) and <span> (location) children;
        # skip any cell that lacks either one instead of raising AttributeError.
        if td_cell.span is None or td_cell.b is None:
            continue
        if td_cell.span.text != 'Not assigned':
            code = td_cell.b.text
            locat = td_cell.span.text
            if '(' in locat:
                # "Borough(Hood A / Hood B)" -> borough before the parenthesis,
                # neighbourhoods inside it with '/' turned into ','.
                bor = locat[:locat.find('(')]
                hood = locat[locat.find('(')+1:locat.find(')')].replace('/',',')
            else:
                # "Hood / Borough" -> neighbourhood before the slash, borough after it.
                bor = locat[locat.find('/')+2:]
                hood = locat[:locat.find('/')-1]
            Postalcode.append(code)
            Borough.append(bor)
            Neighbourhood.append(hood)

# Two borough strings come out of the scrape with a missing space; patch them
# when present (no error if the page no longer contains them).
borough_fixes = {
    'MississaugaCanada Post Gateway Processing Centre':
        'Mississauga Canada Post Gateway Processing Centre',
    'East TorontoBusiness reply mail Processing Centre969 Eastern':
        'East Toronto Business reply mail Processing Centre969 Eastern',
}
Borough = [borough_fixes.get(bor, bor) for bor in Borough]

coordinate = pd.read_csv('Geospatial_Coordinates.csv')
coordinate.set_index('Postal Code', inplace = True)
# Look coordinates up by the scraped postal codes (not by the CSV's row count),
# so the coordinate lists always have the same length as the scraped lists.
# A postal code missing from the CSV raises KeyError.
latitude = coordinate.loc[Postalcode, 'Latitude'].tolist()
longitude = coordinate.loc[Postalcode, 'Longitude'].tolist()
coordinate_dict = {'Postalcode': Postalcode, 
                   'Borough': Borough, 
                   'Neighbourhood': Neighbourhood, 
                   'Latitude': latitude, 
                   'Longitude': longitude}
df_toronto = pd.DataFrame.from_dict(coordinate_dict)
df_toronto.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Ontario Provincial Government,Queen's Park,43.662301,-79.389494


#### 3.2 Create a map of the postal code regions in Toronto

In [8]:
# Latitude and longitude of Toronto were looked up manually via a Google search.
toronto_latitude = 43.6932
toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# Add one circle marker per postal-code area, labelled "<neighbourhoods>, <borough>".
marker_rows = df_toronto[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighborhood in marker_rows.itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    marker.add_to(map_toronto)

map_toronto

#### 3.3 Focus on Etobicoke in Toronto

In [15]:
# Keep only the postal-code areas whose borough is Etobicoke, renumbered from 0.
is_etobicoke = df_toronto['Borough'] == 'Etobicoke'
etobicoke_data = df_toronto.loc[is_etobicoke].reset_index(drop=True)
etobicoke_data

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M9B,Etobicoke,"West Deane Park , Princess Gardens , Martin Gr...",43.650943,-79.554724
2,M9C,Etobicoke,"Eringate , Bloordale Gardens , Old Burnhamthor...",43.643515,-79.577201
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov...",43.688905,-79.554724
5,M8V,Etobicoke,"New Toronto , Mimico South , Humber Bay Shores",43.605647,-79.501321
6,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam...",43.739416,-79.588437
7,M8W,Etobicoke,"Alderwood , Long Branch",43.602414,-79.543484
8,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
9,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


#### 3.4 Create a map of Etobicoke and its neighbourhoods

In [19]:
# Manually chosen map centre for Etobicoke.
address_etbc = 'Etobicoke, Toronto'
latitude_etbc, longitude_etbc = 43.66, -79.54

coordinate_message = 'The geograpical coordinate of Etobicoke are: {}, {}.'
print(coordinate_message.format(latitude_etbc, longitude_etbc))

The geograpical coordinate of Etobicoke are: 43.66, -79.54.


In [26]:
# Base map centred on the Etobicoke coordinates defined above.
map_Etobicoke = folium.Map(location=[latitude_etbc, longitude_etbc], zoom_start=11)

# One circle marker per Etobicoke postal-code area; the popup shows its neighbourhood names.
marker_rows = etobicoke_data[['Latitude', 'Longitude', 'Neighbourhood']]
for lat, lng, hood_names in marker_rows.itertuples(index=False, name=None):
    marker = folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = folium.Popup(hood_names, parse_html=True),
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7)
    marker.add_to(map_Etobicoke)

map_Etobicoke

#### 3.5 Crawl Foursquare database for venues in the neighborhoods of Etobicoke

In [27]:
# Foursquare API credentials are read from the environment so that no secret is
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): any key pair that was previously committed in this cell should
# be revoked and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
VERSION = '20180605'  # value sent as the `v` parameter of every API request

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

In [34]:
def foursquare_crawler (postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000, timeout = 30):
    """Query the Foursquare "explore" endpoint once per postal-code area.

    The four list arguments are iterated in parallel (zip), so iteration stops
    at the shortest one.

    Parameters
    ----------
    postal_code_list, neighborhood_list : sequences
        Postal code and neighbourhood label of each area; copied into the result.
    lat_list, lng_list : sequences
        Coordinates of each area, sent as the `ll` request parameter.
    LIMIT : int
        Value of the `limit` request parameter.
    radius : int
        Value of the `radius` request parameter.
    timeout : float
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    list of dict
        One dict per area with the keys 'Postal Code', 'Neighborhood(s)',
        'Latitude', 'Longitude' and 'Crawling_result' (the `items` list of the
        first group in the API response).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. rejected credentials).
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    result_ds = []
    areas = zip(postal_code_list, neighborhood_list, lat_list, lng_list)
    for counter, (postal_code, neighborhood, lat, lng) in enumerate(areas, start=1):
        # Let requests build and escape the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(endpoint, params=params, timeout=timeout)
        # Surface API errors here rather than as a KeyError on the payload below.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        result_ds.append({
            'Postal Code': postal_code,
            'Neighborhood(s)': neighborhood,
            'Latitude': lat,
            'Longitude': lng,
            'Crawling_result': results,
        })
        print('{}. Postal Code {} and Neighborhoods {}, data collected.'.format(counter, postal_code, neighborhood))
    return result_ds

In [35]:
# Crawl Foursquare once for every Etobicoke postal-code area.
print('Crawling different neighborhoods of Etobicoke:')
Etobicoke_foursquare_dataset = foursquare_crawler(
    etobicoke_data['Postalcode'].tolist(),
    etobicoke_data['Neighbourhood'].tolist(),
    etobicoke_data['Latitude'].tolist(),
    etobicoke_data['Longitude'].tolist(),
)

Crawling different neighborhoods of Etobicoke:
1. Postal Code M9A and Neighborhoods Islington Avenue, data collected.
2. Postal Code M9B and Neighborhoods West Deane Park , Princess Gardens , Martin Grove , Islington , Cloverdale, data collected.
3. Postal Code M9C and Neighborhoods Eringate , Bloordale Gardens , Old Burnhamthorpe , Markland Wood, data collected.
4. Postal Code M9P and Neighborhoods Westmount, data collected.
5. Postal Code M9R and Neighborhoods Kingsview Village , St. Phillips , Martin Grove Gardens , Richview Gardens, data collected.
6. Postal Code M8V and Neighborhoods New Toronto , Mimico South , Humber Bay Shores, data collected.
7. Postal Code M9V and Neighborhoods South Steeles , Silverstone , Humbergate , Jamestown , Mount Olive , Beaumond Heights , Thistletown , Albion Gardens, data collected.
8. Postal Code M8W and Neighborhoods Alderwood , Long Branch, data collected.
9. Postal Code M8X and Neighborhoods The Kingsway , Montgomery Road , Old Mill North, data 

#### 3.6 Clean the venue data from the Foursquare database

In [44]:
# This function extracts the venues of every neighborhood in the crawled dataset (list).
def get_venue_dataset(foursquare_dataset):
    """Flatten crawled Foursquare results into one row per venue.

    Parameters
    ----------
    foursquare_dataset : list of dict
        Each dict must provide 'Postal Code', 'Neighborhood(s)', 'Latitude',
        'Longitude' and 'Crawling_result'; the latter is a list of venue dicts
        read via ['venue']['name'], ['venue']['location']['distance'],
        ['venue']['categories'][0]['name'] and ['reasons']['items'][0]['summary'].

    Returns
    -------
    pandas.DataFrame
        Columns: 'Postal Code', 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Summary', 'Venue Category',
        'Distance'. An empty input yields an empty frame with these columns.
        'Venue Summary' / 'Venue Category' are missing (None/NaN) for venues
        that carry no reason or no category.
    """
    columns = ['Postal Code', 'Neighborhood',
               'Neighborhood Latitude', 'Neighborhood Longitude',
               'Venue', 'Venue Summary', 'Venue Category', 'Distance']

    # Collect plain dicts and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']
        neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']
        lng = neigh_dict['Longitude']
        venues = neigh_dict['Crawling_result']
        print('Number of Venues in Coordination "{}" Posal Code and "{}" Negihborhood(s) is {} \n'.format(postal_code, neigh, len(venues)))

        for venue_dict in venues:
            venue = venue_dict['venue']
            # Fall back to an empty entry when the reasons or categories list is
            # absent or empty, instead of raising KeyError/IndexError.
            reasons = (venue_dict.get('reasons') or {}).get('items') or [{}]
            categories = venue.get('categories') or [{}]

            records.append({'Postal Code': postal_code, 'Neighborhood': neigh,
                            'Neighborhood Latitude': lat, 'Neighborhood Longitude': lng,
                            'Venue': venue['name'],
                            'Venue Summary': reasons[0].get('summary'),
                            'Venue Category': categories[0].get('name'),
                            'Distance': venue['location']['distance']})

    return pd.DataFrame(records, columns = columns)

In [45]:
# Flatten the crawled Foursquare results into one DataFrame row per venue
# (the call also prints the number of venues found for each postal code).
etobicoke_venues = get_venue_dataset(Etobicoke_foursquare_dataset)

Number of Venues in Coordination "M9A" Posal Code and "Islington Avenue" Negihborhood(s) is 12 

Number of Venues in Coordination "M9B" Posal Code and "West Deane Park , Princess Gardens , Martin Grove , Islington , Cloverdale" Negihborhood(s) is 15 

Number of Venues in Coordination "M9C" Posal Code and "Eringate , Bloordale Gardens , Old Burnhamthorpe , Markland Wood" Negihborhood(s) is 19 

Number of Venues in Coordination "M9P" Posal Code and "Westmount" Negihborhood(s) is 15 

Number of Venues in Coordination "M9R" Posal Code and "Kingsview Village , St. Phillips , Martin Grove Gardens , Richview Gardens" Negihborhood(s) is 17 

Number of Venues in Coordination "M8V" Posal Code and "New Toronto , Mimico South , Humber Bay Shores" Negihborhood(s) is 19 

Number of Venues in Coordination "M9V" Posal Code and "South Steeles , Silverstone , Humbergate , Jamestown , Mount Olive , Beaumond Heights , Thistletown , Albion Gardens" Negihborhood(s) is 18 

Number of Venues in Coordination "

#### 3.7 Show venues for each neighborhood in Etobicoke

In [55]:
# Preview the first rows of the per-venue table.
etobicoke_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M9A,Islington Avenue,43.667856,-79.532242,Java Joe's Village Cafe,This spot is popular,Café,600
1,M9A,Islington Avenue,43.667856,-79.532242,St Georges Golf and Country Club,This spot is popular,Golf Course,827
2,M9A,Islington Avenue,43.667856,-79.532242,TD Canada Trust,This spot is popular,Bank,592
3,M9A,Islington Avenue,43.667856,-79.532242,Shoppers Drug Mart,This spot is popular,Pharmacy,534
4,M9A,Islington Avenue,43.667856,-79.532242,COBS Bread,This spot is popular,Bakery,1000


In [57]:
# Persist the cleaned venue table and read it back from disk.
# The row index is written too, so the reloaded frame carries it as an extra
# 'Unnamed: 0' column that later cells drop explicitly.
venues_csv_path = 'etobicoke_venues.csv'
etobicoke_venues.to_csv(venues_csv_path)
etobicoke_venues = pd.read_csv(venues_csv_path)

In [66]:
# Summarise the data: number of non-null entries per column for each neighborhood group.
neigh_list = etobicoke_venues['Neighborhood'].unique().tolist()
neigh_venue_summary = (
    etobicoke_venues
    .groupby('Neighborhood')
    .count()
    .drop(columns = ['Unnamed: 0'])  # index column left over from the CSV round trip
)
neigh_venue_summary

Unnamed: 0_level_0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Alderwood , Long Branch",26,26,26,26,26,26,26
"Eringate , Bloordale Gardens , Old Burnhamthorpe , Markland Wood",19,19,19,19,19,19,19
Islington Avenue,12,12,12,12,12,12,12
"Kingsview Village , St. Phillips , Martin Grove Gardens , Richview Gardens",17,17,17,17,17,17,17
"Mimico NW , The Queensway West , South of Bloor , Kingsway Park South West , Royal York South West",60,60,60,60,60,60,60
"New Toronto , Mimico South , Humber Bay Shores",19,19,19,19,19,19,19
"Old Mill South , King's Mill Park , Sunnylea , Humber Bay , Mimico NE , The Queensway East , Royal York South East , Kingsway Park South East",7,7,7,7,7,7,7
"South Steeles , Silverstone , Humbergate , Jamestown , Mount Olive , Beaumond Heights , Thistletown , Albion Gardens",18,18,18,18,18,18,18
"The Kingsway , Montgomery Road , Old Mill North",47,47,47,47,47,47,47
"West Deane Park , Princess Gardens , Martin Grove , Islington , Cloverdale",15,15,15,15,15,15,15


In [68]:
# List the distinct venue categories, in order of first appearance.
unique_categories = list(etobicoke_venues['Venue Category'].unique())
print('There are {} uniques categories.\n'.format(len(unique_categories)))
print('Here is the list of different categories:')
unique_categories

There are 91 uniques categories.

Here is the list of different categories:


['Café',
 'Golf Course',
 'Bank',
 'Pharmacy',
 'Bakery',
 'Grocery Store',
 'Shopping Mall',
 'Playground',
 'Park',
 'Skating Rink',
 'Convenience Store',
 'Fish & Chips Shop',
 'Restaurant',
 'Pizza Place',
 'Gym',
 'Theater',
 'Mexican Restaurant',
 'Hotel',
 'Clothing Store',
 'Liquor Store',
 'Coffee Shop',
 'Beer Store',
 'Farmers Market',
 'Cosmetics Shop',
 'College Rec Center',
 'Gas Station',
 'Shopping Plaza',
 'Pet Store',
 'Transportation Service',
 'Chinese Restaurant',
 'Sandwich Place',
 'Supermarket',
 'Intersection',
 'Discount Store',
 'Ice Cream Shop',
 'Flea Market',
 'Breakfast Spot',
 'Bus Line',
 'Mobile Phone Shop',
 'American Restaurant',
 'Supplement Shop',
 'Electronics Store',
 'Dessert Shop',
 'Italian Restaurant',
 'Indian Restaurant',
 'Pub',
 'Fried Chicken Joint',
 'Fast Food Restaurant',
 'Hardware Store',
 'Gym Pool',
 'Caribbean Restaurant',
 'Construction & Landscaping',
 'Moroccan Restaurant',
 'Pool',
 'Donut Shop',
 'Trail',
 'Garden Center',
 

#### 3.8 One-hot encode the "Categories" column

In [93]:
# Manually selected features that are relevant for the food warehouse:
# the neighborhood identification columns followed by the food-related venue categories.
neighborhood_columns = [
    'Neighborhood',
    'Neighborhood Latitude',
    'Neighborhood Longitude',
]

food_categories = [
    'Café', 'Bakery', 'Fish & Chips Shop', 'Restaurant', 'Pizza Place',
    'Mexican Restaurant', 'Chinese Restaurant', 'Sandwich Place',
    'Ice Cream Shop', 'Breakfast Spot', 'American Restaurant', 'Dessert Shop',
    'Italian Restaurant', 'Indian Restaurant', 'Fried Chicken Joint',
    'Fast Food Restaurant', 'Caribbean Restaurant', 'Moroccan Restaurant',
    'Donut Shop', 'Sushi Restaurant', 'French Restaurant', 'Tapas Restaurant',
    'Seafood Restaurant', 'Thai Restaurant', 'Burger Joint', 'Greek Restaurant',
    'Gourmet Shop', 'Cupcake Shop', 'Eastern European Restaurant',
    'Wings Joint', 'Burrito Place', 'Middle Eastern Restaurant', 'BBQ Joint',
    'Comfort Food Restaurant', 'Asian Restaurant', 'Buffet',
    'Mediterranean Restaurant',
]

important_list_of_features = neighborhood_columns + food_categories

In [117]:
# One-hot encode the venue category (one indicator column per category, named
# after the category itself), keep only the manually selected food categories
# and count them per neighborhood group.
venue_dummies = pd.get_dummies(data = etobicoke_venues, 
                               drop_first  = False, 
                               prefix = "", 
                               prefix_sep = "", 
                               columns = ['Venue Category'])

# The coordinate columns are not needed for the per-neighborhood counts.
selected_categories = [
    feature for feature in important_list_of_features
    if feature not in ('Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude')
]

# reindex (rather than plain column selection) gives a selected category that
# never occurs in the data an all-zero column instead of raising KeyError, and
# building a new frame avoids an in-place drop on a slice.
etobicoke_onehot = (
    venue_dummies
    .reindex(columns = ['Neighborhood'] + selected_categories, fill_value = 0)
    .groupby('Neighborhood')
    .sum()
)
etobicoke_onehot.head()

Unnamed: 0_level_0,Café,Bakery,Fish & Chips Shop,Restaurant,Pizza Place,Mexican Restaurant,Chinese Restaurant,Sandwich Place,Ice Cream Shop,Breakfast Spot,American Restaurant,Dessert Shop,Italian Restaurant,Indian Restaurant,Fried Chicken Joint,Fast Food Restaurant,Caribbean Restaurant,Moroccan Restaurant,Donut Shop,Sushi Restaurant,French Restaurant,Tapas Restaurant,Seafood Restaurant,Thai Restaurant,Burger Joint,Greek Restaurant,Gourmet Shop,Cupcake Shop,Eastern European Restaurant,Wings Joint,Burrito Place,Middle Eastern Restaurant,BBQ Joint,Comfort Food Restaurant,Asian Restaurant,Buffet,Mediterranean Restaurant
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
"Alderwood , Long Branch",0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Eringate , Bloordale Gardens , Old Burnhamthorpe , Markland Wood",1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Islington Avenue,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Kingsview Village , St. Phillips , Martin Grove Gardens , Richview Gardens",0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Mimico NW , The Queensway West , South of Bloor , Kingsway Park South West , Royal York South West",1,2,1,5,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,2,0,0,0,1,2,0,0,0,0,1,3,1,1,1,1,1,1


#### 3.9 Integrating different restaurants

In [118]:
def _collapse_columns(counts, keyword, total_name):
    """Replace every column whose name contains `keyword` by one summed column.

    Returns a new frame: the non-matching columns in their original order,
    followed by `total_name` holding the row-wise sum of the matching columns.
    An existing `total_name` column that itself contains `keyword` is folded
    into the sum, so applying the function again leaves the totals unchanged.
    """
    matched = [name for name in counts.columns if keyword in name]
    total = counts[matched].sum(axis = 1)
    collapsed = counts.drop(columns = matched)
    collapsed[total_name] = total
    return collapsed


# Merge all "... Restaurant" columns, then all "... Joint" columns, into totals.
etobicoke_onehot = _collapse_columns(etobicoke_onehot, 'Restaurant', 'Total Restaurants')
etobicoke_onehot = _collapse_columns(etobicoke_onehot, 'Joint', 'Total Joints')
etobicoke_onehot

Unnamed: 0_level_0,Café,Bakery,Fish & Chips Shop,Pizza Place,Sandwich Place,Ice Cream Shop,Breakfast Spot,Dessert Shop,Donut Shop,Gourmet Shop,Cupcake Shop,Burrito Place,Buffet,Total Restaurants,Total Joints
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"Alderwood , Long Branch",0,0,0,2,1,0,0,0,1,0,0,0,0,1,0
"Eringate , Bloordale Gardens , Old Burnhamthorpe , Markland Wood",1,0,1,1,0,0,0,0,0,0,0,0,0,0,0
Islington Avenue,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
"Kingsview Village , St. Phillips , Martin Grove Gardens , Richview Gardens",0,0,0,1,1,0,0,0,0,0,0,0,0,2,0
"Mimico NW , The Queensway West , South of Bloor , Kingsway Park South West , Royal York South West",1,2,1,0,1,0,0,0,0,0,0,3,1,15,4
"New Toronto , Mimico South , Humber Bay Shores",1,1,0,1,0,0,1,1,0,0,0,0,0,6,1
"Old Mill South , King's Mill Park , Sunnylea , Humber Bay , Mimico NE , The Queensway East , Royal York South East , Kingsway Park South East",0,0,0,0,0,1,0,0,0,0,0,0,0,3,0
"South Steeles , Silverstone , Humbergate , Jamestown , Mount Olive , Beaumond Heights , Thistletown , Albion Gardens",0,0,0,3,1,0,0,0,0,0,0,0,0,2,1
"The Kingsway , Montgomery Road , Old Mill North",1,1,0,2,0,0,2,2,0,1,1,0,0,12,2
"West Deane Park , Princess Gardens , Martin Grove , Islington , Cloverdale",0,0,1,2,0,0,0,0,0,0,0,0,0,2,0


#### 3.10 Run K-means algorithm to cluster the neighborhoods into 3 groups

In [132]:
# KMeans is imported in the import cell at the top of the notebook.
# n_init is pinned explicitly: its default differs between scikit-learn
# releases, which would otherwise change the clustering between environments.
kmeans = KMeans(n_clusters = 3, n_init = 10, random_state = 2).fit(etobicoke_onehot)

In [133]:
# Show the centre of each cluster, ranked by the total venue count at the centre.
# Group labels G1..Gk are derived from the number of fitted centres, so the
# cell keeps working if n_clusters is changed.
group_labels = ['G{}'.format(group) for group in range(1, len(kmeans.cluster_centers_) + 1)]
means_df = pd.DataFrame(kmeans.cluster_centers_,
                        columns = etobicoke_onehot.columns,
                        index = group_labels)
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Café,Bakery,Fish & Chips Shop,Pizza Place,Sandwich Place,Ice Cream Shop,Breakfast Spot,Dessert Shop,Donut Shop,Gourmet Shop,Cupcake Shop,Burrito Place,Buffet,Total Restaurants,Total Joints,Total Sum
G3,1.0,1.5,0.5,1.0,0.5,0.0,1.0,1.0,0.0,0.5,0.5,1.5,0.5,13.5,3.0,26.0
G1,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,6.0,1.0,12.0
G2,0.25,0.125,0.25,1.375,0.5,0.25,0.125,0.0,0.125,-1.387779e-17,-1.387779e-17,0.0,-1.387779e-17,1.375,0.125,4.5


> Ranked by the total number of food venues at the cluster centre ("Total Sum"), G3 is the best group, G1 is the second best and G2 is the third.

In [135]:
# Map every neighborhood group to its cluster; labels are shifted by one so
# that they match the G1..G3 naming of the cluster centres.
# Built column-wise so that 'Group' keeps an integer dtype.
neigh_summary = pd.DataFrame({'Neighborhood': etobicoke_onehot.index,
                              'Group': kmeans.labels_ + 1})
neigh_summary

Unnamed: 0,Neighborhood,Group
0,"Alderwood , Long Branch",2
1,"Eringate , Bloordale Gardens , Old Burnhamthor...",2
2,Islington Avenue,2
3,"Kingsview Village , St. Phillips , Martin Grov...",2
4,"Mimico NW , The Queensway West , South of Bloo...",3
5,"New Toronto , Mimico South , Humber Bay Shores",1
6,"Old Mill South , King's Mill Park , Sunnylea ,...",2
7,"South Steeles , Silverstone , Humbergate , Jam...",2
8,"The Kingsway , Montgomery Road , Old Mill North",3
9,"West Deane Park , Princess Gardens , Martin Gr...",2


#### 3.11 Top 3 neighborhoods are...

In [145]:
# Neighborhoods in group 3, the cluster with the highest 'Total Sum' in the
# centre table above. NOTE(review): the group number is hard-coded from that
# table; re-check the ranking whenever the clustering is re-run.
neigh_summary[neigh_summary['Group'] == 3]

Unnamed: 0,Neighborhood,Group
4,"Mimico NW , The Queensway West , South of Bloo...",3
8,"The Kingsway , Montgomery Road , Old Mill North",3


In [146]:
# Neighborhoods in group 1, the cluster with the second-highest 'Total Sum'
# in the centre table above. NOTE(review): the group number is hard-coded from
# that table; re-check the ranking whenever the clustering is re-run.
neigh_summary[neigh_summary['Group'] == 1]

Unnamed: 0,Neighborhood,Group
5,"New Toronto , Mimico South , Humber Bay Shores",1


## Thanks for reading!