## Web scraping for Toronto neighborhood post codes

In [1]:
import requests
from bs4 import BeautifulSoup
import lxml
import pandas as pd
import numpy as np

In [2]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
canada_postcode_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() makes an HTTP error fail here instead of being parsed
# further down as if it were the expected page.
web_document = requests.get(canada_postcode_url, timeout=30)
web_document.raise_for_status()

In [3]:
# Parse the downloaded HTML text with the lxml parser backend.
soup = BeautifulSoup(web_document.text, 'lxml')

In [4]:
# Fortunate in that the first table is the table we want
# So the find function will give it to us without needing to loop
# (find returns only the first <table> element of the document).
table = soup.find('table')

In [5]:
# Column names are the stripped text of every header (<th>) cell of the table;
# the list is empty when the table has no header cells.
column_names = [header.get_text().strip() for header in table.find_all('th')]

column_names

['Postcode', 'Borough', 'Neighbourhood']

In [7]:
# Scrape every body row of the table into a list of records and build the
# data frame once at the end. (DataFrame.append was removed in pandas 2.0,
# and appending row by row is quadratic and leaves every row with index 0.)
records = []

# The first <tr> holds the column headers, so it is skipped.
for row in table.find_all('tr')[1:]:
    cells = [cell.get_text().strip() for cell in row.find_all('td')]

    # A row without any <td> cells carries no data; skip it instead of
    # failing on the index lookups below.
    if not cells:
        continue

    # If we have a borough but no neighbourhood, we make the neighbourhood = borough
    if cells[-1] == 'Not assigned' and cells[-2] != 'Not assigned':
        cells[-1] = cells[-2]

    # Consecutive rows with the same postcode are collapsed into one record:
    # the new neighbourhood is prepended to the neighbourhood text already stored.
    if records and records[-1][0] == cells[0]:
        records[-1][-1] = cells[-1] + ", " + records[-1][-1]
    else:
        records.append(cells)

# One row per postcode, using the column names already scraped
df = pd.DataFrame(records, columns=column_names)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
0,M2A,Not assigned,Not assigned
0,M3A,North York,Parkwoods
0,M4A,North York,Victoria Village
0,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [9]:
# Keep only the rows whose borough is assigned, switch to the column names
# used in the rest of the notebook, and renumber the rows from zero.
df = (
    df[df['Borough'] != 'Not assigned']
    .rename(columns={'Postcode':'PostalCode', 'Neighbourhood': 'Neighborhood'})
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Queen's Park


## Retrieving Latitude and Longitude using post code information

In [13]:
import geocoder

In [16]:
# Look up a latitude/longitude pair for every postal code.
# The number of attempts per postal code is bounded: an unbounded
# "retry until something comes back" loop never terminates when the
# geocoding service keeps returning no result.
MAX_GEOCODE_ATTEMPTS = 10

post_codes = df['PostalCode'].values
lat_lng_rows = []

for ii, post_code in enumerate(post_codes):
    print(post_code, ": ", ii+1, " of ", len(post_codes))

    lat_lng_coords = None
    for _attempt in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.google('{}, Toronto, Ontario'.format(post_code))
        lat_lng_coords = g.latlng
        # Any falsy answer (None or an empty result) counts as "no coordinates yet".
        if lat_lng_coords:
            break
    else:
        raise RuntimeError(
            'No coordinates returned for postal code {} after {} attempts'.format(
                post_code, MAX_GEOCODE_ATTEMPTS))

    # latitude / longitude stay bound at notebook level after the loop, as
    # before; a later cell reads them.
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    lat_lng_rows.append([latitude, longitude])

# Build the frame once from the collected rows (DataFrame.append was removed in pandas 2.0).
tmp_lat_long = pd.DataFrame(lat_lng_rows, columns=['Latitude', 'Longitude'])

M3A :  1  of  103
M4A :  2  of  103
M5A :  3  of  103
M6A :  4  of  103
M7A :  5  of  103
M9A :  6  of  103
M1B :  7  of  103
M3B :  8  of  103
M4B :  9  of  103
M5B :  10  of  103
M6B :  11  of  103
M9B :  12  of  103
M1C :  13  of  103
M3C :  14  of  103
M4C :  15  of  103
M5C :  16  of  103
M6C :  17  of  103
M9C :  18  of  103
M1E :  19  of  103
M4E :  20  of  103
M5E :  21  of  103
M6E :  22  of  103
M1G :  23  of  103
M4G :  24  of  103
M5G :  25  of  103
M6G :  26  of  103
M1H :  27  of  103
M2H :  28  of  103
M3H :  29  of  103
M4H :  30  of  103
M5H :  31  of  103
M6H :  32  of  103
M1J :  33  of  103
M2J :  34  of  103
M3J :  35  of  103
M4J :  36  of  103
M5J :  37  of  103
M6J :  38  of  103
M1K :  39  of  103
M2K :  40  of  103
M3K :  41  of  103
M4K :  42  of  103
M5K :  43  of  103
M6K :  44  of  103
M1L :  45  of  103
M2L :  46  of  103
M3L :  47  of  103
M4L :  48  of  103
M5L :  49  of  103
M6L :  50  of  103
M9L :  51  of  103
M1M :  52  of  103
M2M :  53  of  103
M3

In [17]:
# Renumber the coordinate rows from zero, discarding the previous index.
tmp_lat_long = tmp_lat_long.reset_index(drop=True)
tmp_lat_long.head()

Unnamed: 0,Latitude,Longitude
0,43.753259,-79.329656
1,43.725882,-79.315572
2,43.65426,-79.360636
3,43.718518,-79.464763
4,43.662301,-79.389494


In [18]:
# Put the coordinates next to the postal-code table, column-wise.
# concat with axis=1 matches rows by index label, so both frames must share
# the same index (each was renumbered from zero in an earlier cell).
df_merged = pd.concat([df, tmp_lat_long], axis=1)

In [19]:
# Preview the first ten rows of the merged postal-code / coordinate table.
df_merged.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## Pulling venue information from Foursquare

In [22]:
# Foursquare

In [378]:
# Hidden cell with credentials

In [24]:
# Function used in labs to pull down venues near a given location
def getNearbyVenues(names, latitudes, longitudes, radius=500,
                    version='20180605', limit=100, timeout=10):
    """Fetch the venues Foursquare reports around each given location.

    One request is sent to the Foursquare ``venues/explore`` endpoint per
    (name, latitude, longitude) triple, and each name is printed as it is
    processed. The credentials are read from the module-level names
    ``CLIENT_ID`` and ``CLIENT_SECRET``, which must be defined before the
    function is called.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of every location to query; they are
        consumed in parallel with ``zip``.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).
    version : str, optional
        Value sent as the ``v`` (API version) query parameter.
    limit : int, optional
        Value sent as the ``limit`` query parameter.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude', 'Venue Category' and
        'Icon Prefix'. The frame is empty (but still has these columns)
        when no venue was found. Venues that carry no category are left
        out, because the category and icon fields could not be filled.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers a request with an HTTP error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category',
               'Icon Prefix']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string rather than
        # formatting the credentials into the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': version,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=timeout)
        response.raise_for_status()

        # A reply without any result group is treated as "no venues here".
        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            if not categories:
                continue
            primary_category = categories[0]
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                primary_category['name'],
                primary_category['icon']['prefix']))

    # Passing the column names to the constructor keeps the result well-formed
    # even when there are no rows at all.
    return pd.DataFrame(venue_rows, columns=columns)

In [30]:
# Query Foursquare for the venues around every neighbourhood location in
# df_merged (default search radius of getNearbyVenues).
toronto_venues = getNearbyVenues(names=df_merged['Neighborhood'],
                   latitudes=df_merged['Latitude'],
                   longitudes=df_merged['Longitude']
                  )
# Commented-out alternative: presumably reloads a previously saved copy of the
# results from CSV instead of calling the API again -- TODO confirm the file exists.
#toronto_venues = pd.read_csv("toronto_venues_capston.csv")
#toronto_venues.drop('Unnamed: 0', axis=1, inplace=True)
#toronto_venues.head()

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Port Union, Rouge Hill, Highland Creek
Don Mills South, Flemingdon Park
Woodbine Heights
St. James Town
Humewood-Cedarvale
Old Burnhamthorpe, Markland Wood, Eringate, Bloordale Gardens
West Hill, Morningside, Guildwood]]
The Beaches
Berczy Park
Caledonia-Fairbanks]]
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Wilson Heights, Downsview North, Bathurst Manor
Thorncliffe Park
Richmond, King, Adelaide
Dufferin, Dovercourt Village
Scarborough Village
Oriole, Henry Farm, Fairview
York University, Northwood Park
East Toronto
Union Station, Toronto Islands, Harbourfront East
Trinity, Little Portugal
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East, CFB Toronto
R

In [39]:
# Preview the first twenty venue rows.
toronto_venues.head(20)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Icon Prefix
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park,https://ss3.4sqi.net/img/categories_v2/parks_o...
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant,https://ss3.4sqi.net/img/categories_v2/food/fa...
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop,https://ss3.4sqi.net/img/categories_v2/shops/f...
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena,https://ss3.4sqi.net/img/categories_v2/arts_en...
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop,https://ss3.4sqi.net/img/categories_v2/food/co...
5,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant,https://ss3.4sqi.net/img/categories_v2/food/po...
6,Victoria Village,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,43.726086,-79.31362,Intersection,https://ss3.4sqi.net/img/categories_v2/travel/...
7,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery,https://ss3.4sqi.net/img/categories_v2/food/ba...
8,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop,https://ss3.4sqi.net/img/categories_v2/food/co...
9,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa,https://ss3.4sqi.net/img/categories_v2/shops/spa_


In [40]:
# Count the missing values in each column of the venue table.
toronto_venues.isnull().sum()

Neighborhood              0
Neighborhood Latitude     0
Neighborhood Longitude    0
Venue                     0
Venue Latitude            0
Venue Longitude           0
Venue Category            0
Icon Prefix               0
dtype: int64

## Filtering venues to retain restaurants/food vendors only

In [77]:
# A venue counts as a food venue when its category icon path contains '/food/'.
is_food_venue = toronto_venues["Icon Prefix"].str.contains('/food/', regex=False)

# Keep only those rows, renumber them from zero and drop the icon column,
# which is not needed beyond this filter.
toronto_food_venues = (
    toronto_venues[is_food_venue]
    .reset_index(drop=True)
    .drop('Icon Prefix', axis=1)
)
toronto_food_venues.head(5)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
1,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
2,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
3,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
4,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop


In [79]:
# (number of food venues, number of columns)
toronto_food_venues.shape

(1298, 7)

In [83]:
# Number of food venues per neighbourhood, largest first.
toronto_food_venues['Neighborhood'].value_counts()

Kensington Market, Grange Park, Chinatown                                  73
Central Bay Street                                                         70
Victoria Hotel, Commerce Court                                             67
Underground city, First Canadian Place                                     67
Toronto Dominion Centre, Design Exchange                                   62
Richmond, King, Adelaide                                                   60
St. James Town                                                             59
Garden District, Ryerson                                                   55
Church and Wellesley                                                       54
Union Station, Toronto Islands, Harbourfront East                          53
Stn A PO Boxes 25 The Esplanade                                            51
Trinity, Little Portugal                                                   39
Riverdale, The Danforth West                                    

## Assignment of cuisines to the emerging cuisine category. For this project, an emerging cuisine is one of Ethiopian origin

In [84]:
# One integer flag per food venue, initialised to 0 (not an emerging cuisine).
emerging_cuisine = np.zeros(toronto_food_venues.shape[0], dtype='int64')

In [85]:
# Flag every venue whose category mentions "ethiopian" (case-insensitive),
# then show how many venues were flagged.
msk = (
    toronto_food_venues["Venue Category"]
    .str.lower()
    .str.contains('ethiopian', regex=False)
    .values
)
emerging_cuisine[msk] = 1
emerging_cuisine.sum()

2

In [86]:
# Store the 0/1 flags as a new column of the food-venue table.
toronto_food_venues['Emerging Cuisine'] = emerging_cuisine

In [373]:
# Show the venues flagged as emerging cuisine.
toronto_food_venues[toronto_food_venues['Emerging Cuisine']==1].head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Emerging Cuisine
125,"Garden District, Ryerson",43.657162,-79.378937,Ethiopiques,43.656513,-79.377078,Ethiopian Restaurant,1
1244,Church and Wellesley,43.66586,-79.38316,Ethiopian House,43.666599,-79.385669,Ethiopian Restaurant,1


If we are to use clustering, a decent verification check is for the above two neighbourhoods to be in the same cluster.

In [89]:
# One-hot encode the venue category: one indicator column per category,
# named after the category itself (no prefix).
toronto_onehot = pd.get_dummies(toronto_food_venues[['Venue Category']], prefix="", prefix_sep="")

# Put each venue's neighbourhood in the first column. The column is called
# 'Neighborhood_List' rather than 'Neighborhood' because, per the original
# author's note, 'Neighborhood' was also recorded as a venue.
toronto_onehot.insert(0, 'Neighborhood_List', toronto_food_venues['Neighborhood'])

toronto_onehot.head()

Unnamed: 0,Neighborhood_List,Afghan Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Belgian Restaurant,Bistro,...,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Feature selection from correlation

In [110]:
# Get total number of each restaurant category in each neighborhood group.
# GroupBy.sum() is called without an `axis` argument: current pandas
# does not accept that keyword here and raises a TypeError.
toronto_onehot_count = toronto_onehot.groupby('Neighborhood_List').sum()
toronto_onehot_count.head()

Unnamed: 0_level_0,Afghan Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Belgian Restaurant,Bistro,Brazilian Restaurant,...,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint
Neighborhood_List,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Bayview Village,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Berczy Park,0,0,0,0,0,1,2,1,1,0,...,0,0,0,1,1,0,0,0,0,0
Business reply mail Processing Centre969 Eastern,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Caledonia-Fairbanks]],0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [143]:
# Review correlation of Ethiopian Restaurant type with other types
# Might help with feature selection
# (correlations are computed between the per-neighbourhood category counts;
# the 'Ethiopian Restaurant' row is shown sorted in ascending order)
toronto_onehot_count.corr().loc['Ethiopian Restaurant'].sort_values(axis=0)

Bakery                            -0.088593
Fried Chicken Joint               -0.059880
Brewery                           -0.055152
French Restaurant                 -0.053179
New American Restaurant           -0.051988
Deli / Bodega                     -0.050276
Asian Restaurant                  -0.049605
Smoothie Shop                     -0.048305
Comfort Food Restaurant           -0.046004
Salad Place                       -0.045341
Bagel Shop                        -0.044426
Noodle House                      -0.042166
Bistro                            -0.040291
Latin American Restaurant         -0.035806
Fish & Chips Shop                 -0.035806
Food Truck                        -0.035806
Greek Restaurant                  -0.032715
Gluten-free Restaurant            -0.030812
Portuguese Restaurant             -0.030812
Hawaiian Restaurant               -0.030812
Belgian Restaurant                -0.030812
Eastern European Restaurant       -0.030812
Dim Sum Restaurant              

In [144]:
# Keep the ascending-sorted correlations with 'Ethiopian Restaurant' for the
# feature selection below (most negative first, most positive last).
toronto_emerging_corr = toronto_onehot_count.corr().loc['Ethiopian Restaurant'].sort_values(axis=0)

In [145]:

# The five categories most negatively correlated with Ethiopian Restaurant:
# the Series is sorted ascending, so they are its first five entries.
most_negative = toronto_emerging_corr.iloc[:5]
top5_neg_corr = most_negative.index.tolist()
most_negative

Bakery                    -0.088593
Fried Chicken Joint       -0.059880
Brewery                   -0.055152
French Restaurant         -0.053179
New American Restaurant   -0.051988
Name: Ethiopian Restaurant, dtype: float64

In [146]:
# Top 5 positively correlated categories with Ethiopian Restaurant
# (the last five entries of the ascending-sorted Series).
# NOTE(review): this slice includes 'Ethiopian Restaurant' itself (its
# correlation with itself is 1.0), so only four other categories are
# selected -- confirm that this is intended.
top5_pos_corr = list(toronto_emerging_corr[-5:].index)
toronto_emerging_corr[-5:]

Juice Bar               0.562731
Bubble Tea Shop         0.574324
Japanese Restaurant     0.644599
Afghan Restaurant       0.702728
Ethiopian Restaurant    1.000000
Name: Ethiopian Restaurant, dtype: float64

In [147]:
# Report the most negatively correlated categories selected above.
print("From correlation, features to consider for WORST neighborhoods to consider for opening an Ethiopian food service")
print(top5_neg_corr)

From correlation, features to consider for WORST neighborhoods to consider for opening an Ethiopian food service
['Bakery', 'Fried Chicken Joint', 'Brewery', 'French Restaurant', 'New American Restaurant']


In [148]:
# Report the most positively correlated categories selected above.
print("From correlation, features to consider for BEST neighborhoods to consider for opening an Ethiopian food service")
print(top5_pos_corr)

From correlation, features to consider for BEST neighborhoods to consider for opening an Ethiopian food service
['Juice Bar', 'Bubble Tea Shop', 'Japanese Restaurant', 'Afghan Restaurant', 'Ethiopian Restaurant']


In [372]:
# Let's recreate the one-hot encoding matrix but only using the top 5 most positively and negatively correlated
# features with Ethiopian Restaurant


In [371]:
# Restrict the one-hot table to the neighbourhood column plus the selected
# negatively and positively correlated categories, in that order.
selected_columns = ['Neighborhood_List', *top5_neg_corr, *top5_pos_corr]
toronto_onehot_filtered = toronto_onehot[selected_columns]
toronto_onehot_filtered.head(20)

Unnamed: 0,Neighborhood_List,Bakery,Fried Chicken Joint,Brewery,French Restaurant,New American Restaurant,Juice Bar,Bubble Tea Shop,Japanese Restaurant,Afghan Restaurant,Ethiopian Restaurant
0,Parkwoods,0,0,0,0,0,0,0,0,0,0
1,Victoria Village,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",1,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0
5,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0
6,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0
7,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0
8,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0
9,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0


In [261]:
# Count the venues of each selected category per neighbourhood, then turn
# the neighbourhood back into an ordinary column.
venues_by_neighborhood = toronto_onehot_filtered.groupby('Neighborhood_List')
toronto_grouped = venues_by_neighborhood.sum().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood_List,Bakery,Fried Chicken Joint,Brewery,French Restaurant,New American Restaurant,Juice Bar,Bubble Tea Shop,Japanese Restaurant,Afghan Restaurant,Ethiopian Restaurant
0,Agincourt,0,0,0,0,0,0,0,0,0,0
1,Bayview Village,0,0,0,0,0,0,0,1,0,0
2,Berczy Park,2,0,0,1,0,0,0,0,0,0
3,Business reply mail Processing Centre969 Eastern,0,0,1,0,0,0,0,0,0,0
4,Caledonia-Fairbanks]],0,0,0,0,0,0,0,0,0,0


## Clustering to see which neighbourhoods seem likely to embrace the emerging cuisine

In [262]:
from sklearn.cluster import KMeans
import folium # map rendering library
import matplotlib.cm as cm
import matplotlib.colors as colors

# set number of clusters
kclusters = 3

# Cluster on the per-category venue counts only. The column is dropped by
# keyword because pandas 2.0 no longer accepts the axis as a second
# positional argument of DataFrame.drop.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood_List')

# run k-means clustering
# n_init is pinned explicitly so the number of restarts (and with the fixed
# random_state, the result) does not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 0, 0, 0, 0, 1, 0, 1], dtype=int32)

In [298]:
# Mean coordinates of each neighbourhood, indexed by neighbourhood name.
# Only the two coordinate columns are averaged: taking the mean of the text
# columns (venue name, category) raises a TypeError on pandas >= 2.0.
neighborhood_coords = (
    toronto_food_venues
    .groupby("Neighborhood")[['Neighborhood Latitude', 'Neighborhood Longitude']]
    .mean()
    .rename(columns={'Neighborhood Latitude': 'Latitude',
                     'Neighborhood Longitude': 'Longitude'})
)

# Work on an explicit copy so adding columns does not touch toronto_grouped.
toronto_merged = toronto_grouped.copy()

# add clustering labels (kmeans was fitted on the rows of toronto_grouped,
# so the labels are in the same row order)
toronto_merged['Cluster Labels'] = kmeans.labels_

# Attach the coordinates by neighbourhood name rather than by row position,
# so the result does not depend on the two tables being sorted identically.
toronto_merged = toronto_merged.join(neighborhood_coords, on='Neighborhood_List')
toronto_merged.head()

Unnamed: 0,Neighborhood_List,Bakery,Fried Chicken Joint,Brewery,French Restaurant,New American Restaurant,Juice Bar,Bubble Tea Shop,Japanese Restaurant,Afghan Restaurant,Ethiopian Restaurant,Cluster Labels,Latitude,Longitude
0,Agincourt,0,0,0,0,0,0,0,0,0,0,0,43.7942,-79.262029
1,Bayview Village,0,0,0,0,0,0,0,1,0,0,0,43.786947,-79.385975
2,Berczy Park,2,0,0,1,0,0,0,0,0,0,2,43.644771,-79.373306
3,Business reply mail Processing Centre969 Eastern,0,0,1,0,0,0,0,0,0,0,0,43.662744,-79.321558
4,Caledonia-Fairbanks]],0,0,0,0,0,0,0,0,0,0,0,43.689026,-79.453512


In [359]:
# create map, centred on the mean position of the neighbourhoods being plotted.
# (The centre is computed from toronto_merged rather than taken from loop
# variables left over from an earlier cell, so this cell does not depend on
# hidden notebook state.)
map_center = [toronto_merged['Latitude'].mean(), toronto_merged['Longitude'].mean()]
map_clusters = folium.Map(location=map_center, zoom_start=11, width=800, height=600)

# set color scheme for the clusters: one hex colour per cluster, sampled
# evenly from the 'jet' colormap
colors_array = cm.jet(np.linspace(0.3, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]


In [376]:
# add one circle marker per neighbourhood to the map
markers_colors = []
marker_columns = ['Latitude', 'Longitude', 'Neighborhood_List', 'Cluster Labels']
for lat, lon, poi, cluster in toronto_merged[marker_columns].itertuples(index=False, name=None):
    # Colour lookup is shifted by one: label 0 picks the last colour of the
    # list (index -1), label 1 the first, and so on.
    cluster_color = rainbow[cluster-1]
    marker = folium.CircleMarker(
        [lat, lon],
        radius=100,
        popup=folium.Popup(str(poi) + ' Cluster ' + str(cluster)),
        color=cluster_color,
        fill_color=cluster_color,
        fill_opacity=0.7)
    marker.add_to(map_clusters)

In [379]:
# Render the map with the cluster markers.
map_clusters

In [368]:
# Show the neighbourhoods that k-means assigned to cluster 0.
# NOTE(review): the interpretation in the message is tied to this particular
# cluster numbering -- re-check it whenever the clustering is re-run with
# different data or settings.
print("Cluster 0: Neighborhoods neither likely or unlikely to embrace a new Ethiopian Restaurant")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]

Cluster 0: Neighborhoods neither likely or unlikely to embrace a new Ethiopian Restaurant


Unnamed: 0,Neighborhood_List,Bakery,Fried Chicken Joint,Brewery,French Restaurant,New American Restaurant,Juice Bar,Bubble Tea Shop,Japanese Restaurant,Afghan Restaurant,Ethiopian Restaurant,Cluster Labels,Latitude,Longitude
0,Agincourt,0,0,0,0,0,0,0,0,0,0,0,43.794200,-79.262029
1,Bayview Village,0,0,0,0,0,0,0,1,0,0,0,43.786947,-79.385975
3,Business reply mail Processing Centre969 Eastern,0,0,1,0,0,0,0,0,0,0,0,43.662744,-79.321558
4,Caledonia-Fairbanks]],0,0,0,0,0,0,0,0,0,0,0,43.689026,-79.453512
5,Canada Post Gateway Processing Centre,0,1,0,0,0,0,0,0,0,0,0,43.636966,-79.615819
6,Cedarbrae,1,1,0,0,0,0,0,0,0,0,0,43.773136,-79.239476
8,Christie,0,0,0,0,0,0,0,0,0,0,0,43.669542,-79.422564
10,"Cliffside West, Birch Cliff",0,0,0,0,0,0,0,0,0,0,0,43.692657,-79.264848
11,Davisville,0,1,1,0,0,0,0,0,0,0,0,43.704324,-79.388790
12,Davisville North,0,0,0,0,0,0,0,0,0,0,0,43.712751,-79.390197


In [369]:
# Show the neighbourhoods that k-means assigned to cluster 1.
# NOTE(review): the interpretation in the message is tied to this particular
# cluster numbering -- re-check it whenever the clustering is re-run with
# different data or settings.
print("Cluster 1: Neighborhoods likely to embrace a new Ethiopian Restaurant")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]

Cluster 1: Neighborhoods likely to embrace a new Ethiopian Restaurant


Unnamed: 0,Neighborhood_List,Bakery,Fried Chicken Joint,Brewery,French Restaurant,New American Restaurant,Juice Bar,Bubble Tea Shop,Japanese Restaurant,Afghan Restaurant,Ethiopian Restaurant,Cluster Labels,Latitude,Longitude
7,Central Bay Street,0,0,0,1,0,0,2,3,0,0,1,43.657952,-79.387383
9,Church and Wellesley,0,0,0,0,0,1,2,6,1,1,1,43.66586,-79.38316
18,"Garden District, Ryerson",0,0,0,0,0,1,1,3,0,1,1,43.657162,-79.378937


In [370]:
# Show the neighbourhoods that k-means assigned to cluster 2.
# NOTE(review): the interpretation in the message is tied to this particular
# cluster numbering -- re-check it whenever the clustering is re-run with
# different data or settings.
print("Cluster 2: Neighborhoods unlikely to embrace a new Ethiopian Restaurant")
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]

Cluster 2: Neighborhoods unlikely to embrace a new Ethiopian Restaurant


Unnamed: 0,Neighborhood_List,Bakery,Fried Chicken Joint,Brewery,French Restaurant,New American Restaurant,Juice Bar,Bubble Tea Shop,Japanese Restaurant,Afghan Restaurant,Ethiopian Restaurant,Cluster Labels,Latitude,Longitude
2,Berczy Park,2,0,0,1,0,0,0,0,0,0,2,43.644771,-79.373306
16,"Dufferin, Dovercourt Village",2,0,1,0,0,0,0,0,0,0,2,43.669005,-79.442259
24,"Kensington Market, Grange Park, Chinatown",5,0,0,0,0,0,1,1,0,0,2,43.653206,-79.400049
35,"Oriole, Henry Farm, Fairview",3,0,0,0,0,0,0,1,0,0,2,43.778518,-79.346556
40,"Regent Park, Harbourfront",3,0,1,1,0,0,0,0,0,0,2,43.65426,-79.360636
41,"Richmond, King, Adelaide",2,0,0,0,1,0,0,2,0,0,2,43.650571,-79.384568
48,St. James Town,3,0,0,0,1,0,0,3,0,0,2,43.651494,-79.375418
49,"St. James Town, Cabbagetown",2,0,0,0,0,0,0,1,0,0,2,43.667967,-79.367675
53,Stn A PO Boxes 25 The Esplanade,2,0,0,1,0,0,0,2,0,0,2,43.646435,-79.374846
54,Studio District,2,0,1,0,1,1,0,0,0,0,2,43.659525,-79.340923
