# Capstone Assignment - Battle of Neigborhoods

### Scrape data from wiki webpage and transform into a pandas dataframe

In [1]:
import pandas as pd
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Remove rows where Borough is not assigned

In [2]:
df.drop(df[df['Borough']=="Not assigned"].index,axis=0, inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


### Group neighborhoods by postal code

In [3]:
df_grouped=df.groupby("Postcode").agg(lambda x:','.join(set(x)))
df_grouped.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"West Hill,Guildwood,Morningside"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### Update Neigborhood and Borough name to be the same if a borough is not assigned

In [4]:
df_grouped.loc[df_grouped['Neighborhood']=="Not assigned",'Neighborhood']=df_grouped.loc[df_grouped['Neighborhood']=="Not assigned",'Borough']
df_grouped.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"West Hill,Guildwood,Morningside"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### Load neigborhood geographical coordinates data

In [5]:
file_name='https://cocl.us/Geospatial_data'
df1=pd.read_csv(file_name)
df1.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge dataframes so that the resultant dataframe contains PostCode, Borough, Neighborhhod, Latitude and Longitude columns

In [6]:
df1.rename(columns = {'Postal Code':'Postcode'}, inplace = True) 
result=pd.merge(df_grouped,df1[['Postcode','Latitude','Longitude']],on='Postcode')
result.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Guildwood,Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Installing and importing dependencies

In [8]:
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
#!conda install -c conda-forge folium=0.5.0 --yes
import folium
import requests
from pandas.io.json import json_normalize
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-4.0.0               |             py_0         606 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.1 MB

The following NEW packages will be 

### Use geopy library to get the latitude and longitude values of Toronto

In [9]:
address ='Toronto, ON'
geolocator=Nominatim(user_agent="toronto_explorer")
location= geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geographical coordinates of Toronto are 43.653963, -79.387207.


### Create a map of Toronto with neighborhoods superimposed on top.

In [10]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, borough, neighborhood in zip(result['Latitude'], result['Longitude'], result['Borough'], result['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
map_toronto

###  From the Toronto dataframe filter to and create a  dataframe of Etobicoke which is a Borough within Toronto

In [11]:
etobicoke_data=result[result['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicoke_data.head(11)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M8V,Etobicoke,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484
2,M8X,Etobicoke,"Old Mill North,The Kingsway,Montgomery Road",43.653654,-79.506944
3,M8Y,Etobicoke,"King's Mill Park,Humber Bay,Old Mill South,Mim...",43.636258,-79.498509
4,M8Z,Etobicoke,"The Queensway West,South of Bloor,Mimico NW,Ro...",43.628841,-79.520999
5,M9B,Etobicoke,"Martin Grove,West Deane Park,Cloverdale,Prince...",43.650943,-79.554724
6,M9C,Etobicoke,"Old Burnhamthorpe,Bloordale Gardens,Markland W...",43.643515,-79.577201
7,M9P,Etobicoke,Westmount,43.696319,-79.532242
8,M9R,Etobicoke,"Richview Gardens,Martin Grove Gardens,St. Phil...",43.688905,-79.554724
9,M9V,Etobicoke,"Humbergate,Jamestown,South Steeles,Mount Olive...",43.739416,-79.588437


### Use geopy library to get the latitude and longitude values of Etobicoke which is a Borough within Toronto

In [12]:
address ='Etobicoke,ON'
geolocator=Nominatim(user_agent="toronto_explorer")
location= geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print('The geograpical coordinates of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Etobicoke are 43.67145915, -79.5524920661167.


###  Create a map of Etobicoke with neighborhoods superimposed on top.

In [13]:
map_etobicoke = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, borough, neighborhood in zip(etobicoke_data['Latitude'], etobicoke_data['Longitude'], etobicoke_data['Borough'], etobicoke_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)

map_etobicoke

### Define Foursquare Credentials and Version

In [14]:
CLIENT_ID = 'SINHNMALADY235YVGA53UZS5RDCMYEGZB330V2CN5RNTL1IA'
CLIENT_SECRET = 'YAX4F5HJ11XAPIJHWNU0ML3XURXAAGNZNJL1ZMSXZIVES22E'
VERSION = '20191227' 
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: SINHNMALADY235YVGA53UZS5RDCMYEGZB330V2CN5RNTL1IA
CLIENT_SECRET:YAX4F5HJ11XAPIJHWNU0ML3XURXAAGNZNJL1ZMSXZIVES22E


### Explore first neighborhood in the Etobicoke dataframe.

In [15]:
etobicoke_data.loc[0, 'Neighborhood']
neighborhood_latitude = etobicoke_data.loc[0, 'Latitude']
neighborhood_longitude = etobicoke_data.loc[0, 'Longitude']
neighborhood_name = etobicoke_data.loc[0, 'Neighborhood']
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, neighborhood_latitude,neighborhood_longitude))

Latitude and longitude values of New Toronto,Mimico South,Humber Bay Shores are 43.6056466, -79.50132070000001.


### Using the GET request url, get the top 100 venues that are in New Toronto,Mimico South,Humber Bay Shores within a radius of 5 km

In [16]:
LIMIT = 100
radius = 5000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        neighborhood_latitude, 
        neighborhood_longitude, 
        radius, 
        LIMIT)
url
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e07c7be618f43001b0a04a2'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 162,
  'suggestedBounds': {'ne': {'lat': 43.650646645000045,
    'lng': -79.43929089309577},
   'sw': {'lat': 43.560646554999956, 'lng': -79.56335050690424}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5395d784498e085ff3c18198',
       'name': 'Huevos Gourmet',
       'location': {'address': '2888 Lakeshore Blvd. W.',
        'lat': 43.601187646118454,
        'lng': -79.50371705335499,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.601187646118

### Create a function that extracts the category of the venue

In [17]:
def get_category_type(row):
    try:
            categories_list = row['categories']
    except:
            categories_list = row['venue.categories']
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Transform json into DataFrame

In [18]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head(110)

Unnamed: 0,name,categories,lat,lng
0,Huevos Gourmet,Mexican Restaurant,43.601188,-79.503717
1,LCBO,Liquor Store,43.602281,-79.499302
2,Sweet Olenka's,Dessert Shop,43.601099,-79.500325
3,Kitchen on 6th,Breakfast Spot,43.601396,-79.504563
4,SanRemo Bakery,Bakery,43.618542,-79.499485
5,Birds and Beans Cafe,Café,43.613942,-79.489062
6,Cellar Door Restaurant,Italian Restaurant,43.600221,-79.507638
7,Mastercard Centre For Hockey Excellence,Skating Rink,43.603126,-79.519818
8,Colonel Samuel Smith Park,Park,43.592141,-79.512305
9,Black Oak Brewing,Brewery,43.613747,-79.516339


In [19]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


### Module - 3 function to repeat the same process to all the neighborhoods in Etobicoke

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
etobicoke_venues = getNearbyVenues(names=etobicoke_data['Neighborhood'],
                                   latitudes=etobicoke_data['Latitude'],
                                   longitudes=etobicoke_data['Longitude']
                                  )

New Toronto,Mimico South,Humber Bay Shores
Alderwood,Long Branch
Old Mill North,The Kingsway,Montgomery Road
King's Mill Park,Humber Bay,Old Mill South,Mimico NE,The Queensway East,Kingsway Park South East,Royal York South East,Sunnylea
The Queensway West,South of Bloor,Mimico NW,Royal York South West,Kingsway Park South West
Martin Grove,West Deane Park,Cloverdale,Princess Gardens,Islington
Old Burnhamthorpe,Bloordale Gardens,Markland Wood,Eringate
Westmount
Richview Gardens,Martin Grove Gardens,St. Phillips,Kingsview Village
Humbergate,Jamestown,South Steeles,Mount Olive,Beaumond Heights,Albion Gardens,Thistletown,Silverstone
Northwest


In [22]:
print(etobicoke_venues.shape)
etobicoke_venues.head()

(1082, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,Huevos Gourmet,43.601188,-79.503717,Mexican Restaurant
1,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,LCBO,43.602281,-79.499302,Liquor Store
2,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,Sweet Olenka's,43.601099,-79.500325,Dessert Shop
3,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,Kitchen on 6th,43.601396,-79.504563,Breakfast Spot
4,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,SanRemo Bakery,43.618542,-79.499485,Bakery


In [23]:
etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood,Long Branch",100,100,100,100,100,100
"Humbergate,Jamestown,South Steeles,Mount Olive,Beaumond Heights,Albion Gardens,Thistletown,Silverstone",82,82,82,82,82,82
"King's Mill Park,Humber Bay,Old Mill South,Mimico NE,The Queensway East,Kingsway Park South East,Royal York South East,Sunnylea",100,100,100,100,100,100
"Martin Grove,West Deane Park,Cloverdale,Princess Gardens,Islington",100,100,100,100,100,100
"New Toronto,Mimico South,Humber Bay Shores",100,100,100,100,100,100
Northwest,100,100,100,100,100,100
"Old Burnhamthorpe,Bloordale Gardens,Markland Wood,Eringate",100,100,100,100,100,100
"Old Mill North,The Kingsway,Montgomery Road",100,100,100,100,100,100
"Richview Gardens,Martin Grove Gardens,St. Phillips,Kingsview Village",100,100,100,100,100,100
"The Queensway West,South of Bloor,Mimico NW,Royal York South West,Kingsway Park South West",100,100,100,100,100,100


In [24]:
print('There are {} unique categories.'.format(len(etobicoke_venues['Venue Category'].unique())))

There are 132 unique categories.


In [26]:
# one hot encoding
etobicoke_onehot = pd.get_dummies(etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
etobicoke_onehot['Neighborhood'] = etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [etobicoke_onehot.columns[-1]] + list(etobicoke_onehot.columns[:-1])
etobicoke_onehot = etobicoke_onehot[fixed_columns]

etobicoke_onehot.head()

Unnamed: 0,Neighborhood,ATM,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Arts & Crafts Store,Asian Restaurant,Auto Dealership,...,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store,Yoga Studio
0,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"New Toronto,Mimico South,Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
etobicoke_onehot.shape

(1082, 133)

In [28]:
etobicoke_grouped = etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
etobicoke_grouped

Unnamed: 0,Neighborhood,ATM,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Arts & Crafts Store,Asian Restaurant,Auto Dealership,...,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wings Joint,Women's Store,Yoga Studio
0,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,...,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.01
1,"Humbergate,Jamestown,South Steeles,Mount Olive...",0.012195,0.0,0.0,0.0,0.012195,0.0,0.0,0.036585,0.012195,...,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0
2,"King's Mill Park,Humber Bay,Old Mill South,Mim...",0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,...,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.01
3,"Martin Grove,West Deane Park,Cloverdale,Prince...",0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,...,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.0,0.01
4,"New Toronto,Mimico South,Humber Bay Shores",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,...,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.02
5,Northwest,0.0,0.01,0.01,0.04,0.03,0.0,0.0,0.02,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Old Burnhamthorpe,Bloordale Gardens,Markland W...",0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,...,0.01,0.0,0.0,0.02,0.0,0.02,0.01,0.0,0.01,0.01
7,"Old Mill North,The Kingsway,Montgomery Road",0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,...,0.0,0.01,0.0,0.02,0.0,0.0,0.01,0.01,0.0,0.01
8,"Richview Gardens,Martin Grove Gardens,St. Phil...",0.0,0.0,0.01,0.02,0.03,0.0,0.0,0.02,0.0,...,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0
9,"The Queensway West,South of Bloor,Mimico NW,Ro...",0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,...,0.01,0.01,0.0,0.02,0.0,0.0,0.01,0.01,0.0,0.01


In [29]:
etobicoke_grouped.shape

(11, 133)

In [30]:
num_top_venues = 5

for hood in etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = etobicoke_grouped[etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood,Long Branch----
            venue  freq
0     Coffee Shop  0.05
1    Burger Joint  0.05
2          Bakery  0.05
3            Café  0.04
4  Breakfast Spot  0.03


----Humbergate,Jamestown,South Steeles,Mount Olive,Beaumond Heights,Albion Gardens,Thistletown,Silverstone----
                  venue  freq
0           Coffee Shop  0.15
1  Fast Food Restaurant  0.11
2        Sandwich Place  0.05
3              Pharmacy  0.05
4    Chinese Restaurant  0.04


----King's Mill Park,Humber Bay,Old Mill South,Mimico NE,The Queensway East,Kingsway Park South East,Royal York South East,Sunnylea----
                venue  freq
0  Italian Restaurant  0.09
1         Coffee Shop  0.06
2                Park  0.06
3                Café  0.05
4    Sushi Restaurant  0.04


----Martin Grove,West Deane Park,Cloverdale,Princess Gardens,Islington----
                 venue  freq
0          Coffee Shop  0.09
1         Burger Joint  0.04
2        Grocery Store  0.04
3       Sandwich Place  0.03
4  Sp

In [31]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [32]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = etobicoke_grouped['Neighborhood']

for ind in np.arange(etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood,Long Branch",Coffee Shop,Bakery,Burger Joint,Café,Furniture / Home Store,Pizza Place,Liquor Store,Seafood Restaurant,Breakfast Spot,Grocery Store
1,"Humbergate,Jamestown,South Steeles,Mount Olive...",Coffee Shop,Fast Food Restaurant,Sandwich Place,Pharmacy,Italian Restaurant,Caribbean Restaurant,Asian Restaurant,Chinese Restaurant,Skating Rink,Sushi Restaurant
2,"King's Mill Park,Humber Bay,Old Mill South,Mim...",Italian Restaurant,Park,Coffee Shop,Café,Bakery,Pizza Place,Sushi Restaurant,Burrito Place,Ice Cream Shop,Liquor Store
3,"Martin Grove,West Deane Park,Cloverdale,Prince...",Coffee Shop,Burger Joint,Grocery Store,Sandwich Place,Liquor Store,Bakery,Furniture / Home Store,Golf Course,Sporting Goods Shop,Seafood Restaurant
4,"New Toronto,Mimico South,Humber Bay Shores",Park,Coffee Shop,Italian Restaurant,Pizza Place,Café,Bakery,Breakfast Spot,Burger Joint,Sushi Restaurant,Liquor Store


In [33]:
# set number of clusters
kclusters = 5

etobicoke_grouped_clustering = etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 3, 0, 2, 0, 1, 2, 0, 4, 0], dtype=int32)

In [34]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

etobicoke_merged = etobicoke_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
etobicoke_merged = etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

etobicoke_merged.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M8V,Etobicoke,"New Toronto,Mimico South,Humber Bay Shores",43.605647,-79.501321,0,Park,Coffee Shop,Italian Restaurant,Pizza Place,Café,Bakery,Breakfast Spot,Burger Joint,Sushi Restaurant,Liquor Store
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,2,Coffee Shop,Bakery,Burger Joint,Café,Furniture / Home Store,Pizza Place,Liquor Store,Seafood Restaurant,Breakfast Spot,Grocery Store
2,M8X,Etobicoke,"Old Mill North,The Kingsway,Montgomery Road",43.653654,-79.506944,0,Coffee Shop,Café,Italian Restaurant,Bakery,Bar,Park,Brewery,Pizza Place,Dessert Shop,Ice Cream Shop
3,M8Y,Etobicoke,"King's Mill Park,Humber Bay,Old Mill South,Mim...",43.636258,-79.498509,0,Italian Restaurant,Park,Coffee Shop,Café,Bakery,Pizza Place,Sushi Restaurant,Burrito Place,Ice Cream Shop,Liquor Store
4,M8Z,Etobicoke,"The Queensway West,South of Bloor,Mimico NW,Ro...",43.628841,-79.520999,0,Coffee Shop,Park,Bakery,Burger Joint,Liquor Store,Italian Restaurant,Furniture / Home Store,Seafood Restaurant,Café,Dessert Shop


In [36]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(etobicoke_merged['Latitude'], etobicoke_merged['Longitude'], etobicoke_merged['Neighborhood'], etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[4],
        fill=True,
       fill_color=rainbow[4],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [47]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 0, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Etobicoke,0,Coffee Shop,Fast Food Restaurant,Sandwich Place,Pharmacy,Italian Restaurant,Caribbean Restaurant,Asian Restaurant,Chinese Restaurant,Skating Rink,Sushi Restaurant


### Cluster 2

In [48]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 1, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Etobicoke,1,Coffee Shop,Bakery,Burger Joint,Café,Breakfast Spot,Seafood Restaurant,Grocery Store,Burrito Place,Furniture / Home Store,Liquor Store
5,Etobicoke,1,Coffee Shop,Grocery Store,Burger Joint,Seafood Restaurant,Sandwich Place,Liquor Store,Bakery,Furniture / Home Store,Sporting Goods Shop,Golf Course
6,Etobicoke,1,Grocery Store,Coffee Shop,Burger Joint,Bakery,Liquor Store,Sporting Goods Shop,Furniture / Home Store,Middle Eastern Restaurant,Clothing Store,Japanese Restaurant


###  Cluster 3

In [49]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 2, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,2,Park,Coffee Shop,Italian Restaurant,Pizza Place,Café,Burger Joint,Sushi Restaurant,Bakery,Breakfast Spot,Grocery Store
2,Etobicoke,2,Coffee Shop,Café,Italian Restaurant,Bakery,Park,Brewery,Bar,Pizza Place,Dessert Shop,Ice Cream Shop
3,Etobicoke,2,Italian Restaurant,Park,Coffee Shop,Café,Bakery,Sushi Restaurant,Pizza Place,Burrito Place,Liquor Store,Ice Cream Shop
4,Etobicoke,2,Coffee Shop,Park,Bakery,Dessert Shop,Liquor Store,Italian Restaurant,Breakfast Spot,Furniture / Home Store,Burger Joint,Seafood Restaurant


### Cluster 4

In [50]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 3, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Etobicoke,3,Coffee Shop,Hotel,Steakhouse,Airport Lounge,Chinese Restaurant,Rental Car Location,Indian Restaurant,American Restaurant,Fast Food Restaurant,Burger Joint


### Cluster 5

In [51]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 4, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Etobicoke,4,Coffee Shop,Sandwich Place,Bank,Bakery,Pizza Place,Golf Course,Pharmacy,Hotel,Chinese Restaurant,Fast Food Restaurant
8,Etobicoke,4,Coffee Shop,Hotel,Pharmacy,Steakhouse,Restaurant,American Restaurant,Golf Course,Bank,Bakery,Chinese Restaurant
