# Segmenting and Clustering Neighborhoods in Toronto city.

### Importing required modules.

In [46]:
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as bs
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
import json 
from pandas.io.json import json_normalize

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



### 1.Creating a beautiful soup object after the parsing the wikipedia page using request.get() method.


In [47]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page=requests.get(url,headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"})
soup = bs(page.content,"html.parser")

### Create a dataframe with list

In [48]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

[{'PostalCode': 'M3A', 'Borough': 'North York', 'Neighborhood': 'Parkwoods'}, {'PostalCode': 'M4A', 'Borough': 'North York', 'Neighborhood': 'Victoria Village'}, {'PostalCode': 'M5A', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Regent Park, Harbourfront'}, {'PostalCode': 'M6A', 'Borough': 'North York', 'Neighborhood': 'Lawrence Manor, Lawrence Heights'}, {'PostalCode': 'M7A', 'Borough': "Queen's Park", 'Neighborhood': 'Ontario Provincial Government'}, {'PostalCode': 'M9A', 'Borough': 'Etobicoke', 'Neighborhood': 'Islington Avenue'}, {'PostalCode': 'M1B', 'Borough': 'Scarborough', 'Neighborhood': 'Malvern, Rouge'}, {'PostalCode': 'M3B', 'Borough': 'North York', 'Neighborhood': 'Don Mills North'}, {'PostalCode': 'M4B', 'Borough': 'East York', 'Neighborhood': 'Parkview Hill, Woodbine Gardens'}, {'PostalCode': 'M5B', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Garden District, Ryerson'}, {'PostalCode': 'M6B', 'Borough': 'North York', 'Neighborhood': 'Glencairn'}, {'PostalCode': 'M9

In [49]:
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto Business,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [50]:
df.shape

(103, 3)

### Using dataset Geospatial_Coordinates.csv

In [51]:
df1=pd.read_csv("Geospatial_Coordinates.csv")
df1

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


### Adding Latitude and Longitude columns to original dataframe

In [52]:
Latitude=df1['Latitude']
Longitude=df1['Longitude']
df['Latitude']=Latitude
df['Longitude']=Longitude
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.806686,-79.194353
1,M4A,North York,Victoria Village,43.784535,-79.160497
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.763573,-79.188711
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.770992,-79.216917
4,M7A,Queen's Park,Ontario Provincial Government,43.773136,-79.239476
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.706876,-79.518188
99,M4Y,Downtown Toronto,Church and Wellesley,43.696319,-79.532242
100,M7Y,East Toronto Business,Enclave of M4L,43.688905,-79.554724
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.739416,-79.588437


In [53]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 15 boroughs and 103 neighborhoods.


### Use geopy library to get the latitude and longitude values of Toronto City.

In [54]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="on_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto.

In [55]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### segment and cluster only the neighborhoods in Etobicoke.

In [56]:
Etobicoke_data = df[df['Borough'] == 'Etobicoke'].reset_index(drop=True)
Etobicoke_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.744734,-79.239476
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.750071,-79.295849
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.803762,-79.363452
3,M9P,Etobicoke,Westmount,43.648429,-79.38228
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.647927,-79.41975


geographical coordinates of Etobicoke.

In [57]:
address = 'Etobicoke,Toronto'

geolocator = Nominatim(user_agent="on_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Etobicoke are 43.6435559, -79.5656326.


In [58]:
map_Etobicoke = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Etobicoke_data['Latitude'], Etobicoke_data['Longitude'], Etobicoke_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Etobicoke)  
    
map_Etobicoke

### Define Foursquare Credentials and Version

In [59]:
CLIENT_ID = 'LSPWVHSDELOATFSUPNO2SI2G1NRK3VPEACWOTTSTFLB3DATA' # your Foursquare ID
CLIENT_SECRET = 'W2IAIU5GS4GWG54QLSOZZDPKFLNKGQ5DJ0POSEMCVVPBWL0P' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: LSPWVHSDELOATFSUPNO2SI2G1NRK3VPEACWOTTSTFLB3DATA
CLIENT_SECRET:W2IAIU5GS4GWG54QLSOZZDPKFLNKGQ5DJ0POSEMCVVPBWL0P


Get the neighborhood's name.

In [60]:
Etobicoke_data.loc[0, 'Neighborhood']

'Islington Avenue'

Get the neighborhood's latitude and longitude values.

In [61]:
neighborhood_latitude = Etobicoke_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Etobicoke_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Etobicoke_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Islington Avenue are 43.7447342, -79.2394761.


### Get the top 100 venues that are in Islington Avenue within a radius of 500 meters.

In [62]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
#url # display URL

In [63]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '610d3d2036f10c6c33437119'},
  'headerLocation': 'Eglinton East',
  'headerFullLocation': 'Eglinton East, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 2,
  'suggestedBounds': {'ne': {'lat': 43.749234204500006,
    'lng': -79.23325872538939},
   'sw': {'lat': 43.7402341955, 'lng': -79.24569347461062}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5150a8dae4b045dfb6581c85',
       'name': 'McCowan Park',
       'location': {'lat': 43.74508851212816,
        'lng': -79.239335687338,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.74508851212816,
          'lng': -79.239335687338}],
        'distance': 41,
        'cc': 'CA',
        'country': 'Canada',
        'formattedAddress': ['Can

### Borrow the get_category_type function from the Foursquare lab.

In [64]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

clean the json and structure it into a pandas dataframe.

In [65]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,McCowan Park,Playground,43.745089,-79.239336
1,More Then Wax,Business Service,43.742264,-79.242138


In [66]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

2 venues were returned by Foursquare.


# 2. Explore Neighborhoods in Etobicoke

In [67]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Creating a new dataframe called Etobicoke_venues.

In [68]:
Etobicoke_venues = getNearbyVenues(names=Etobicoke_data['Neighborhood'],
                                   latitudes=Etobicoke_data['Latitude'],
                                   longitudes=Etobicoke_data['Longitude']
                                  )

Islington Avenue
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Westmount
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
New Toronto, Mimico South, Humber Bay Shores
South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens
Alderwood, Long Branch
The Kingsway, Montgomery Road, Old Mill North
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West


In [69]:
print(Etobicoke_venues.shape)
Etobicoke_venues.head()

(190, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Islington Avenue,43.744734,-79.239476,McCowan Park,43.745089,-79.239336,Playground
1,Islington Avenue,43.744734,-79.239476,More Then Wax,43.742264,-79.242138,Business Service
2,"West Deane Park, Princess Gardens, Martin Grov...",43.750071,-79.295849,Crown Pastries,43.746098,-79.293142,Bakery
3,"West Deane Park, Princess Gardens, Martin Grov...",43.750071,-79.295849,Subway,43.746267,-79.293193,Sandwich Place
4,"West Deane Park, Princess Gardens, Martin Grov...",43.750071,-79.295849,Lebanese bakery,43.746701,-79.292896,Middle Eastern Restaurant


how many venues were returned for each neighborhood

In [70]:
Etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",5,5,5,5,5,5
Islington Avenue,2,2,2,2,2,2
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",42,42,42,42,42,42
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",4,4,4,4,4,4
"New Toronto, Mimico South, Humber Bay Shores",12,12,12,12,12,12
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",9,9,9,9,9,9
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",7,7,7,7,7,7
"The Kingsway, Montgomery Road, Old Mill North",2,2,2,2,2,2
"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",7,7,7,7,7,7
Westmount,100,100,100,100,100,100


how many unique categories can be curated from all the returned venues

In [71]:
print('There are {} uniques categories.'.format(len(Etobicoke_venues['Venue Category'].unique())))

There are 88 uniques categories.


## 3. Analyze Each Neighborhood

In [72]:
# one hot encoding
Etobicoke_onehot = pd.get_dummies(Etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Etobicoke_onehot['Neighborhood'] = Etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Etobicoke_onehot.columns[-1]] + list(Etobicoke_onehot.columns[:-1])
Etobicoke_onehot = Etobicoke_onehot[fixed_columns]

Etobicoke_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Art Gallery,Asian Restaurant,Auto Garage,Bakery,Bar,Beer Bar,Beer Store,Bistro,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Train Station,Truck Stop,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Islington Avenue,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Islington Avenue,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [73]:
Etobicoke_onehot.shape

(190, 89)

### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [74]:
Etobicoke_grouped = Etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
Etobicoke_grouped

Unnamed: 0,Neighborhood,American Restaurant,Art Gallery,Asian Restaurant,Auto Garage,Bakery,Bar,Beer Bar,Beer Store,Bistro,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Train Station,Truck Stop,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Islington Avenue,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Kingsview Village, St. Phillips, Martin Grove ...",0.0,0.02381,0.047619,0.0,0.02381,0.071429,0.0,0.02381,0.02381,...,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.047619,0.02381,0.02381
3,"Mimico NW, The Queensway West, South of Bloor,...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
4,"New Toronto, Mimico South, Humber Bay Shores",0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"West Deane Park, Princess Gardens, Martin Grov...",0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
9,Westmount,0.03,0.01,0.03,0.0,0.03,0.02,0.01,0.0,0.0,...,0.02,0.01,0.02,0.01,0.01,0.0,0.01,0.0,0.01,0.0


### Print each neighborhood along with the top 5 most common venues

In [75]:
num_top_venues = 5

for hood in Etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Etobicoke_grouped[Etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood----
                      venue  freq
0                   Dog Run   0.2
1      Fast Food Restaurant   0.2
2  Mediterranean Restaurant   0.2
3               Golf Course   0.2
4                      Pool   0.2


----Islington Avenue----
                 venue  freq
0           Playground   0.5
1     Business Service   0.5
2  American Restaurant   0.0
3                  Pub   0.0
4                Plaza   0.0


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens----
                   venue  freq
0                    Bar  0.07
1            Men's Store  0.07
2                   Café  0.05
3       Asian Restaurant  0.05
4  Vietnamese Restaurant  0.05


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
                 venue  freq
0            Drugstore  0.25
1  Rental Car Location  0.25
2           Truck Stop  0.25
3        Garden Center  0.25
4  Amer

### Function to sort the venues in descending order.

In [76]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Create the new dataframe and display the top 10 venues for each neighborhood.

In [77]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Etobicoke_grouped['Neighborhood']

for ind in np.arange(Etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Dog Run,Fast Food Restaurant,Mediterranean Restaurant,Golf Course,Pool,American Restaurant,New American Restaurant,Playground,Pizza Place,Pharmacy
1,Islington Avenue,Playground,Business Service,American Restaurant,Pub,Plaza,Pizza Place,Pharmacy,Park,Opera House,New American Restaurant
2,"Kingsview Village, St. Phillips, Martin Grove ...",Bar,Men's Store,Café,Asian Restaurant,Vietnamese Restaurant,Restaurant,Italian Restaurant,Greek Restaurant,Ice Cream Shop,Art Gallery
3,"Mimico NW, The Queensway West, South of Bloor,...",Drugstore,Rental Car Location,Truck Stop,Garden Center,American Restaurant,Monument / Landmark,Pizza Place,Pharmacy,Park,Opera House
4,"New Toronto, Mimico South, Humber Bay Shores",Café,Liquor Store,Fast Food Restaurant,Bakery,Restaurant,Hobby Shop,Pizza Place,Gym,Mexican Restaurant,Pharmacy


# 4. Cluster Neighborhoods

k-means to cluster the neighborhood into 5 clusters.

In [78]:
# set number of clusters
kclusters = 5

Etobicoke_grouped_clustering = Etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 4, 0, 3, 0, 0, 0, 2, 0, 0])

create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [79]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Etobicoke_merged = Etobicoke_data

# merge Etobicoke_grouped with Etobicoke_data to add latitude/longitude for each neighborhood
Etobicoke_merged = Etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Etobicoke_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M9A,Etobicoke,Islington Avenue,43.744734,-79.239476,4.0,Playground,Business Service,American Restaurant,Pub,Plaza,Pizza Place,Pharmacy,Park,Opera House,New American Restaurant
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.750071,-79.295849,0.0,Middle Eastern Restaurant,Auto Garage,Bakery,Vietnamese Restaurant,Sandwich Place,Shopping Mall,Smoke Shop,Monument / Landmark,Pizza Place,Pharmacy
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.803762,-79.363452,1.0,Dog Run,Fast Food Restaurant,Mediterranean Restaurant,Golf Course,Pool,American Restaurant,New American Restaurant,Playground,Pizza Place,Pharmacy
3,M9P,Etobicoke,Westmount,43.648429,-79.38228,0.0,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Gym,American Restaurant,Seafood Restaurant,Asian Restaurant,Bakery
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.647927,-79.41975,0.0,Bar,Men's Store,Café,Asian Restaurant,Vietnamese Restaurant,Restaurant,Italian Restaurant,Greek Restaurant,Ice Cream Shop,Art Gallery


### Visualize the resulting clusters

In [80]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Etobicoke_merged['Latitude'], Etobicoke_merged['Longitude'], Etobicoke_merged['Neighborhood'], Etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# 5. Examine Clusters

### Cluster 1

In [81]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 0, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Etobicoke,0.0,Middle Eastern Restaurant,Auto Garage,Bakery,Vietnamese Restaurant,Sandwich Place,Shopping Mall,Smoke Shop,Monument / Landmark,Pizza Place,Pharmacy
3,Etobicoke,0.0,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Gym,American Restaurant,Seafood Restaurant,Asian Restaurant,Bakery
4,Etobicoke,0.0,Bar,Men's Store,Café,Asian Restaurant,Vietnamese Restaurant,Restaurant,Italian Restaurant,Greek Restaurant,Ice Cream Shop,Art Gallery
5,Etobicoke,0.0,Café,Liquor Store,Fast Food Restaurant,Bakery,Restaurant,Hobby Shop,Pizza Place,Gym,Mexican Restaurant,Pharmacy
6,Etobicoke,0.0,Pizza Place,Pub,Playground,Gym,Sandwich Place,Coffee Shop,Museum,Plaza,Pharmacy,Park
9,Etobicoke,0.0,Grocery Store,Fast Food Restaurant,Pizza Place,Fried Chicken Joint,Sandwich Place,Beer Store,Pharmacy,Coffee Shop,American Restaurant,New American Restaurant


### Cluster 2

In [82]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 1, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Etobicoke,1.0,Dog Run,Fast Food Restaurant,Mediterranean Restaurant,Golf Course,Pool,American Restaurant,New American Restaurant,Playground,Pizza Place,Pharmacy


### Cluster 3

In [83]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 2, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Etobicoke,2.0,Convenience Store,Park,American Restaurant,Pub,Plaza,Playground,Pizza Place,Pharmacy,Opera House,New American Restaurant


### Cluster 4

In [84]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 3, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Etobicoke,3.0,Drugstore,Rental Car Location,Truck Stop,Garden Center,American Restaurant,Monument / Landmark,Pizza Place,Pharmacy,Park,Opera House


### Cluster 5

In [85]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 4, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,4.0,Playground,Business Service,American Restaurant,Pub,Plaza,Pizza Place,Pharmacy,Park,Opera House,New American Restaurant
