# Capstone project - 1

## Week - 3

#### First off, we will start by importing the required files.

In [143]:
import requests
import lxml.html as lh
import pandas as pd

#### Extracting the table from URL 

In [144]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(url)
doc = lh.fromstring(page.content)
tr_elements = doc.xpath('//tr')

In [145]:
[len(T) for T in tr_elements[:12]]

[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

#### Extracting the table contents

In [146]:
col=[]
i=0

for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    print('%d: "%s"'%(i,name))
    col.append((name,[]))
col

1: "Postal Code
"
2: "Borough
"
3: "Neighborhood
"


[('Postal Code\n', []), ('Borough\n', []), ('Neighborhood\n', [])]

#### Removing the new line character from the column names

In [147]:
tr_elements = doc.xpath('//tr')
col=[]
i=0
for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    col.append(name)
col_new=[]
for element in col:
    col_new.append((element.strip(), []))
col_new

[('Postal Code', []), ('Borough', []), ('Neighborhood', [])]

In [148]:
for j in range(1,len(tr_elements)):
    T=tr_elements[j]
    
    if len(T)!=3:
        break
    
    i=0    
    for t in T.iterchildren():
        data=t.text_content() 
        if i>0:
            try:
                data=int(data)
            except:
                pass
        col_new[i][1].append(data)
        i+=1

#### Loading the data into pandas dataframe

In [149]:
Dict={title:column for (title,column) in col_new}
df=pd.DataFrame(Dict)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


In [150]:
df.shape

(181, 3)

#### Removing the newline character from the entire table

In [151]:
df.replace(r'\n', '', regex = True, inplace = True)

In [152]:
df_new = df[(df.Borough != 'Not assigned')]

In [153]:
df_new.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [154]:
df_new.set_index(['Postal Code'], inplace = True)
df_new.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [155]:
df_new.shape

(104, 2)

#### Sorting the table to get the desired result

In [156]:
df_new = df_new.sort_values('Postal Code')
df_new = df_new.iloc[1:]

In [157]:
df_new.reset_index(inplace = True)

In [158]:
df_new.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [159]:
df_new.shape

(103, 3)

#### From the csv file, loading the data into the pandas dataframe

In [160]:
df1 = pd.read_csv(r'Geospatial_Coordinates.csv')

In [161]:
df1.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [162]:
df1.shape

(103, 3)

#### Merging the two dataframes to get the desired result

In [163]:
df_new = df_new.join(df1['Latitude'])
df_new = df_new.join(df1['Longitude'])

In [164]:
df_new.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [165]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_new['Borough'].unique()),
        df_new.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


#### Installing geopy and including the required libraries for clustering and plotting

In [166]:
!pip install geopy



In [167]:
from geopy.geocoders import Nominatim
import folium
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

#### Getting the coordinates of Toronto city

In [168]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.6534817, -79.3839347.


#### Plotting the map of Toronto with neighborhoods superimposed on top

In [169]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df_new['Latitude'], df_new['Longitude'], df_new['Borough'], df_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [170]:
northy_data = df_new[df_new['Borough'] == 'North York'].reset_index(drop=True)
northy_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"York Mills, Silver Hills",43.75749,-79.374714
4,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493


#### Getting the lattitude and longitude of North York, Toronto

In [171]:
address = 'North York, Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York are 43.7543263, -79.44911696639593.


#### Plotting the neighbohoods in the North York borough

In [172]:
map_ny = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(northy_data['Latitude'], northy_data['Longitude'], northy_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_ny)  
    
map_ny

In [173]:
northy_data.loc[0, 'Neighborhood']

'Hillcrest Village'

In [174]:
neighborhood_latitude = northy_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = northy_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = northy_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Hillcrest Village are 43.8037622, -79.3634517.


#### Defining Foursquare credentials and getting the top 100 venues in the Hillcrest village within a radius of 500 metres.

In [175]:
LIMIT = 100
radius = 500
CLIENT_ID = 'UJ3EDMSZXOBZALCHQ10LAB5IVXLZYEWDEQUWMCHTK405UWAD' 
CLIENT_SECRET = 'U33AEEMOA4YZE5ANZRIPBFEF0UJKB2ZAE5Y32HPHCO1NMKAR'
VERSION = '20180605'
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=UJ3EDMSZXOBZALCHQ10LAB5IVXLZYEWDEQUWMCHTK405UWAD&client_secret=U33AEEMOA4YZE5ANZRIPBFEF0UJKB2ZAE5Y32HPHCO1NMKAR&v=20180605&ll=43.8037622,-79.3634517&radius=500&limit=100'

In [176]:
results = requests.get(url).json()

In [177]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Importing Json, cleaning it and structuring it into pandas dataframe

In [178]:
import json
import requests
from pandas.io.json import json_normalize

In [179]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) 

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Eagle's Nest Golf Club,Golf Course,43.805455,-79.364186
1,New York Fries,Fast Food Restaurant,43.803664,-79.363905
2,AY Jackson Pool,Pool,43.804515,-79.366138
3,Villa Madina,Mediterranean Restaurant,43.801685,-79.363938
4,Duncan Creek Park,Dog Run,43.805539,-79.360695


In [180]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


#### Function to explore all the neighborhoods in North York, Toronto

In [181]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [182]:
norhty_venues = getNearbyVenues(names=northy_data['Neighborhood'],
                                   latitudes=northy_data['Latitude'],
                                   longitudes=northy_data['Longitude']
                                  )

Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Manor, Lawrence Heights
Glencairn
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Humberlea, Emery


In [183]:
print(norhty_venues.shape)
norhty_venues.head()

(247, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,Hillcrest Village,43.803762,-79.363452,New York Fries,43.803664,-79.363905,Fast Food Restaurant
2,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
3,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
4,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run


In [184]:
norhty_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
Don Mills,27,27,27,27,27,27
Downsview,15,15,15,15,15,15
"Fairview, Henry Farm, Oriole",66,66,66,66,66,66
Glencairn,6,6,6,6,6,6
Hillcrest Village,5,5,5,5,5,5
Humber Summit,3,3,3,3,3,3
"Humberlea, Emery",2,2,2,2,2,2


In [185]:
print('There are {} uniques categories.'.format(len(norhty_venues['Venue Category'].unique())))

There are 106 uniques categories.


#### Analysing each neighborhood

In [186]:
norhty_onehot = pd.get_dummies(norhty_venues[['Venue Category']], prefix="", prefix_sep="")

norhty_onehot['Neighborhood'] = norhty_venues['Neighborhood'] 

fixed_columns = [norhty_onehot.columns[-1]] + list(norhty_onehot.columns[:-1])
norhty_onehot = norhty_onehot[fixed_columns]

norhty_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [187]:
norhty_onehot.shape

(247, 107)

In [188]:
norhty_grouped = norhty_onehot.groupby('Neighborhood').mean().reset_index()
norhty_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,...,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0
3,Don Mills,0.0,0.0,0.0,0.037037,0.0,0.074074,0.0,0.0,0.0,...,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Fairview, Henry Farm, Oriole",0.015152,0.0,0.015152,0.0,0.0,0.015152,0.0,0.030303,0.030303,...,0.0,0.0,0.0,0.030303,0.0,0.015152,0.030303,0.0,0.015152,0.0
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea, Emery",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [189]:
norhty_grouped.shape

(19, 107)

#### Each neighborhood along with the top 5 most common venues

In [190]:
num_top_venues = 5

for hood in norhty_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = norhty_grouped[norhty_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
            venue  freq
0     Coffee Shop  0.10
1            Bank  0.10
2     Bridal Shop  0.05
3  Ice Cream Shop  0.05
4     Gas Station  0.05


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0         Coffee Shop  0.09
1  Italian Restaurant  0.09
2          Restaurant  0.09
3      Sandwich Place  0.09
4      Breakfast Spot  0.04


----Don Mills----
                 venue  freq
0                  Gym  0.11
1          Coffee Shop  0.07
2     Asian Restaurant  0.07
3           Restaurant  0.07
4  Japanese Restaurant  0.07


----Downsview----
           venue  freq
0  Grocery Store  0.13
1           Park  0.13
2           Bank  0.07
3  Shopping Mall  0.07
4    Snack Place  0.07


----Fairview, Henry Farm, Oriole--

#### Converting those into a dataframe

In [191]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [192]:
import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = norhty_grouped['Neighborhood']

for ind in np.arange(norhty_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(norhty_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Restaurant,Pizza Place,Bridal Shop,Ice Cream Shop,Pharmacy,Park,Diner,Sandwich Place
1,Bayview Village,Chinese Restaurant,Japanese Restaurant,Café,Bank,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Italian Restaurant,Restaurant,Liquor Store,Juice Bar,Café,Butcher,Comfort Food Restaurant,Pharmacy
3,Don Mills,Gym,Restaurant,Asian Restaurant,Japanese Restaurant,Beer Store,Coffee Shop,Discount Store,Shopping Mall,Dim Sum Restaurant,Bike Shop
4,Downsview,Grocery Store,Park,Discount Store,Airport,Food Truck,Athletics & Sports,Gym / Fitness Center,Bank,Business Service,Baseball Field


#### Clustering the neighborhood into 5 clusters using K-means

In [141]:
kclusters = 5

norhty_grouped_clustering = norhty_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(norhty_grouped_clustering)

kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 4, 3])

#### Dataframe with top 10 venues for each neighborhood

In [193]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

norhty_merged = northy_data

norhty_merged = norhty_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

norhty_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,1.0,Golf Course,Pool,Mediterranean Restaurant,Fast Food Restaurant,Dog Run,Diner,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,1.0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Restaurant,Toy / Game Store,Bakery,Tea Room,Bank,Food Court
2,M2K,North York,Bayview Village,43.786947,-79.385975,1.0,Chinese Restaurant,Japanese Restaurant,Café,Bank,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
3,M2L,North York,"York Mills, Silver Hills",43.75749,-79.374714,2.0,Park,Vietnamese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
4,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493,,,,,,,,,,,


#### Visualizing the clusters

In [194]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
count = 0

markers_colors = []
for lat, lon, poi, cluster in zip(norhty_merged['Latitude'], norhty_merged['Longitude'], norhty_merged['Neighborhood'], norhty_merged['Cluster Labels']):
    if count<4:
        label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
        folium.CircleMarker(
            [lat, lon],
            radius=7,
            popup=label,
            color=rainbow[int(cluster-1)],
            fill=True,
            fill_color=rainbow[int(cluster-1)],
            fill_opacity=0.7).add_to(map_clusters)
        count = count+1
map_clusters

#### Examining the clusters

In [200]:
norhty_merged.loc[norhty_merged['Cluster Labels'] == 0, norhty_merged.columns[[1] + list(range(5, norhty_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,North York,0.0,Park,Convenience Store,Vietnamese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,Department Store
8,North York,0.0,Construction & Landscaping,Food & Drink Shop,Park,Vietnamese Restaurant,Discount Store,Clothing Store,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop


In [201]:
norhty_merged.loc[norhty_merged['Cluster Labels'] == 1, norhty_merged.columns[[1] + list(range(5, norhty_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1.0,Golf Course,Pool,Mediterranean Restaurant,Fast Food Restaurant,Dog Run,Diner,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
1,North York,1.0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Restaurant,Toy / Game Store,Bakery,Tea Room,Bank,Food Court
2,North York,1.0,Chinese Restaurant,Japanese Restaurant,Café,Bank,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
5,North York,1.0,Ramen Restaurant,Japanese Restaurant,Sushi Restaurant,Restaurant,Sandwich Place,Café,Coffee Shop,Pizza Place,Ice Cream Shop,Indonesian Restaurant
7,North York,1.0,Coffee Shop,Discount Store,Grocery Store,Pharmacy,Pizza Place,Butcher,Home Service,Bank,Comfort Food Restaurant,Construction & Landscaping
9,North York,1.0,Gym,Restaurant,Asian Restaurant,Japanese Restaurant,Beer Store,Coffee Shop,Discount Store,Shopping Mall,Dim Sum Restaurant,Bike Shop
10,North York,1.0,Gym,Restaurant,Asian Restaurant,Japanese Restaurant,Beer Store,Coffee Shop,Discount Store,Shopping Mall,Dim Sum Restaurant,Bike Shop
11,North York,1.0,Coffee Shop,Bank,Restaurant,Pizza Place,Bridal Shop,Ice Cream Shop,Pharmacy,Park,Diner,Sandwich Place
12,North York,1.0,Coffee Shop,Miscellaneous Shop,Caribbean Restaurant,Massage Studio,Bar,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
13,North York,1.0,Grocery Store,Park,Discount Store,Airport,Food Truck,Athletics & Sports,Gym / Fitness Center,Bank,Business Service,Baseball Field


In [197]:
norhty_merged.loc[norhty_merged['Cluster Labels'] == 2, norhty_merged.columns[[1] + list(range(5, norhty_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,2.0,Park,Vietnamese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store


In [198]:
norhty_merged.loc[norhty_merged['Cluster Labels'] == 3, norhty_merged.columns[[1] + list(range(5, norhty_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,North York,3.0,Food Service,Baseball Field,Vietnamese Restaurant,Distribution Center,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega


In [199]:
norhty_merged.loc[norhty_merged['Cluster Labels'] == 4, norhty_merged.columns[[1] + list(range(5, norhty_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,North York,4.0,Furniture / Home Store,Pizza Place,Construction & Landscaping,Vietnamese Restaurant,Discount Store,Clothing Store,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop
