# Part 1
## Loading data from Wikipedia

In [1]:
import pandas as pd
print("done")

done


### To load the data straight from the webpage, I will use pd.read_html function

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

df=pd.read_html(url, index_col=None, header=0)[0]

df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### In the assginment the following is mentioned: "Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned" so I decided to delete those rows from the dataframe

In [3]:
df = df[df.Borough != "Not assigned"]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### After that I would need to reset the index

In [4]:
df.reset_index(drop = True, inplace = True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
df.shape

(103, 3)

# Part 2
## Getting coordinates from zipcodes

### First I should install useful libraries

In [6]:
!pip install geopy
print("done")

done


In [7]:
!pip install geocoder
print("Done")

Done


### Before I start getting coordinates, I would like to change the column name from "Postal Code" to "PostalCode" for simplicity and avoiding errors

In [8]:
df.columns = df.columns.str.replace(' ', '')
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Finally I will run a slightly modified version of the helpful loop function which was posted in the instructions of the assignment

In [9]:
import geocoder


def get_geocoder(postalCode_from_df):
     # initialize your variable to None
     lat_lng_coords = None
     # loop until you get the coordinates
     while(lat_lng_coords is None):
       g = geocoder.arcgis('{}, Toronto, Ontario'.format(postalCode_from_df))
       lat_lng_coords = g.latlng
     latitude = lat_lng_coords[0]
     longitude = lat_lng_coords[1]
     return latitude,longitude


df['latitude'], df['longitude'] = zip(*df['PostalCode'].apply(get_geocoder))

In [10]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,latitude,longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66253,-79.39188


# Part 3
## Clustering neighbourhoods

### For this part I would like to apply the same analysis to the Toronto neighbourhoods which was discussed in the "Segmenting and Clustering Neighborhoods in New York City" notebook of this course

### To start I will choose the North York borough

In [11]:
north_york = df[df['Borough'] == 'North York'].reset_index(drop=True)
north_york.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,latitude,longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
3,M3B,North York,Don Mills,43.74923,-79.36186
4,M6B,North York,Glencairn,43.70687,-79.44812


In [13]:
!pip install folium
import folium
from geopy.geocoders import Nominatim
print ("Done")

Done


In [14]:
address = 'North York'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York are 43.7543263, -79.44911696639593.


### Now I can create the map of the North York borrough and its neighbourhoods

In [15]:
map_northyork = folium.Map(location=[latitude, longitude], zoom_start=11)


for lat, lng, label in zip(north_york['latitude'], north_york['longitude'], north_york['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_northyork)  
    
map_northyork

### Necessary information to connect ro Foursquare

In [16]:
CLIENT_ID = 'KKCM3R2MEVSSOYEVYNJ2423YBANRFA0C53RNMVKYSQ3ACZZ3' # your Foursquare ID
CLIENT_SECRET = 'D13PGQND0YJFVIZMRHMVVGHWIPJ0FDGHUEEJDBQMPWX5F3GD' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KKCM3R2MEVSSOYEVYNJ2423YBANRFA0C53RNMVKYSQ3ACZZ3
CLIENT_SECRET:D13PGQND0YJFVIZMRHMVVGHWIPJ0FDGHUEEJDBQMPWX5F3GD


### Get the coordinates of the first neighbourhood in North York borrough

In [17]:
neighbourhood_latitude = north_york.loc[0, 'latitude'] # neighborhood latitude value
neighbourhood_longitude = north_york.loc[0, 'longitude'] # neighborhood longitude value

neighbourhood_name = north_york.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Parkwoods are 43.75245000000007, -79.32990999999998.


### Generate neccesary URL to connect to the Foursquare and get the results

In [19]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id=KKCM3R2MEVSSOYEVYNJ2423YBANRFA0C53RNMVKYSQ3ACZZ3&client_secret=D13PGQND0YJFVIZMRHMVVGHWIPJ0FDGHUEEJDBQMPWX5F3GD&v=20180605&ll=43.75245000000007,-79.32990999999998&radius=500&limit=100'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=KKCM3R2MEVSSOYEVYNJ2423YBANRFA0C53RNMVKYSQ3ACZZ3&client_secret=D13PGQND0YJFVIZMRHMVVGHWIPJ0FDGHUEEJDBQMPWX5F3GD&v=20180605&ll=43.75245000000007,-79.32990999999998&radius=500&limit=100'

### Import libraries which I missed

In [20]:
import json
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
import numpy as np # library to handle data in a vectorized manner
print("Done")

Done


### Get results for Parkwood

In [21]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fc8b33370ed9c382a9b105d'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 2,
  'suggestedBounds': {'ne': {'lat': 43.75695000450007,
    'lng': -79.32369182386579},
   'sw': {'lat': 43.747949995500065, 'lng': -79.33612817613418}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 186,
        'cc': 'CA',
      

In [22]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Nearby venues dataframe for Parkwood

In [23]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114


### Creating the function to get nearby venues for all neighbourhoods in North York

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
york_venues = getNearbyVenues(names=north_york['Neighbourhood'],
                                   latitudes=north_york['latitude'],
                                   longitudes=north_york['longitude']
                                  )

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West


In [26]:
print(york_venues.shape)
york_venues.head()

(283, 7)


Unnamed: 0,Neighborhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75245,-79.32991,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.73057,-79.31306,Wigmore Park,43.731023,-79.310771,Park
3,Victoria Village,43.73057,-79.31306,Memories of Africa,43.726602,-79.312427,Grocery Store
4,Victoria Village,43.73057,-79.31306,Guardian Drug,43.730584,-79.307432,Pharmacy


In [27]:
york_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",19,19,19,19,19,19
Don Mills,15,15,15,15,15,15
Downsview,41,41,41,41,41,41
"Fairview, Henry Farm, Oriole",55,55,55,55,55,55
Glencairn,13,13,13,13,13,13
Hillcrest Village,2,2,2,2,2,2
Humber Summit,2,2,2,2,2,2
"Humberlea, Emery",4,4,4,4,4,4
"Lawrence Manor, Lawrence Heights",51,51,51,51,51,51


In [28]:
print('There are {} uniques categories.'.format(len(york_venues['Venue Category'].unique())))

There are 105 uniques categories.


### Analyzing each neighbourhood

In [29]:
# one hot encoding
york_onehot = pd.get_dummies(york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
york_onehot['Neighborhood'] = york_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [york_onehot.columns[-1]] + list(york_onehot.columns[:-1])
york_onehot = york_onehot[fixed_columns]

york_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Arts & Crafts Store,Auto Workshop,Automotive Shop,Bakery,Bank,Bar,Baseball Field,Beer Store,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
york_onehot.shape

(283, 106)

In [31]:
york_grouped = york_onehot.groupby('Neighborhood').mean().reset_index()
york_grouped

Unnamed: 0,Neighborhood,American Restaurant,Arts & Crafts Store,Auto Workshop,Automotive Shop,Bakery,Bank,Bar,Baseball Field,Beer Store,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Women's Store
0,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
1,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Don Mills,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Downsview,0.0,0.0,0.02439,0.0,0.04878,0.0,0.0,0.0,0.04878,...,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,0.0,0.073171,0.0
4,"Fairview, Henry Farm, Oriole",0.0,0.0,0.0,0.0,0.0,0.036364,0.018182,0.018182,0.0,...,0.0,0.018182,0.0,0.018182,0.018182,0.0,0.0,0.018182,0.0,0.054545
5,Glencairn,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Humberlea, Emery",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Lawrence Manor, Lawrence Heights",0.039216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.039216,0.0,0.0,0.0,0.0,0.039216


### Let's print each neighborhood along with the top 5 most common venues

In [32]:
num_top_venues = 5

for hood in york_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = york_grouped[york_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bayview Village----
                        venue  freq
0                       Trail  0.25
1  Construction & Landscaping  0.25
2                        Park  0.25
3          Golf Driving Range  0.25
4         American Restaurant  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0      Sandwich Place  0.11
1  Italian Restaurant  0.11
2         Coffee Shop  0.11
3           Juice Bar  0.05
4             Butcher  0.05


----Don Mills----
          venue  freq
0  Intersection  0.13
1   Coffee Shop  0.13
2           Gym  0.07
3           Spa  0.07
4   Supermarket  0.07


----Downsview----
                   venue  freq
0         Discount Store  0.07
1  Vietnamese Restaurant  0.07
2            Pizza Place  0.07
3                 Bakery  0.05
4     Turkish Restaurant  0.05


----Fairview, Henry Farm, Oriole----
                  venue  freq
0        Clothing Store  0.11
1         Women's Store  0.05
2  Fast Food Restaurant  0.05
3           Coffee Shop  0.05
4

In [33]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Crreate dataframe where top 10 venues for each neighbourhood are displayed

In [34]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = york_grouped['Neighborhood']

for ind in np.arange(york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(york_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bayview Village,Golf Driving Range,Construction & Landscaping,Trail,Park,Women's Store,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint
1,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Sandwich Place,Juice Bar,Sports Club,Greek Restaurant,Liquor Store,Pharmacy,Indian Restaurant,Comfort Food Restaurant
2,Don Mills,Intersection,Coffee Shop,Supermarket,Smoke Shop,Soccer Field,Spa,Burger Joint,Bubble Tea Shop,Grocery Store,Gym
3,Downsview,Pizza Place,Vietnamese Restaurant,Discount Store,Sandwich Place,Turkish Restaurant,Shopping Mall,Bakery,Beer Store,Grocery Store,Coffee Shop
4,"Fairview, Henry Farm, Oriole",Clothing Store,Women's Store,Coffee Shop,Fast Food Restaurant,Restaurant,Bank,Cosmetics Shop,Food Court,Japanese Restaurant,Juice Bar


### Clustering neighbourhoods
### I will create 5 clusters for the North York and visualize them

In [35]:
# set number of clusters
kclusters = 5

york_grouped_clustering = york_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 1, 3, 4, 0], dtype=int32)

In [36]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'cluster Labels', kmeans.labels_)

york_merged = north_york


york_merged = york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

york_merged.head() 

Unnamed: 0,PostalCode,Borough,Neighbourhood,latitude,longitude,cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,1.0,Food & Drink Shop,Park,Women's Store,Dessert Shop,Electronics Store,Fast Food Restaurant,Food Court,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store
1,M4A,North York,Victoria Village,43.73057,-79.31306,4.0,Pharmacy,Park,Grocery Store,Women's Store,Gift Shop,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,0.0,Clothing Store,Furniture / Home Store,Women's Store,Men's Store,Bookstore,Coffee Shop,Cosmetics Shop,Food Court,American Restaurant,Toy / Game Store
3,M3B,North York,Don Mills,43.74923,-79.36186,0.0,Intersection,Coffee Shop,Supermarket,Smoke Shop,Soccer Field,Spa,Burger Joint,Bubble Tea Shop,Grocery Store,Gym
4,M6B,North York,Glencairn,43.70687,-79.44812,0.0,Pizza Place,Grocery Store,Japanese Restaurant,Latin American Restaurant,Rental Car Location,Fast Food Restaurant,Gas Station,Mediterranean Restaurant,Pub,Bank


In [37]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(york_merged['latitude'], york_merged['longitude'], york_merged['Neighbourhood'], york_merged['cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Exploring created clusters

In [39]:
york_merged.loc[york_merged['cluster Labels'] == 0, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,North York,0.0,Clothing Store,Furniture / Home Store,Women's Store,Men's Store,Bookstore,Coffee Shop,Cosmetics Shop,Food Court,American Restaurant,Toy / Game Store
3,North York,0.0,Intersection,Coffee Shop,Supermarket,Smoke Shop,Soccer Field,Spa,Burger Joint,Bubble Tea Shop,Grocery Store,Gym
4,North York,0.0,Pizza Place,Grocery Store,Japanese Restaurant,Latin American Restaurant,Rental Car Location,Fast Food Restaurant,Gas Station,Mediterranean Restaurant,Pub,Bank
5,North York,0.0,Intersection,Coffee Shop,Supermarket,Smoke Shop,Soccer Field,Spa,Burger Joint,Bubble Tea Shop,Grocery Store,Gym
8,North York,0.0,Clothing Store,Women's Store,Coffee Shop,Fast Food Restaurant,Restaurant,Bank,Cosmetics Shop,Food Court,Japanese Restaurant,Juice Bar
9,North York,0.0,Furniture / Home Store,Japanese Restaurant,Caribbean Restaurant,Restaurant,Road,Miscellaneous Shop,Coffee Shop,Metro Station,Massage Studio,Fast Food Restaurant
10,North York,0.0,Golf Driving Range,Construction & Landscaping,Trail,Park,Women's Store,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint
11,North York,0.0,Pizza Place,Vietnamese Restaurant,Discount Store,Sandwich Place,Turkish Restaurant,Shopping Mall,Bakery,Beer Store,Grocery Store,Coffee Shop
13,North York,0.0,Pizza Place,Vietnamese Restaurant,Discount Store,Sandwich Place,Turkish Restaurant,Shopping Mall,Bakery,Beer Store,Grocery Store,Coffee Shop
16,North York,0.0,Middle Eastern Restaurant,Korean Restaurant,Pizza Place,Café,Supermarket,Sandwich Place,Fried Chicken Joint,Dessert Shop,Sporting Goods Shop,Grocery Store


In [41]:
york_merged.loc[york_merged['cluster Labels'] == 1, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1.0,Food & Drink Shop,Park,Women's Store,Dessert Shop,Electronics Store,Fast Food Restaurant,Food Court,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store
6,North York,1.0,Residential Building (Apartment / Condo),Park,Women's Store,Gift Shop,Discount Store,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint
14,North York,1.0,Bakery,Park,Women's Store,Greek Restaurant,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint,Frozen Yogurt Shop


In [42]:
york_merged.loc[york_merged['cluster Labels'] == 2, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,North York,2.0,Park,Women's Store,Dessert Shop,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store


In [44]:
york_merged.loc[york_merged['cluster Labels'] == 3, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,North York,3.0,Construction & Landscaping,Furniture / Home Store,Women's Store,Dessert Shop,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint,Frozen Yogurt Shop


In [46]:
york_merged.loc[york_merged['cluster Labels'] == 4, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,4.0,Pharmacy,Park,Grocery Store,Women's Store,Gift Shop,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint
19,North York,4.0,Coffee Shop,Park,Nightclub,Women's Store,Gift Shop,Electronics Store,Fast Food Restaurant,Food & Drink Shop,Food Court,Fried Chicken Joint
23,North York,4.0,Pizza Place,Coffee Shop,Park,Grocery Store,Butcher,Women's Store,Gift Shop,Fast Food Restaurant,Food & Drink Shop,Food Court
