# Q1 Transform data from Wiki to pandas dataframe

In [1]:
# setup
import numpy as np
import pandas as pd

In [2]:
# setup a empty dataframe
data = pd.DataFrame()

# use panda read_html 
url = ['https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M']
for url in url:
    data = data.append(pd.read_html(url))
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,Borough,Neighbourhood,Postcode
0,,,,,,,,,,,...,,,,,,,,Not assigned,Not assigned,M1A
1,,,,,,,,,,,...,,,,,,,,Not assigned,Not assigned,M2A
2,,,,,,,,,,,...,,,,,,,,North York,Parkwoods,M3A
3,,,,,,,,,,,...,,,,,,,,North York,Victoria Village,M4A
4,,,,,,,,,,,...,,,,,,,,Downtown Toronto,Harbourfront,M5A


In [3]:
# only need postcode, borough and neighbourhood 
data = data[['Postcode', 'Borough', 'Neighbourhood']]
data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
# delete all rows that borough equals not assigned
data = data[data['Borough'] != "Not assigned"]

In [5]:
data

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
...,...,...,...
1,,,
2,,,
3,,,
0,,,


In [6]:
# first according postcode and borough to group, and add it together in neighbourhood with comma to separate
data = data.groupby(['Postcode','Borough'])['Neighbourhood'].apply(', '.join).reset_index()
data

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [7]:
# iterate dataframe and copy content of borough when its neighbourhhod is not assigned
for index, row in data.iterrows():
    if row['Neighbourhood'] == 'Not assigned':
        row['Neighbourhood'] = row['Borough']

In [8]:
data.shape

(103, 3)

# Q2 add latitude and longitude

In [15]:
pip install geocoder

Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
Note: you may need to restart the kernel to use updated packages.


In [9]:
import geocoder

In [10]:
data['Postcode'][0]

'M1B'

In [11]:
# define a function to get latitude and longitude
def get_latlng(postcode):
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postcode))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [12]:
# extract postcode in list
postcodes = data['Postcode'].tolist()

# loop for all postcodes in data
coords = [get_latlng(post_code) for post_code in postcodes]

In [13]:
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
data['Latitude'] = df_coords['Latitude']
data['Longitude'] = df_coords['Longitude']

In [15]:
data

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.217590
4,M1H,Scarborough,Cedarbrae,43.769688,-79.239440
...,...,...,...,...,...
98,M9N,York,Weston,43.704845,-79.517546
99,M9P,Etobicoke,Westmount,43.696505,-79.530252
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.686810,-79.557284
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.743145,-79.584664


# Q3 Explore and cluster the neighborhoods in Toronto

#### download all denpendencies

In [18]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\camch\Anaconda3

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.0.1               |             py_0         575 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    certifi-2019.9.11          |           py37_0         147 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-

#### use geopy to get latitude and longitude of toronto

In [20]:
adress = 'Toronto'
geolocator = Nominatim(user_agent='t_ex')
location = geolocator.geocode(adress)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


#### creat a map of toronto

In [25]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10.2)

# add marker to each postcode
for lat, lng in zip(data['Latitude'], data['Longitude']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        fill=True,
        parse_html=False).add_to(map_toronto)
    
map_toronto

#### for simplicity, only cluster the borough of Scarborough

In [26]:
scar_data = data[data['Borough']=='Scarborough'].reset_index(drop=True)

adress = 'Scarborough, Toronto'
geolocator = Nominatim(user_agent='t_ex')
location = geolocator.geocode(adress)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough Toronto are 43.773077, -79.257774.


In [29]:
map_scar = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# add marker to each postcode
for lat, lng in zip(scar_data['Latitude'], scar_data['Longitude']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        fill=True,
        parse_html=False).add_to(map_scar)
    
map_scar

#### explore neighborhoods in Scarborough

In [42]:
# creat a function to repeat extract all location data, 

def getNearbyVenues(postcode, latitude, longitude, radius=500):
    venues_list=[]
    
    for post, lat, lng in zip(postcode, latitude, longitude):
        print(post)
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # get relevant info from file
        venues_list.append([(
            post, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode', 
                  'Postcode Latitude', 
                  'Postcode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)  

In [43]:
LIMIT = 100
scar_venues = getNearbyVenues(postcode=scar_data['Postcode'],
                                latitude=scar_data['Latitude'],
                                longitude=scar_data['Longitude']
                                  )
scar_venues.head()

M1B
M1C
M1E
M1G
M1H
M1J
M1K
M1L
M1M
M1N
M1P
M1R
M1S
M1T
M1V
M1W
M1X


Unnamed: 0,Postcode,Postcode Latitude,Postcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.811525,-79.195517,Wood Bison Paddock,43.811732,-79.200708,Zoo Exhibit
1,M1B,43.811525,-79.195517,Canadian Appliance Source Whitby,43.808353,-79.191331,Home Service
2,M1C,43.785665,-79.158725,Royal Canadian Legion,43.782533,-79.163085,Bar
3,M1E,43.765815,-79.175193,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping
4,M1E,43.765815,-79.175193,Heron Park Community Centre,43.768867,-79.176958,Gym / Fitness Center


In [41]:
scar_venues.shape

(83, 7)

#### analyse postcode area

In [45]:
# one hot encoding
scar_onehot = pd.get_dummies(scar_venues[['Venue Category']], prefix="", prefix_sep="")

# add postcode column back to dataframe
scar_onehot['Postcode'] = scar_venues['Postcode'] 

# move neighborhood column to the first column
fixed_columns = [scar_onehot.columns[-1]] + list(scar_onehot.columns[:-1])
scar_onehot = scar_onehot[fixed_columns]

scar_onehot.head()

Unnamed: 0,Postcode,Auto Garage,Badminton Court,Bakery,Bank,Bar,Bistro,Brewery,Bubble Tea Shop,Bus Line,Bus Station,Bus Stop,Business Service,Chinese Restaurant,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Department Store,Discount Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hobby Shop,Home Service,Indian Restaurant,Intersection,Korean Restaurant,Liquor Store,Metro Station,Other Great Outdoors,Park,Pharmacy,Pizza Place,Playground,Pool,Rental Service,Restaurant,Sandwich Place,Shanghai Restaurant,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Sushi Restaurant,Thai Restaurant,Trail,Train Station,Vietnamese Restaurant,Wine Shop,Zoo Exhibit
0,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M1C,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M1E,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M1E,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### caculate the mean of frequency of occurrence of each category

In [48]:
scar_grouped = scar_onehot.groupby('Postcode').mean().reset_index()
scar_grouped

Unnamed: 0,Postcode,Auto Garage,Badminton Court,Bakery,Bank,Bar,Bistro,Brewery,Bubble Tea Shop,Bus Line,Bus Station,Bus Stop,Business Service,Chinese Restaurant,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Department Store,Discount Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hobby Shop,Home Service,Indian Restaurant,Intersection,Korean Restaurant,Liquor Store,Metro Station,Other Great Outdoors,Park,Pharmacy,Pizza Place,Playground,Pool,Rental Service,Restaurant,Sandwich Place,Shanghai Restaurant,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Sushi Restaurant,Thai Restaurant,Trail,Train Station,Vietnamese Restaurant,Wine Shop,Zoo Exhibit
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
1,M1C,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0
5,M1J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
6,M1K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M1L,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M1M,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M1N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.166667,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### get top 10 venues per postcode

In [49]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [59]:
indicators = ['st', 'nd', 'rd']

num_top_venues = 10

# create columns
columns = ['Postcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Postcode'] = scar_grouped['Postcode']

for ind in np.arange(scar_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = return_most_common_venues(scar_grouped.iloc[ind, :], num_top_venues)

venues_sorted.head()

Unnamed: 0,Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Zoo Exhibit,Home Service,Gym / Fitness Center,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Discount Store
1,M1C,Bar,Zoo Exhibit,College Stadium,Gym,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
2,M1E,Gym / Fitness Center,Park,Construction & Landscaping,Zoo Exhibit,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
3,M1G,Coffee Shop,Korean Restaurant,Park,Business Service,College Stadium,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint
4,M1H,Trail,Playground,Zoo Exhibit,Coffee Shop,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant


#### cluster

In [60]:
# set number of clusters
kclusters = 5

scar_grouped_clustering = scar_grouped.drop('Postcode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scar_grouped_clustering)

# check cluster labels
kmeans.labels_[0:10] 

array([0, 2, 1, 1, 4, 1, 1, 1, 1, 1])

In [61]:
# add clustering labels
venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
scar_merged = scar_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scar_merged = scar_merged.join(venues_sorted.set_index('Postcode'), on='Postcode')

scar_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517,0.0,Zoo Exhibit,Home Service,Gym / Fitness Center,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Discount Store
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725,2.0,Bar,Zoo Exhibit,College Stadium,Gym,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193,1.0,Gym / Fitness Center,Park,Construction & Landscaping,Zoo Exhibit,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
3,M1G,Scarborough,Woburn,43.768369,-79.21759,1.0,Coffee Shop,Korean Restaurant,Park,Business Service,College Stadium,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944,4.0,Trail,Playground,Zoo Exhibit,Coffee Shop,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant


In [69]:
scar_merged['Cluster Labels'] = scar_merged['Cluster Labels'].astype(int)
scar_merged.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517,0,Zoo Exhibit,Home Service,Gym / Fitness Center,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Discount Store
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725,2,Bar,Zoo Exhibit,College Stadium,Gym,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193,1,Gym / Fitness Center,Park,Construction & Landscaping,Zoo Exhibit,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant
3,M1G,Scarborough,Woburn,43.768369,-79.21759,1,Coffee Shop,Korean Restaurant,Park,Business Service,College Stadium,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944,4,Trail,Playground,Zoo Exhibit,Coffee Shop,Grocery Store,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant


#### visualize the clusters

In [70]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scar_merged['Latitude'], scar_merged['Longitude'], scar_merged['Postcode'], scar_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters