# Notebook for the final project of IBM's professional Data Science specialization

In [1]:
import os

import numpy as np
import pandas as pd
from pandas.io.html import read_html

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M"
page = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
DFs = read_html(page, match='Borough', header=0) # a list of DataFrames: the tables on the page that contain the text 'Borough'
df = DFs[0] # take the first DataFrame from that list

## Exploring the data:

In [3]:
# Per-column summary of the scraped table (count / unique / top / freq)
df.describe()

Unnamed: 0,Postcode,Borough,Neighbourhood
count,288,288,288
unique,180,12,209
top,M8Y,Not assigned,Not assigned
freq,8,77,78


In [4]:
# Count how often the 'Not assigned' placeholder occurs in each column
unassigned_boroughs = (df['Borough'] == 'Not assigned').sum()
unassigned_hoods = (df['Neighbourhood'] == 'Not assigned').sum()
print('not assigned boroughs: ', unassigned_boroughs)
print('not assigned neighbourhoods: ', unassigned_hoods)

not assigned boroughs:  77
not assigned neighbourhoods:  78


### Ignore cells with a borough that is Not assigned:

In [5]:
# Mark 'Not assigned' boroughs as missing so that dropna() can remove them.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the attribute-accessed Series: the in-place
# form acts on an intermediate object and is not guaranteed to update `df`
# (it does nothing under pandas Copy-on-Write).
df['Borough'] = df['Borough'].replace('Not assigned', np.nan)

In [6]:
# Drop every row whose Borough is missing (modifies df in place),
# then re-count the 'Not assigned' placeholder as a sanity check
df.dropna(subset=['Borough'], inplace=True)
print('not assigned boroughs: ', df.loc[df.Borough == 'Not assigned', 'Borough'].count())

not assigned boroughs:  0


### grouping repeated neighbourhoods per postcode:

In [7]:
# Move Postcode and Borough into a two-level index (in place), so that
# df.Postcode / df.Borough are no longer columns after this cell
df.set_index(['Postcode', 'Borough'], inplace=True)

In [9]:
# One row per (Postcode, Borough) pair: the values of each remaining column
# are concatenated into a single comma-separated string per group
d = df.groupby(level=['Postcode','Borough']).agg(','.join)
d.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### processing not assigned neighbourhoods:

In [10]:
# Turn the Borough index level back into a regular column (in place);
# Postcode stays as the index
d.reset_index(level=['Borough'], inplace=True)
d.columns

Index(['Borough', 'Neighbourhood'], dtype='object')

In [11]:
# Spot-check the row stored under postcode 'M1B'
d.loc['M1B']

Borough            Scarborough
Neighbourhood    Rouge,Malvern
Name: M1B, dtype: object

In [12]:
# Count the grouped rows whose Neighbourhood is exactly the 'Not assigned' placeholder
print('not assigned neighbourhoods: ', d.loc[d.Neighbourhood == 'Not assigned', 'Neighbourhood'].count())


not assigned neighbourhoods:  1


In [13]:
# Show the Borough of every row whose Neighbourhood is still 'Not assigned'
d.loc[d.Neighbourhood == 'Not assigned', 'Borough']

Postcode
M7A    Queen's Park
Name: Borough, dtype: object

In [14]:
# A row that has a borough but no neighbourhood takes the borough name as its
# neighbourhood. The value is copied from the Borough column rather than
# hard-coded, so the spelling matches the borough exactly ("Queen's Park")
# and any other unassigned row would get its own borough, not Queen's Park.
unassigned = d['Neighbourhood'] == 'Not assigned'
d.loc[unassigned, 'Neighbourhood'] = d.loc[unassigned, 'Borough']

In [15]:
# (rows, columns) of the grouped table
d.shape

(103, 2)

### Getting longitude and latitude of postcodes:

In [240]:
import geocoder

In [75]:
# Look up latitude/longitude for every postcode with the geocoder package.
# `d` is indexed by Postcode at this point (one row per postcode); `df` no
# longer has a Postcode column after set_index(), so `df.Postcode` cannot be used.
MAX_GEOCODE_ATTEMPTS = 5  # give up on a postcode instead of retrying forever

latitude = []
longitude = []
for code in d.index:
    # Start from "no result" for each postcode; otherwise the first successful
    # lookup would be reused for every following postcode.
    latlng = None
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        latlng = geocoder.google(code + ', Toronto, Ontario').latlng
        if latlng is not None:
            break
    if latlng is None:
        # record the failure explicitly rather than looping endlessly
        latlng = (np.nan, np.nan)
    latitude.append(latlng[0])
    longitude.append(latlng[1])

KeyboardInterrupt: 

##### The geocoder package is very unreliable, hence I will use a file that provides the required coordinates

In [16]:
# Load the coordinates table from a CSV file in the working directory
# (the displayed columns are 'Postal Code', 'Latitude' and 'Longitude')
coordinates = pd.read_csv('Geospatial_Coordinates.csv')
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
# Create the coordinate columns as floats, with NaN meaning "no coordinates".
# Initialising them with the integer 0 produced int64 columns, so writing
# float coordinates into them relied on an implicit dtype upcast (deprecated
# in recent pandas), and any unmatched postcode stayed at the bogus point (0, 0).
d['Latitude'] = np.nan
d['Longitude'] = np.nan

In [18]:
# Attach the coordinates to every postcode with one aligned lookup instead of
# scanning the whole coordinates table several times per row.
# `d` is indexed by Postcode here, so reindexing the lookup table by d.index
# yields one coordinate row per row of `d`, in the same order.
# Postcodes that are absent from the file end up as NaN.
coords_by_postcode = (
    coordinates
    .drop_duplicates(subset='Postal Code')  # a unique index is required by reindex()
    .set_index('Postal Code')
)
matched_coords = coords_by_postcode.reindex(d.index)
d['Latitude'] = matched_coords['Latitude'].to_numpy()
d['Longitude'] = matched_coords['Longitude'].to_numpy()

In [19]:
# Inspect the first 40 rows together with their coordinates
d.head(40)

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476
M1J,Scarborough,Scarborough Village,43.744734,-79.239476
M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [20]:
# Summary statistics of the numeric columns (Latitude, Longitude)
d.describe()

Unnamed: 0,Latitude,Longitude
count,103.0,103.0
mean,43.704608,-79.397153
std,0.052463,0.097146
min,43.602414,-79.615819
25%,43.660567,-79.464763
50%,43.696948,-79.38879
75%,43.74532,-79.340923
max,43.836125,-79.160497


## Putting data on map:


In [21]:
import folium

In [22]:
# Toronto's central location is approximately [43.70, -79.42]

# create a map of Toronto centred on that location
toronot_map = folium.Map(location=[43.70, -79.42], zoom_start=11.5)

# add one marker per row of `d`; clicking a marker shows its neighbourhood name(s)
for lat, lng, hood in zip(d['Latitude'], d['Longitude'], d['Neighbourhood']):
    # Only the neighbourhood popup is attached to the marker, so no separate
    # postcode popup is built, and the loop variables are not rebound.
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=folium.Popup(hood, parse_html=True),
        color='red',
        fill=True).add_to(toronot_map)

# last expression of the cell: render the map
toronot_map

### Getting the venues in each neighbourhood for clustering

In [23]:
# Define Foursquare credentials and version.
# The credentials are read from environment variables so that they are never
# stored in (or committed with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; a missing variable
# raises KeyError here instead of failing later inside an API call.

CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # maximum number of venues requested per location

In [24]:
# function that extracts the category of the venue that would be requested from Foursquare
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['categories']`` or, failing that,
        ``row['venue.categories']``; the value is expected to be a sequence
        of dicts that each have a ``'name'`` key.

    Returns
    -------
    The ``'name'`` of the first category, or ``None`` when the category list
    is empty or missing (``None``).

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be hidden by a bare ``except``.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [25]:
import requests

# requesting the venues data
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint once per location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label, latitude and longitude of each location; they are consumed in
        parallel with ``zip``.
    radius : int, default 500
        Passed to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' is None for a venue without categories. The frame is
        empty (but has these columns) when no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.

    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    venue_columns = ['Neighbourhood',
                     'Neighbourhood Latitude',
                     'Neighbourhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # let requests build and escape the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,  # never hang the notebook on a stalled connection
        )
        # surface API errors directly instead of a KeyError on the payload
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # naming the columns at construction also works for an empty result
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

In [26]:
# Fetch the venues around the coordinates of every row of `d` (default radius)
venues = getNearbyVenues(names=d['Neighbourhood'],
                                   latitudes=d['Latitude'],
                                   longitudes=d['Longitude']
                                  )

In [27]:
# Preview the first 20 venue rows
venues.head(20)

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge,Malvern",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
3,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
5,"Guildwood,Morningside,West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
6,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa
7,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
8,"Guildwood,Morningside,West Hill",43.763573,-79.188711,chatr Mobile,43.765917,-79.191672,Tech Startup
9,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location


### Analyzing the venues

In [29]:
# group by neighbourhood and count the non-null values of every column per group

grouped_venues = venues.groupby(by='Neighbourhood')
grouped_venues.count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",3,3,3,3,3,3
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",11,11,11,11,11,11
"Alderwood,Long Branch",10,10,10,10,10,10
"Bathurst Manor,Downsview North,Wilson Heights",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East",25,25,25,25,25,25
Berczy Park,55,55,55,55,55,55
"Birch Cliff,Cliffside West",4,4,4,4,4,4


In [38]:
# Number of distinct Neighbourhood labels that appear in the venues table
len(venues.Neighbourhood.unique())

100

In [81]:
# Some neighbourhoods have no venue rows at all. List them, together with
# their row label in `d`, so that they can be removed before clustering.
# The index is only reset while Postcode is still the index: an unconditional
# reset_index() adds a spurious 'index' column every time this cell is re-run.
if d.index.name == 'Postcode':
    d = d.reset_index()

has_venues = d['Neighbourhood'].isin(venues['Neighbourhood'])
for s, hood in enumerate(d.loc[~has_venues, 'Neighbourhood'], start=1):
    print(s, hood, "not found", " index: {}".format(d.index[d.Neighbourhood == hood].values))

1 Upper Rouge not found  index: [16]
2 Newtonbrook,Willowdale not found  index: [21]
3 Islington Avenue not found  index: [93]


In [89]:
# Remove the neighbourhoods that have no venues. They are selected by
# membership rather than by hard-coded row labels, which would point at the
# wrong rows as soon as the scraped table or the API response changes.
d = d[d['Neighbourhood'].isin(venues['Neighbourhood'])]
# An 'index' column only exists if reset_index() was executed more than once;
# tolerate its absence so the notebook also runs from a fresh kernel.
d = d.drop(columns='index', errors='ignore')
# show a sample instead of dumping the whole frame
d.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [90]:
# Number of distinct venue categories
len(venues['Venue Category'].unique())

280

In [98]:
# one-hot encode the venue categories: one indicator column per category
venue_cats = pd.get_dummies(venues['Venue Category'])
# append the neighbourhood label as the last column ...
venue_cats['Neighbourhood'] = venues['Neighbourhood']
# ... then rotate the column order so that it becomes the first one
columns = list(venue_cats.columns)
columns = columns[-1:] + columns[:-1]
venue_cats = venue_cats[columns]
venue_cats.head(10)

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [101]:
# Mean of every one-hot column per neighbourhood, i.e. the share of each
# category among that neighbourhood's venue rows
grouped = venue_cats.groupby(by='Neighbourhood').mean().reset_index()
grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [109]:
# print the 5 most frequent categories of each neighbourhood

num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----"+hood+"----")
    # rows of this neighbourhood, transposed so that every column becomes a row
    temp = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # skip the first row, which comes from the 'Neighbourhood' column itself
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # highest frequencies first, renumbered from 0
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
             venue  freq
0      Coffee Shop  0.06
1             Café  0.05
2              Bar  0.04
3       Steakhouse  0.04
4  Thai Restaurant  0.04


----Agincourt----
                venue  freq
0      Sandwich Place  0.25
1      Breakfast Spot  0.25
2              Lounge  0.25
3  Chinese Restaurant  0.25
4   Accessories Store  0.00


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                venue  freq
0          Playground  0.33
1    Asian Restaurant  0.33
2                Park  0.33
3   Accessories Store  0.00
4  Miscellaneous Shop  0.00


----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
                 venue  freq
0        Grocery Store  0.18
1             Pharmacy  0.09
2           Beer Store  0.09
3         Liquor Store  0.09
4  Fried Chicken Joint  0.09


----Alderwood,Long Branch----
            venue  freq
0     Pizza Place   0.2
1        Pharmacy   0.1
2  Sandw

                 venue  freq
0       Baseball Field   1.0
1    Accessories Store   0.0
2   Miscellaneous Shop   0.0
3                Motel   0.0
4  Monument / Landmark   0.0


----Fairview,Henry Farm,Oriole----
                  venue  freq
0        Clothing Store  0.12
1  Fast Food Restaurant  0.09
2           Coffee Shop  0.08
3            Restaurant  0.05
4      Toy / Game Store  0.03


----First Canadian Place,Underground city----
         venue  freq
0  Coffee Shop  0.10
1         Café  0.07
2   Restaurant  0.04
3        Hotel  0.04
4    Gastropub  0.03


----Flemingdon Park,Don Mills South----
                   venue  freq
0       Asian Restaurant  0.09
1             Beer Store  0.09
2                    Gym  0.09
3            Coffee Shop  0.09
4  General Entertainment  0.04


----Forest Hill North,Forest Hill West----
               venue  freq
0      Jewelry Store  0.25
1               Park  0.25
2   Sushi Restaurant  0.25
3              Trail  0.25
4  Accessories Store  0.00


                 venue  freq
0                 Café  0.11
1          Coffee Shop  0.08
2               Bakery  0.05
3            Gastropub  0.05
4  American Restaurant  0.05


----The Annex,North Midtown,Yorkville----
               venue  freq
0        Coffee Shop  0.13
1     Sandwich Place  0.13
2               Café  0.13
3        Pizza Place  0.09
4  Indian Restaurant  0.04


----The Beaches----
                  venue  freq
0     Health Food Store   0.2
1                 Trail   0.2
2          Neighborhood   0.2
3  Other Great Outdoors   0.2
4                   Pub   0.2


----The Beaches West,India Bazaar----
                  venue  freq
0                  Park  0.11
1             Pet Store  0.06
2                   Pub  0.06
3  Fast Food Restaurant  0.06
4     Fish & Chips Shop  0.06


----The Danforth West,Riverdale----
                    venue  freq
0        Greek Restaurant  0.21
1             Coffee Shop  0.10
2          Ice Cream Shop  0.07
3      Italian Restaurant  0.07


In [110]:
# a function to get num_top_venues per neighborhood
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``.

    The first entry of ``row`` is skipped; the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues`` of
    them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [113]:
num_top_venues = 10


def _ordinal(n):
    """Return n with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# create columns according to number of top venues
columns = ['Neighbourhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# collect the top categories of every neighbourhood first and build the frame
# once, instead of filling an empty frame cell by cell
top_venue_rows = [
    return_most_common_venues(grouped.iloc[ind, :], num_top_venues)
    for ind in range(grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns[1:])
# rows are in the same order as `grouped`, so its labels can be attached positionally
neighborhoods_venues_sorted.insert(0, 'Neighbourhood', grouped['Neighbourhood'].values)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Bar,Thai Restaurant,American Restaurant,Steakhouse,Burger Joint,Bakery,Gym,Restaurant
1,Agincourt,Lounge,Breakfast Spot,Sandwich Place,Chinese Restaurant,Yoga Studio,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Asian Restaurant,Yoga Studio,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Fried Chicken Joint,Pizza Place,Sandwich Place,Fast Food Restaurant,Liquor Store,Coffee Shop,Beer Store,Video Store,Pharmacy
4,"Alderwood,Long Branch",Pizza Place,Gym,Coffee Shop,Pharmacy,Skating Rink,Pub,Dance Studio,Pool,Sandwich Place,Diner


### Clustering and plotting results on map

In [114]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# k-means only takes the numeric category frequencies, not the label column.
# The column is dropped by keyword: passing the axis positionally, as in
# drop('Neighbourhood', 1), is rejected by pandas 2.0 and later.
grouped_clustering = grouped.drop(columns='Neighbourhood')

# run k-means clustering; the fixed random_state makes the cluster assignment
# reproducible across kernel restarts
kmeans = KMeans(n_clusters=kclusters, n_init=20, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
labels = pd.DataFrame({'clusters': kmeans.labels_})
labels.clusters.unique()

array([0, 2, 3, 1, 4])

In [115]:
# Put the cluster label in front of each neighbourhood's top venues.
# A 'Cluster' column left over from a previous run of this cell is dropped
# first, because insert() raises ValueError for a column name that already
# exists and the cell could otherwise not be re-executed.
if 'Cluster' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster')
neighborhoods_venues_sorted.insert(0, 'Cluster', kmeans.labels_)

In [116]:
# Preview the top-venues table with its cluster labels
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide,King,Richmond",Coffee Shop,Café,Bar,Thai Restaurant,American Restaurant,Steakhouse,Burger Joint,Bakery,Gym,Restaurant
1,0,Agincourt,Lounge,Breakfast Spot,Sandwich Place,Chinese Restaurant,Yoga Studio,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop
2,2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Asian Restaurant,Yoga Studio,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
3,0,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Fried Chicken Joint,Pizza Place,Sandwich Place,Fast Food Restaurant,Liquor Store,Coffee Shop,Beer Store,Video Store,Pharmacy
4,0,"Alderwood,Long Branch",Pizza Place,Gym,Coffee Shop,Pharmacy,Skating Rink,Pub,Dance Studio,Pool,Sandwich Place,Diner


In [117]:
# Add the cluster label and top venues to `d`: the values of d's
# 'Neighbourhood' column are matched against the other table's Neighbourhood index
merged = d.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

In [118]:
# Column dtypes and non-null counts of the merged table
merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 0 to 102
Data columns (total 16 columns):
Postcode                  100 non-null object
Borough                   100 non-null object
Neighbourhood             100 non-null object
Latitude                  100 non-null float64
Longitude                 100 non-null float64
Cluster                   100 non-null int32
1st Most Common Venue     100 non-null object
2nd Most Common Venue     100 non-null object
3rd Most Common Venue     100 non-null object
4th Most Common Venue     100 non-null object
5th Most Common Venue     100 non-null object
6th Most Common Venue     100 non-null object
7th Most Common Venue     100 non-null object
8th Most Common Venue     100 non-null object
9th Most Common Venue     100 non-null object
10th Most Common Venue    100 non-null object
dtypes: float64(2), int32(1), object(13)
memory usage: 12.9+ KB


In [120]:
import matplotlib.cm as cm
import matplotlib.colors as colors


# create map
map_clusters = folium.Map(location=[43.70, -79.42], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster, evenly spaced
# over the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # k-means labels run from 0 to kclusters-1, so they index the palette
    # directly (cluster-1 only worked for label 0 via negative-index wraparound)
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

# last expression of the cell: render the map
map_clusters