# Part 1 - Creating the Dataframe

### The below code imports necessary libraries and packages.

In [1]:
import pandas as pd
import numpy as np
import requests as rqst
!conda install lxml --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - lxml


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    lxml-4.4.2                 |   py36hefd8a0e_0         1.6 MB

The following packages will be UPDATED:

    lxml: 4.3.1-py36hefd8a0e_0 --> 4.4.2-py36hefd8a0e_0


Downloading and Extracting Packages
lxml-4.4.2           | 1.6 MB    | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


### The below code loads the wikipedia table and uses read_html from Pandas to read the table.

In [2]:
Wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
Website = rqst.get(Wiki_url).text
Data = pd.read_html(Website)
df = Data[0]
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


### 1. First line of below code removes the locations with Boroughs not assigned
### 2. second line of below code replaces the unassigned neighbourhood name with its Borough name

In [3]:
df = df[df['Borough'] != "Not assigned" ] 
df.loc[df[df['Neighbourhood'] == "Not assigned"].index,'Neighbourhood'] = df.loc[df[df['Neighbourhood'] == "Not assigned"].index,'Borough'] 
df = df.reset_index(drop = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


### The below code groups the Dataframe by Postcode and aggregates all the neighbourhoods under that Postcode into one row

In [4]:
grouped_df = df.groupby('Postcode')
grouped_df['Neighbourhood'].agg(', '.join)
df2 = pd.DataFrame(grouped_df.agg(', '.join))
df2['Borough'] = grouped_df.first()['Borough']
df2 = df2.reset_index(drop = False)
df2.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### The below code prints the shape of the cleaned up Dataframe

In [5]:
df2.shape

(103, 3)

# Part 2 - Getting geolocation data

In [6]:
#!conda install -c conda-forge geocoder
#import geocoder # import geocoder

### The below code was used to get latitude and longitude data using geocoder but it was taking forever or was stuck in the loop, hence used the csv file provided.

In [7]:
# latitude = np.zeros((df2.shape[0],1),dtype = float)
# longitude = np.zeros((df2.shape[0],1),dtype = float)


# for loc,Postcode in enumerate(df2['Postcode']):
#    lat_lng_coords = None
#    while(lat_lng_coords is None):
#      geo = geocoder.google('{}, Toronto, Ontario'.format(Postcode))
#      lat_lng_coords = geo.latlng
#    latitude[loc] = lat_lng_coords[0]
#    longitude[loc] = lat_lng_coords[1]
#   disp(loc)
#df2['Latitude'] = latitude
#df2['Longitude'] = longitude

### Loading the csv file and adding the data to the dataframe

In [6]:
lat_lng_coords = pd.read_csv('https://cocl.us/Geospatial_data')
lat_lng_coords.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Assigns the loaded csv to the Main Dataframe

In [7]:
df2['Latitude'] = lat_lng_coords['Latitude']
df2['Longitude'] = lat_lng_coords['Longitude']
df2.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Part 3 - Clustering analysis

### Importing necessary packages

In [8]:
import json
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes
import folium
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    altair-4.0.1               |             py_0         575 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.0 MB

The following NEW packages will be 

### Let's see how many boroughs have the name "Toronto"

In [9]:
has_Toronto = []
for loc,string in enumerate(df2['Borough']):
    has_Toronto.append("Toronto" in string)
sum(has_Toronto)

39

### 39 looks small. We shall use the whole Dataframe

In [12]:
#df_Toronto = df2[has_Toronto]
#df_Toronto.head(10)

### Let's locate Toronto

In [10]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="Toronto_explorer")
location1 = geolocator.geocode(address)
latitude1 = location1.latitude
longitude1 = location1.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude1, longitude1))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


### Let's visualize the locations on the map using Folium

In [11]:
map_Toronto = folium.Map(location=[latitude1, longitude1], zoom_start=11)

# add markers to map
for lat, lng, Postcode, Borough in zip(df2['Latitude'], df2['Longitude'], df2['Postcode'], df2['Borough']):
    label = '{}, {}'.format(Postcode, Borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

### Define Foursquare Credentials and Version

In [12]:
CLIENT_ID = '1ATIMOM43KUB35ZQNGIKVZTN3XWDKDWTHUONBTQFICYEHTXH' # your Foursquare ID
CLIENT_SECRET = 'PTVPVPUPIN4D0CXTIIZAOWV0UO1B33GCDRXZSSGOLB1A50FW' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1ATIMOM43KUB35ZQNGIKVZTN3XWDKDWTHUONBTQFICYEHTXH
CLIENT_SECRET:PTVPVPUPIN4D0CXTIIZAOWV0UO1B33GCDRXZSSGOLB1A50FW


### function that extracts the category of the venue

In [13]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Let's get the venues for all the locations

In [14]:
import requests
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode', 
                  'Postcode Latitude', 
                  'Postcode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [15]:
LIMIT = 500
Toronto_venues = getNearbyVenues(names=df2['Postcode'],
                                   latitudes=df2['Latitude'],
                                   longitudes=df2['Longitude']
                                  )

In [16]:
print(Toronto_venues.shape)
Toronto_venues.head()

(2215, 7)


Unnamed: 0,Postcode,Postcode Latitude,Postcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,M1C,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,M1E,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,M1E,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,M1E,43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


In [17]:
countVenues = Toronto_venues.groupby('Postcode').count()
countVenues.head(10)

Unnamed: 0_level_0,Postcode Latitude,Postcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,1,1,1,1,1,1
M1C,1,1,1,1,1,1
M1E,7,7,7,7,7,7
M1G,4,4,4,4,4,4
M1H,8,8,8,8,8,8
M1J,3,3,3,3,3,3
M1K,6,6,6,6,6,6
M1L,10,10,10,10,10,10
M1M,2,2,2,2,2,2
M1N,4,4,4,4,4,4


### We shall use top 5 venues as the featureset for the Posrcodes.
### Notice there are Postcodes with less than 5 venues returned. So we shall remove these Postcodes from our analysis

In [18]:
Keep_Postcodes = countVenues[countVenues["Venue"]>4]
Keep_Postcodes.shape
Keep_Postcodes.reset_index(drop=False,inplace = True)
Postcode_list = Keep_Postcodes.Postcode.values
Postcode_list

array(['M1E', 'M1H', 'M1K', 'M1L', 'M1P', 'M1R', 'M1T', 'M1W', 'M2J',
       'M2N', 'M2P', 'M2R', 'M3C', 'M3H', 'M3J', 'M3K', 'M3L', 'M3N',
       'M4A', 'M4B', 'M4C', 'M4E', 'M4G', 'M4H', 'M4K', 'M4L', 'M4M',
       'M4P', 'M4R', 'M4S', 'M4V', 'M4X', 'M4Y', 'M5A', 'M5B', 'M5C',
       'M5E', 'M5G', 'M5H', 'M5J', 'M5K', 'M5L', 'M5M', 'M5R', 'M5S',
       'M5T', 'M5V', 'M5W', 'M5X', 'M6A', 'M6E', 'M6G', 'M6H', 'M6J',
       'M6K', 'M6M', 'M6P', 'M6R', 'M6S', 'M7A', 'M7R', 'M7Y', 'M8V',
       'M8W', 'M8Z', 'M9C', 'M9P', 'M9V'], dtype=object)

### We're going to work with these 66 Postcodes 

In [19]:
Toronto_venues2 = Toronto_venues[Toronto_venues[["Postcode"]].isin(Postcode_list)["Postcode"]]
print(Toronto_venues2.shape)
Toronto_venues2.head(10)

(2123, 7)


Unnamed: 0,Postcode,Postcode Latitude,Postcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
2,M1E,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,M1E,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,M1E,43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
5,M1E,43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location
6,M1E,43.763573,-79.188711,Woburn Medical Centre,43.766631,-79.192286,Medical Center
7,M1E,43.763573,-79.188711,Lawrence Ave E & Kingston Rd,43.767704,-79.18949,Intersection
8,M1E,43.763573,-79.188711,Eggsmart,43.7678,-79.190466,Breakfast Spot
13,M1H,43.773136,-79.239476,Federick Restaurant,43.774697,-79.241142,Hakka Restaurant
14,M1H,43.773136,-79.239476,Drupati's Roti & Doubles,43.775222,-79.241678,Caribbean Restaurant
15,M1H,43.773136,-79.239476,Centennial Recreation Centre,43.774593,-79.2365,Athletics & Sports


In [20]:
print('There are {} uniques categories.'.format(len(Toronto_venues2['Venue Category'].unique())))

There are 252 uniques categories.


In [21]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues2[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Postcode'] = Toronto_venues2['Postcode'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_grouped = Toronto_onehot.groupby('Postcode').mean().reset_index()

In [22]:
Toronto_grouped.head(10)

Unnamed: 0,Postcode,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1P,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0
5,M1R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0
6,M1T,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M1W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M2J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,...,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.016129,0.048387,0.016129
9,M2N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
Toronto_grouped.shape

(68, 253)

In [24]:
num_top_venues = 5

for hood in Toronto_grouped['Postcode']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Postcode'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1E----
                 venue  freq
0          Pizza Place  0.14
1    Electronics Store  0.14
2         Intersection  0.14
3  Rental Car Location  0.14
4   Mexican Restaurant  0.14


----M1H----
                  venue  freq
0  Caribbean Restaurant  0.12
1       Thai Restaurant  0.12
2                Bakery  0.12
3           Gas Station  0.12
4    Athletics & Sports  0.12


----M1K----
               venue  freq
0     Discount Store  0.17
1  Convenience Store  0.17
2        Bus Station  0.17
3        Coffee Shop  0.17
4   Department Store  0.17


----M1L----
          venue  freq
0        Bakery   0.2
1      Bus Line   0.2
2  Intersection   0.1
3  Soccer Field   0.1
4          Park   0.1


----M1P----
                   venue  freq
0      Indian Restaurant  0.33
1              Pet Store  0.17
2                Brewery  0.17
3  Vietnamese Restaurant  0.17
4     Chinese Restaurant  0.17


----M1R----
                       venue  freq
0  Middle Eastern Restaurant  0.25
1      Vietnam

In [25]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [31]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Postcode_venues_sorted = pd.DataFrame(columns=columns)
Postcode_venues_sorted['Postcode'] = Toronto_grouped['Postcode']

for ind in np.arange(Toronto_grouped.shape[0]):
    Postcode_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

Postcode_venues_sorted.head(10)

Unnamed: 0,Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1E,Medical Center,Intersection,Electronics Store,Pizza Place,Rental Car Location
1,M1H,Bank,Fried Chicken Joint,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant
2,M1K,Department Store,Coffee Shop,Discount Store,Convenience Store,Bus Station
3,M1L,Bakery,Bus Line,Soccer Field,Intersection,Park
4,M1P,Indian Restaurant,Pet Store,Chinese Restaurant,Vietnamese Restaurant,Brewery
5,M1R,Middle Eastern Restaurant,Sandwich Place,Auto Garage,Bakery,Breakfast Spot
6,M1T,Pizza Place,Fast Food Restaurant,Intersection,Pharmacy,Noodle House
7,M1W,Fast Food Restaurant,Chinese Restaurant,Grocery Store,Nail Salon,Breakfast Spot
8,M2J,Clothing Store,Fast Food Restaurant,Coffee Shop,Japanese Restaurant,Women's Store
9,M2N,Ramen Restaurant,Café,Sandwich Place,Pizza Place,Restaurant


### Now that we finally have our featureset,, we shall shart with the clustering analysis

In [32]:
# set number of clusters
kclusters = 5

Postcode_grouped_clustering = Toronto_grouped.drop('Postcode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Postcode_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 2, 0, 2, 3, 2, 4, 4, 0, 0], dtype=int32)

In [33]:
Postcode_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
df3 = df2[df2[["Postcode"]].isin(Postcode_list)["Postcode"]]
Toronto_merged = df3

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(Postcode_venues_sorted.set_index('Postcode'), on='Postcode')

Toronto_merged.head(10) # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,4,Medical Center,Intersection,Electronics Store,Pizza Place,Rental Car Location
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2,Bank,Fried Chicken Joint,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,0,Department Store,Coffee Shop,Discount Store,Convenience Store,Bus Station
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,2,Bakery,Bus Line,Soccer Field,Intersection,Park
10,M1P,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,3,Indian Restaurant,Pet Store,Chinese Restaurant,Vietnamese Restaurant,Brewery
11,M1R,Scarborough,"Maryvale, Wexford",43.750072,-79.295849,2,Middle Eastern Restaurant,Sandwich Place,Auto Garage,Bakery,Breakfast Spot
13,M1T,Scarborough,"Clarks Corners, Sullivan, Tam O'Shanter",43.781638,-79.304302,4,Pizza Place,Fast Food Restaurant,Intersection,Pharmacy,Noodle House
15,M1W,Scarborough,L'Amoreaux West,43.799525,-79.318389,4,Fast Food Restaurant,Chinese Restaurant,Grocery Store,Nail Salon,Breakfast Spot
18,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,0,Clothing Store,Fast Food Restaurant,Coffee Shop,Japanese Restaurant,Women's Store
22,M2N,North York,Willowdale South,43.77012,-79.408493,0,Ramen Restaurant,Café,Sandwich Place,Pizza Place,Restaurant


### We shall now visualize the Postcodes as clusters 

In [37]:
map_clusters = folium.Map(location=[latitude1, longitude1], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Postcode'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## The above map shows the Postcodes in clusters of similar colors. This ends the assignment