# IBM Toronto Neighbors

**Import the always necessary libraries**

In [1]:
import pandas as pd
import numpy as np

# 1. Scraping the data from Wikipedia

In [2]:
from bs4 import BeautifulSoup
import requests

In [3]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(url, "html.parser")
soup.title

<title>List of postal codes of Canada: M - Wikipedia</title>

**Parsing to CSV file**

In [4]:
import csv

In [5]:
table =soup.find("table", class_ = "wikitable sortable")

output_rows = []
for table_row in table.findAll('tr'):
    columns = table_row.findAll('td')
    output_row = []
    for column in columns:
        output_row.append(column.text)
    output_rows.append(output_row)
    
with open('wikipedia.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(output_rows)

**Import dataframe**

In [6]:
df = pd.read_csv('wikipedia.csv', header=None)
header = ['Postalcode', 'Borough', 'Neighborhood']
df.columns = header
df = df.replace(['\r', '\n'],'', regex=True)
df.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern, Rouge"


In [7]:
df.shape

(180, 3)

**Clean Dataframe**

In [8]:
df = df[df["Borough"] != "Not assigned"]
df.shape

(103, 3)

In [9]:
df = df.groupby(['Postalcode','Borough']).Neighborhood.apply(lambda x: ', '.join(x)).to_frame()
df.reset_index(inplace=True)
df.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [10]:
df.shape

(103, 3)

# 2. Finding Geographical Coordinates

Geocoder package did not work, so utilizing the csv from the submission guides.

In [11]:
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


**Merging the data**

In [12]:
df.sort_values('Postalcode', ascending=True, inplace=True)
df_geo.sort_values('Postal Code', ascending=True, inplace=True)

In [13]:
df['Latitude'] = df_geo['Latitude']
df['Longitude'] = df_geo['Longitude']

In [14]:
df.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# 3. Explore and cluster the neighborhoods in Toronto

**Focusing only Toronto**

In [15]:
df_toronto = df[df['Borough'].str.contains("Toronto")]
df_toronto.shape

(39, 5)

In [16]:
df_toronto.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


**Creating a map of Toronto**

In [17]:
from geopy.geocoders import Nominatim
import folium

In [18]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.6534817 -79.3839347


In [19]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, label in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

**Focusing on Central Toronto**

In [20]:
df_c_toronto = df_toronto[df_toronto['Borough'].str.contains("Central")]
df_c_toronto = df_c_toronto.reset_index()
df_c_toronto.head()

Unnamed: 0,index,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,47,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


In [21]:
address = 'Central Toronto'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.6534817 -79.3839347


In [22]:
map_c_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, label in zip(df_c_toronto['Latitude'], df_c_toronto['Longitude'], df_c_toronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_c_toronto)  
    
map_c_toronto

# 4. Using Foursquare to analyze venues

**ENTER YOUR CREDENTIALS HERE!!!!!!!**

In [23]:
CLIENT_ID = '' 
CLIENT_SECRET = '' 
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZYCMLX5WUBZZEOFKK1WRSVMJI1IMIVMQY0A5RITLWS3LHX10
CLIENT_SECRET:44VM5QL4K3BNVOT5TCOPLIL3YQBXDML1SE4ZVNSFLQANXHIH


**Let's take a look at the first Neighborhood**

In [24]:
df_c_toronto.loc[0, 'Neighborhood']

'Lawrence Park'

In [25]:
neighborhood_latitude = df_c_toronto.loc[0, 'Latitude'] 
neighborhood_longitude = df_c_toronto.loc[0, 'Longitude'] 

neighborhood_name = df_c_toronto.loc[0, 'Neighborhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Lawrence Park are 43.7280205, -79.3887901.


**Top 100 venues within a radius of 500 m**

In [26]:
LIMIT = 100 

radius = 500 

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url 

'https://api.foursquare.com/v2/venues/explore?&client_id=ZYCMLX5WUBZZEOFKK1WRSVMJI1IMIVMQY0A5RITLWS3LHX10&client_secret=44VM5QL4K3BNVOT5TCOPLIL3YQBXDML1SE4ZVNSFLQANXHIH&v=20180604&ll=43.7280205,-79.3887901&radius=500&limit=100'

In [27]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eb6dbc85fb726001b7deda9'},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.7325205045, 'lng': -79.3825744605273},
   'sw': {'lat': 43.7235204955, 'lng': -79.3950057394727}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '50e6da19e4b0d8a78a0e9794',
       'name': 'Lawrence Park Ravine',
       'location': {'address': '3055 Yonge Street',
        'crossStreet': 'Lawrence Avenue East',
        'lat': 43.72696303913755,
        'lng': -79.39438246708775,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.72696303913755,
          'lng': -79.39438246708775}],
        'distance': 465,
        'cc': 'CA',
  

In [28]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

**Get relevant part of JSON and transform it into a pandas dataframe**

In [29]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) 

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Lawrence Park Ravine,Park,43.726963,-79.394382
1,Zodiac Swim School,Swim School,43.728532,-79.38286
2,TTC Bus #162 - Lawrence-Donway,Bus Line,43.728026,-79.382805


In [30]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.


# 5. Let's explore all neighborhoods in Central Toronto

In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [32]:
central_toronto_venues = getNearbyVenues(names=df_c_toronto['Neighborhood'],
                                   latitudes=df_c_toronto['Latitude'],
                                   longitudes=df_c_toronto['Longitude']
                                  )

Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Roselawn
Forest Hill North & West
The Annex, North Midtown, Yorkville


In [33]:
print(central_toronto_venues.shape)
central_toronto_venues.head()

(117, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
2,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
3,Davisville North,43.712751,-79.390197,Homeway Restaurant & Brunch,43.712641,-79.391557,Breakfast Spot
4,Davisville North,43.712751,-79.390197,Sherwood Park,43.716551,-79.387776,Park


In [34]:
central_toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Davisville,38,38,38,38,38,38
Davisville North,7,7,7,7,7,7
Forest Hill North & West,4,4,4,4,4,4
Lawrence Park,3,3,3,3,3,3
"Moore Park, Summerhill East",1,1,1,1,1,1
North Toronto West,23,23,23,23,23,23
Roselawn,3,3,3,3,3,3
"Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park",16,16,16,16,16,16
"The Annex, North Midtown, Yorkville",22,22,22,22,22,22


In [35]:
print('There are {} uniques categories.'.format(len(central_toronto_venues['Venue Category'].unique())))

There are 68 uniques categories.


**One hot encoding**

In [36]:
central_toronto_onehot = pd.get_dummies(central_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

central_toronto_onehot['Neighborhood'] = central_toronto_venues['Neighborhood'] 

fixed_columns = [central_toronto_onehot.columns[-1]] + list(central_toronto_onehot.columns[:-1])
central_toronto_onehot = central_toronto_onehot[fixed_columns]

central_toronto_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bank,Bar,Breakfast Spot,Brewery,Burger Joint,Bus Line,...,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,Davisville North,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
central_toronto_grouped = central_toronto_onehot.groupby('Neighborhood').mean().reset_index()
central_toronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bank,Bar,Breakfast Spot,Brewery,Burger Joint,Bus Line,...,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,...,0.0,0.0,0.052632,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Forest Hill North & West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0
3,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
4,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
6,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Summerhill West, Rathnelly, South Hill, Forest...",0.0625,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,...,0.0625,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0
8,"The Annex, North Midtown, Yorkville",0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0


**Top 5 venues in each Neighborhood**

In [38]:
num_top_venues = 5

for hood in central_toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = central_toronto_grouped[central_toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Davisville----
              venue  freq
0    Sandwich Place  0.08
1      Dessert Shop  0.08
2       Pizza Place  0.08
3              Café  0.05
4  Sushi Restaurant  0.05


----Davisville North----
               venue  freq
0               Park  0.14
1  Food & Drink Shop  0.14
2              Hotel  0.14
3     Sandwich Place  0.14
4     Breakfast Spot  0.14


----Forest Hill North & West----
                 venue  freq
0        Jewelry Store  0.25
1                Trail  0.25
2                 Park  0.25
3     Sushi Restaurant  0.25
4  American Restaurant  0.00


----Lawrence Park----
                 venue  freq
0                 Park  0.33
1          Swim School  0.33
2             Bus Line  0.33
3  American Restaurant  0.00
4        Movie Theater  0.00


----Moore Park, Summerhill East----
                 venue  freq
0           Playground   1.0
1  American Restaurant   0.0
2        Movie Theater   0.0
3          Pizza Place   0.0
4             Pharmacy   0.0


----North Toron

**Now we transform the data into a dataframe**

In [39]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [40]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = central_toronto_grouped['Neighborhood']

for ind in np.arange(central_toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(central_toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Café,Sushi Restaurant,Coffee Shop,Gym,Italian Restaurant,Gas Station,Diner
1,Davisville North,Hotel,Food & Drink Shop,Gym,Department Store,Park,Breakfast Spot,Sandwich Place,Farmers Market,Fast Food Restaurant,Flower Shop
2,Forest Hill North & West,Trail,Park,Sushi Restaurant,Jewelry Store,Yoga Studio,Fried Chicken Joint,Donut Shop,Farmers Market,Fast Food Restaurant,Flower Shop
3,Lawrence Park,Bus Line,Park,Swim School,Yoga Studio,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden
4,"Moore Park, Summerhill East",Playground,Fried Chicken Joint,Discount Store,Donut Shop,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Yoga Studio,Home Service


# 6. And finally we cluster our neighborhoods

In [41]:
from sklearn.cluster import KMeans

In [42]:
kclusters = 5

central_toronto_grouped_clustering = central_toronto_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(central_toronto_grouped_clustering)

kmeans.labels_[0:10] 

array([1, 1, 4, 0, 2, 1, 3, 1, 1])

**Insert Cluster Labels**

In [43]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

**Merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood**

In [44]:
central_toronto_merged = df_c_toronto

central_toronto_merged = central_toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

central_toronto_merged.head() 

Unnamed: 0,index,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Bus Line,Park,Swim School,Yoga Studio,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden
1,45,M4P,Central Toronto,Davisville North,43.712751,-79.390197,1,Hotel,Food & Drink Shop,Gym,Department Store,Park,Breakfast Spot,Sandwich Place,Farmers Market,Fast Food Restaurant,Flower Shop
2,46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,1,Clothing Store,Coffee Shop,Salon / Barbershop,Gift Shop,Fast Food Restaurant,Mexican Restaurant,Diner,Miscellaneous Shop,Dessert Shop,Park
3,47,M4S,Central Toronto,Davisville,43.704324,-79.38879,1,Dessert Shop,Sandwich Place,Pizza Place,Café,Sushi Restaurant,Coffee Shop,Gym,Italian Restaurant,Gas Station,Diner
4,48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,2,Playground,Fried Chicken Joint,Discount Store,Donut Shop,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Yoga Studio,Home Service


**Creating a map of the clusters**

In [45]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [46]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(central_toronto_merged['Latitude'], central_toronto_merged['Longitude'], central_toronto_merged['Neighborhood'], central_toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# 7. Let's explore the clusters

In [47]:
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 0, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]

Unnamed: 0,Postalcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,-79.38879,0,Bus Line,Park,Swim School,Yoga Studio,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden


In [48]:
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 1, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]

Unnamed: 0,Postalcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M4P,-79.390197,1,Hotel,Food & Drink Shop,Gym,Department Store,Park,Breakfast Spot,Sandwich Place,Farmers Market,Fast Food Restaurant,Flower Shop
2,M4R,-79.405678,1,Clothing Store,Coffee Shop,Salon / Barbershop,Gift Shop,Fast Food Restaurant,Mexican Restaurant,Diner,Miscellaneous Shop,Dessert Shop,Park
3,M4S,-79.38879,1,Dessert Shop,Sandwich Place,Pizza Place,Café,Sushi Restaurant,Coffee Shop,Gym,Italian Restaurant,Gas Station,Diner
5,M4V,-79.400049,1,Coffee Shop,Pub,American Restaurant,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Light Rail Station,Liquor Store,Restaurant,Pizza Place
8,M5R,-79.405678,1,Sandwich Place,Café,Coffee Shop,American Restaurant,History Museum,Indian Restaurant,Liquor Store,Middle Eastern Restaurant,Donut Shop,Park


In [49]:
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 2, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]

Unnamed: 0,Postalcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M4T,-79.38316,2,Playground,Fried Chicken Joint,Discount Store,Donut Shop,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Yoga Studio,Home Service


In [50]:
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 3, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]

Unnamed: 0,Postalcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,M5N,-79.416936,3,Garden,Music Venue,Home Service,Fried Chicken Joint,Donut Shop,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Yoga Studio


In [51]:
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 4, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]

Unnamed: 0,Postalcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,M5P,-79.411307,4,Trail,Park,Sushi Restaurant,Jewelry Store,Yoga Studio,Fried Chicken Joint,Donut Shop,Farmers Market,Fast Food Restaurant,Flower Shop
