# Battle of Neighbours_ Toronto

## import necessary packages

In [1]:
# import necessary packages
import numpy as np
import pandas as pd 
import json
from geopy.geocoders import Nominatim 
import requests 
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import folium
from bs4 import BeautifulSoup

## importing the data from URL

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

req = requests.get(url)
soup = BeautifulSoup(req.content, 'lxml')
table_classes = {"class": ["sortable", "plainrowheaders"]}
wikitables = soup.findAll("table", table_classes)
wikitables

[<table class="wikitable sortable">
 <tbody><tr>
 <th>Postal Code
 </th>
 <th>Borough
 </th>
 <th>Neighborhood
 </th></tr>
 <tr>
 <td>M1A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M2A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M3A
 </td>
 <td>North York
 </td>
 <td>Parkwoods
 </td></tr>
 <tr>
 <td>M4A
 </td>
 <td>North York
 </td>
 <td>Victoria Village
 </td></tr>
 <tr>
 <td>M5A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Regent Park, Harbourfront
 </td></tr>
 <tr>
 <td>M6A
 </td>
 <td>North York
 </td>
 <td>Lawrence Manor, Lawrence Heights
 </td></tr>
 <tr>
 <td>M7A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Queen's Park, Ontario Provincial Government
 </td></tr>
 <tr>
 <td>M8A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M9A
 </td>
 <td>Etobicoke
 </td>
 <td>Islington Avenue, Humber Valley Village
 </td></tr>
 <tr>
 <td>M1B
 </td>
 <td>Scarborough
 </td>
 <td>Malvern, Rouge
 </td></tr>
 <tr>
 <td>M2B


## converting data to Pandas dataframe

In [3]:
for table in wikitables:
    ths = table.find_all('th')
    headings = [th.text.strip() for th in ths]
    if headings[:3] == ['Postcode', 'Borough', "Neighbourhood"]:
        break
with open('list_of_postal_codes_of_canada.txt', 'w') as fo:
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if not tds:
            continue
        PostalCode, Borough, Neighborhood = [td.text.strip() for td in tds[:3]]
        
        print('; '.join([PostalCode, Borough, Neighborhood]), file=fo)
        
df = pd.read_csv('list_of_postal_codes_of_canada.txt', sep = ';', header = None, names = ['PostalCode', 'Borough', 'Neighborhood'])
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## filtering the "not assigned" cells from the Borough column

In [4]:
import re
bl = []

for result in df.Borough:
    if not re.search('Not assigned', result):
        bl.append(True)
    else:
        bl.append(False)

Filtered = pd.Series(bl)

tor_data = df[Filtered].reset_index(drop = True)

tor_data.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## combining the Neighbourhoods  one row with the neighborhoods separated with a comma

In [5]:
new_cd = tor_data.astype(str).groupby('PostalCode')['Neighborhood'].agg(','.join).reset_index()
merged_cd = pd.merge(tor_data, new_cd, on = ['PostalCode'], how = 'inner')
duplicate_cd = merged_cd.drop(['Neighborhood_x'], axis = 1)
final_cd = duplicate_cd.drop_duplicates(subset='PostalCode')
final_cd.columns = ['PostalCode', 'Borough', 'Neighborhood']
final_cd = final_cd.reset_index(drop = True)

#If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
bl = []

for result in final_cd.Neighborhood:
    if not re.search('Not assigned', result):
        bl.append(True)
    else:
        bl.append(False)

Filter = pd.Series(bl)

X = final_cd['Neighborhood'].where(bl, np.nan)
X = pd.Series(X)
final_cd = final_cd.drop(['Neighborhood'], axis = 1)

final_cd['Neighborhood'] = X.values


final_cd["Neighborhood"] = final_cd["Neighborhood"].fillna(final_cd["Borough"])
final_cd.head()
final_cd.shape

(103, 3)

## Create a dataframe of the latitude and longitudes of the Toronto Neighborhoods

In [6]:
#Create a dataframe of the latitude and longitudes of the Toronto Neighborhoods
latlong = pd.read_csv("http://cocl.us/Geospatial_data")

latlong.rename(columns = {"Postal Code": "PostalCode"}, inplace = True)

#Join the Lat and Long dataframe to Neighborhoods dataframe
final_cd.set_index("PostalCode")
latlong.set_index("PostalCode")
neighbor=pd.merge(final_cd, latlong)
neighbor

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South ...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, H...",43.636258,-79.498509


## Use geopy library to get the latitude and longitude values of Toronto, Canada

In [7]:
address = 'Toronto, CA'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude


  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighbor['Latitude'], neighbor['Longitude'], neighbor['Borough'], neighbor['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## segment and cluster only the neighborhoods in Toronto, the Downtown Toronto Neighborhood.
## and Create a Map of Downtown Toronto Neighborhood

In [9]:
neighbor['Borough'][102]==' Scarborough'

False

In [10]:
Downtown_Toronto_data = neighbor[neighbor['Borough'] == ' Downtown Toronto'].reset_index(drop=False)
Downtown_Toronto_data

Unnamed: 0,index,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
8,36,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Isl...",43.640816,-79.381752
9,42,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576


In [11]:
address = 'Downtown Toronto, Toronto'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

  This is separate from the ipykernel package so we can avoid doing imports until


In [12]:
# create map of Scarborough using latitude and longitude values
Map_Downtown_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Downtown_Toronto_data['Latitude'], Downtown_Toronto_data['Longitude'], Downtown_Toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Map_Downtown_Toronto)  
    
Map_Downtown_Toronto

## Explore Toronto Nearby: downtown using the Foursquare API

In [13]:
#My Foursquare Credentials 
CLIENT_ID = 'UBTZDNXN5KXSJDDOTMJDZO1IY34EKDQMWTXYEFKEEGJQRL1I' # your Foursquare ID
CLIENT_SECRET = 'QPGXI2RYN4OCY24R4TUARVKPLLAZMYNNKL03BV0HK3MSLXSS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

#first neighborhood(s) of my dataframe
neighborhood_latitude = Downtown_Toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Downtown_Toronto_data.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = Downtown_Toronto_data.loc[0, 'Neighborhood'] # neighborhood name

#Get the top 100 venues around this neighborhood with radius of 1000 meters
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

#send requests
results = requests.get(url).json()

In [14]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Impact Kitchen,Restaurant,43.656369,-79.35698
3,The Distillery Historic District,Historic Site,43.650244,-79.359323
4,Corktown Common,Park,43.655618,-79.356211


In [16]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [17]:
# define a function to repeat the process in downtown
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
downtown_venues = getNearbyVenues(names=Downtown_Toronto_data['Neighborhood'],
                                   latitudes=Downtown_Toronto_data['Latitude'],
                                   longitudes=Downtown_Toronto_data['Longitude']
                                  )

 Regent Park, Harbourfront
 Queen's Park, Ontario Provincial Government
 Garden District, Ryerson
 St. James Town
 Berczy Park
 Central Bay Street
 Christie
 Richmond, Adelaide, King
 Harbourfront East, Union Station, Toronto Islands
 Toronto Dominion Centre, Design Exchange
 Commerce Court, Victoria Hotel
 University of Toronto, Harbord
 Kensington Market, Chinatown, Grange Park
 CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
 Rosedale
 Stn A PO Boxes
 St. James Town, Cabbagetown
 First Canadian Place, Underground city
 Church and Wellesley


## Analysis each neighborhood

In [19]:
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

downtown_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# mean of frquence
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Ha...",0.0,0.0,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.025316,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,...,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0
5,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
6,"First Canadian Place, Underground city",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0
8,"Harbourfront East, Union Station, Toronto Isl...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0
9,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.051724,0.0,0.051724,0.017241,0.0,0.0


In [21]:
## Create a dataframe
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Restaurant,Beer Bar,Seafood Restaurant,Bakery,Café,Farmers Market,Gourmet Shop
1,"CN Tower, King and Spadina, Railway Lands, Ha...",Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Bar,Rental Car Location,Plane,Boat or Ferry,Coffee Shop,Harbor / Marina
2,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Salad Place,Japanese Restaurant,Department Store,Burger Joint,Bubble Tea Shop,Poke Place
3,Christie,Grocery Store,Café,Park,Athletics & Sports,Italian Restaurant,Diner,Nightclub,Candy Store,Restaurant,Baby Store
4,Church and Wellesley,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Gay Bar,Restaurant,Yoga Studio,Burger Joint,Men's Store,Smoke Shop,Café
5,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant,Seafood Restaurant,Japanese Restaurant,Italian Restaurant,Deli / Bodega
6,"First Canadian Place, Underground city",Coffee Shop,Café,Hotel,Restaurant,Japanese Restaurant,Gym,Asian Restaurant,Seafood Restaurant,Steakhouse,American Restaurant
7,"Garden District, Ryerson",Clothing Store,Coffee Shop,Café,Japanese Restaurant,Italian Restaurant,Cosmetics Shop,Middle Eastern Restaurant,Bubble Tea Shop,Tea Room,Lingerie Store
8,"Harbourfront East, Union Station, Toronto Isl...",Coffee Shop,Aquarium,Hotel,Café,Brewery,Scenic Lookout,Sporting Goods Shop,Italian Restaurant,Restaurant,Fried Chicken Joint
9,"Kensington Market, Chinatown, Grange Park",Café,Coffee Shop,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Mexican Restaurant,Bakery,Park,Dessert Shop,Bar,Grocery Store


## Clustering neighbors usinng KMeans 

In [23]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 4, 3, 2, 2, 2, 2, 4, 2])

In [25]:
# create a new dataframe
downtown_merged = Downtown_Toronto_data

# add clustering labels
downtown_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,index,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Café,Theater,Gym / Fitness Center,Event Space,Performing Arts Venue
1,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Diner,Bar,Smoothie Shop,Beer Bar,Sandwich Place,Distribution Center,Discount Store,Yoga Studio
2,9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,4,Clothing Store,Coffee Shop,Café,Japanese Restaurant,Italian Restaurant,Cosmetics Shop,Middle Eastern Restaurant,Bubble Tea Shop,Tea Room,Lingerie Store
3,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3,Coffee Shop,Café,Cocktail Bar,Restaurant,American Restaurant,Gastropub,Department Store,Creperie,Lingerie Store,Cosmetics Shop
4,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Coffee Shop,Cocktail Bar,Cheese Shop,Restaurant,Beer Bar,Seafood Restaurant,Bakery,Café,Farmers Market,Gourmet Shop


In [26]:
# map visualization
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [27]:
#Cluster 1
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,-79.389494,0,Coffee Shop,Sushi Restaurant,Diner,Bar,Smoothie Shop,Beer Bar,Sandwich Place,Distribution Center,Discount Store,Yoga Studio


In [28]:
#CLuster 3
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,-79.360636,2,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Café,Theater,Gym / Fitness Center,Event Space,Performing Arts Venue
4,Downtown Toronto,-79.373306,2,Coffee Shop,Cocktail Bar,Cheese Shop,Restaurant,Beer Bar,Seafood Restaurant,Bakery,Café,Farmers Market,Gourmet Shop
5,Downtown Toronto,-79.387383,2,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Salad Place,Japanese Restaurant,Department Store,Burger Joint,Bubble Tea Shop,Poke Place
6,Downtown Toronto,-79.422564,2,Grocery Store,Café,Park,Athletics & Sports,Italian Restaurant,Diner,Nightclub,Candy Store,Restaurant,Baby Store
7,Downtown Toronto,-79.384568,2,Coffee Shop,Café,Restaurant,Deli / Bodega,Clothing Store,Thai Restaurant,Hotel,Gym,Bakery,Bookstore
9,Downtown Toronto,-79.381576,2,Coffee Shop,Hotel,Café,Restaurant,Salad Place,Seafood Restaurant,Deli / Bodega,American Restaurant,Japanese Restaurant,Italian Restaurant
12,Downtown Toronto,-79.400049,2,Café,Coffee Shop,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Mexican Restaurant,Bakery,Park,Dessert Shop,Bar,Grocery Store
14,Downtown Toronto,-79.377529,2,Park,Playground,Trail,Cupcake Shop,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store
16,Downtown Toronto,-79.367675,2,Coffee Shop,Pizza Place,Chinese Restaurant,Restaurant,Pub,Bakery,Italian Restaurant,Café,Butcher,Playground
17,Downtown Toronto,-79.38228,2,Coffee Shop,Café,Hotel,Restaurant,Japanese Restaurant,Gym,Asian Restaurant,Seafood Restaurant,Steakhouse,American Restaurant


In [29]:
#CLuster 4
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Downtown Toronto,-79.375418,3,Coffee Shop,Café,Cocktail Bar,Restaurant,American Restaurant,Gastropub,Department Store,Creperie,Lingerie Store,Cosmetics Shop


In [30]:
#CLuster 5
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, downtown_merged.columns[[2] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,-79.378937,4,Clothing Store,Coffee Shop,Café,Japanese Restaurant,Italian Restaurant,Cosmetics Shop,Middle Eastern Restaurant,Bubble Tea Shop,Tea Room,Lingerie Store
8,Downtown Toronto,-79.381752,4,Coffee Shop,Aquarium,Hotel,Café,Brewery,Scenic Lookout,Sporting Goods Shop,Italian Restaurant,Restaurant,Fried Chicken Joint
10,Downtown Toronto,-79.379817,4,Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant,Seafood Restaurant,Japanese Restaurant,Italian Restaurant,Deli / Bodega
11,Downtown Toronto,-79.400049,4,Café,Japanese Restaurant,Bar,Italian Restaurant,Bookstore,Bakery,Restaurant,Sandwich Place,Beer Bar,Beer Store
15,Downtown Toronto,-79.374846,4,Coffee Shop,Café,Cocktail Bar,Italian Restaurant,Seafood Restaurant,Beer Bar,Japanese Restaurant,Restaurant,Lounge,Creperie


## Explore New York City Neighborhoods