# The Battle of the Neighborhoods  
### This project aims to analyze the neighborhoods within New York, USA and Toronto, Canada. We will be comparing the frequency and distribution of venues in each area to find the similarities between the two.

Import relevant Python tools and modules

In [1]:
from bs4 import BeautifulSoup
!pip install requests
import requests
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
!pip install folium
import folium
!pip install geopy
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
import json
from pandas.io.json import json_normalize



Retrieve information from New York dataset

In [2]:
with open('ny_neighborhood.json') as x:
    ny = json.load(x)
    
nyneighborhoods = ny['features']

#Label columns as City, Borough, Neighborhood, Latitude, Longitude
column_names = ['City','Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
ny_neighborhoods = pd.DataFrame(columns=column_names)

for data in nyneighborhoods:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    ny_neighborhoods = ny_neighborhoods.append({'City': 'New York',
                                          'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
ny_neighborhoods.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude
0,New York,Bronx,Wakefield,40.894705,-73.847201
1,New York,Bronx,Co-op City,40.874294,-73.829939
2,New York,Bronx,Eastchester,40.887556,-73.827806
3,New York,Bronx,Fieldston,40.895437,-73.905643
4,New York,Bronx,Riverdale,40.890834,-73.912585


Retrieve information from Toronto dataset

In [3]:
List_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(List_url).text
soup = BeautifulSoup(source, 'xml')
table=soup.find('table')

In [4]:
column_names = ['Postalcode','Borough','Neighborhood']
df = pd.DataFrame(columns = column_names)
for tr_cell in table.find_all('tr'):
    row_data=[]
    for td_cell in tr_cell.find_all('td'):
        row_data.append(td_cell.text.strip())
    if len(row_data)==3:
        df.loc[len(df)] = row_data 
df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
df=df[df['Borough']!='Not assigned']
x1=df.groupby('Postalcode')['Neighborhood'].apply(lambda x: "%s" % ', '.join(x))
x1=x1.reset_index(drop=False)
x1.rename(columns={'Neighborhood':'Neighborhood_joined'},inplace=True)
m = pd.merge(df, x1, on='Postalcode')
m.drop(['Neighborhood'],axis=1,inplace=True)
m.drop_duplicates(inplace=True)
m.rename(columns={'Neighborhood_joined':'Neighborhood'},inplace=True)
m.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Use Geocoder package to retrieve latitude and longitude coordinates for Toronto dataset

In [6]:
def get_geocode(postal_code):
    # initialize variable to 'None'
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    return latitude,longitude

In [7]:
geo=pd.read_csv('https://ibm.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv')
geo.rename(columns={'Postal Code':'Postalcode'},inplace=True)
geom = pd.merge(geo, m, on='Postalcode')
toronto=geom[['Postalcode','Borough','Neighborhood','Latitude','Longitude']]
toronto.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Insert Foursquare credentials

In [40]:
ID= 'S0K0BZIKTCOBIPHLZ4XXADOL1G5FWWDSWUQC30E2ARVOTS5R'
secret= 'IQTKWREDOR4FSKTL2OD42EZEHBWVHYIGJLPWS4Q0AZ3UBT3Z'
version= '20201102'
limit=50

Create function to locate venues within the neighborhood (search via Foursquare API)

In [41]:
def getNearbyVenues(nborhood, radius=1000):
    
    venues=pd.DataFrame(columns=['City','Borough','Neighborhood','VenueName','VenueCategory'])
    for city, borough, neighborhood, lat, lng in zip(nborhood['City'], nborhood['Borough'], nborhood['Neighborhood'], nborhood['Latitude'], nborhood['Longitude']):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            ID, 
            secret, 
            version, 
            lat, 
            lng, 
            radius, 
            limit)
        
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        if results is not None:
            for v in results:
                venues = venues.append({'City':city, 'Borough':borough, 'Neighborhood': neighborhood, 
                           'VenueName': v['venue']['name'], 
                           'VenueCategory': v['venue']['categories'][0]['name']},ignore_index=True)

   
    
    return(venues)

Search for venues within the New York neighborhood

In [42]:
ny_venues = getNearbyVenues(ny_neighborhoods)

In [43]:
ny_venues = ny_venues.rename(columns={'Neighborhood': 'NeighborhoodName'})
ny_venues.head()

Unnamed: 0,City,Borough,NeighborhoodName,VenueName,VenueCategory
0,New York,Bronx,Wakefield,Lollipops Gelato,Dessert Shop
1,New York,Bronx,Wakefield,Ripe Kitchen & Bar,Caribbean Restaurant
2,New York,Bronx,Wakefield,Jackie's West Indian Bakery,Caribbean Restaurant
3,New York,Bronx,Wakefield,Ali's Roti Shop,Caribbean Restaurant
4,New York,Bronx,Wakefield,Carvel Ice Cream,Ice Cream Shop


Search for venues within the Toroto neighborhood

In [48]:
toronto_neighborhoods= pd.DataFrame(columns = ['City','Borough','Neighborhood'])

for ii in range(toronto.shape[0]):
    borough = toronto.loc[ii,'Borough']
    postalcode = toronto.loc[ii,'Postalcode']
    neighborhood = toronto.loc[ii,'Neighborhood'][:-1] #delete the last character \n
    
    #if the neighborhood name is not assigned, than the neighorhood name is same as borough
    if neighborhood == 'Not assigned':
        neighborhood = borough
    
    #find the location data, ignore the neighborhoods that are unable to be located by Nominatim
    geolocator = Nominatim(user_agent="mycapstoneproject")
    location = geolocator.geocode("{},{},Toronto,Ontario,Canada".format(neighborhood,borough))
    
    #try one more searching
    if location is None: 
        location = geolocator.geocode("{},Toronto,Ontario,Canada".format(neighborhood))
        
    if location is None: 
        print("The location data of {} in {} is not available!".format(neighborhood,borough))
    else:
        toronto_neighborhoods = toronto_neighborhoods.append({'City': 'Toronto',
                                                'Borough': borough,
                                                'Neighborhood': neighborhood,
                                                'Latitude': location.latitude,
                                                'Longitude': location.longitude
                                               }, ignore_index=True)

The location data of Malvern, Roug in Scarborough is not available!
The location data of Rouge Hill, Port Union, Highland Cree in Scarborough is not available!
The location data of Guildwood, Morningside, West Hil in Scarborough is not available!
The location data of Wobur in Scarborough is not available!
The location data of Scarborough Villag in Scarborough is not available!
The location data of Kennedy Park, Ionview, East Birchmount Par in Scarborough is not available!
The location data of Golden Mile, Clairlea, Oakridg in Scarborough is not available!
The location data of Cliffside, Cliffcrest, Scarborough Village Wes in Scarborough is not available!
The location data of Birch Cliff, Cliffside Wes in Scarborough is not available!
The location data of Dorset Park, Wexford Heights, Scarborough Town Centr in Scarborough is not available!
The location data of Wexford, Maryval in Scarborough is not available!
The location data of Agincour in Scarborough is not available!
The location da

The location data of Northwest, West Humber - Clairvill in Etobicoke is not available!


In [49]:
toronto_neighborhoods.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude
0,Toronto,Scarborough,Cedarbra,43.756467,-79.226692
1,Toronto,North York,Parkwood,43.687138,-79.410195
2,Toronto,North York,Don Mill,43.752003,-79.404199
3,Toronto,North York,Don Mill,43.752003,-79.404199
4,Toronto,Downtown Toronto,Christi,43.668268,-79.309689


In [50]:
toronto_venues = getNearbyVenues(toronto_neighborhoods)

In [83]:
toronto_venues = toronto_venues.rename(columns={'Neighborhood': 'NeighborhoodName'})
toronto_venues.head()

Unnamed: 0,City,Borough,NeighborhoodName,VenueName,VenueCategory
0,Toronto,Scarborough,Cedarbra,Windies Restaurant & Sports Bar,Pub
1,Toronto,Scarborough,Cedarbra,GoodLife Fitness Scarborough Cedarbrae Mall,Gym
2,Toronto,Scarborough,Cedarbra,Shoppers Drug Mart,Pharmacy
3,Toronto,Scarborough,Cedarbra,Charcoal Kebab House,Xinjiang Restaurant
4,Toronto,Scarborough,Cedarbra,Staples Cedarbrae,Paper / Office Supplies Store


Concatenate the data from New York venues and Toronto venues dataset  
We also replace the 'Neighborhood' column with 'NeighborhoodName' to prevent a multi-index conflict

In [53]:
allvenues =  pd.concat([ny_venues,toronto_venues])
allvenues = allvenues.rename(columns={'Neighborhood': 'NeighborhoodName'})
allvenues.head()

Unnamed: 0,City,Borough,NeighborhoodName,VenueName,VenueCategory
0,New York,Bronx,Wakefield,Lollipops Gelato,Dessert Shop
1,New York,Bronx,Wakefield,Ripe Kitchen & Bar,Caribbean Restaurant
2,New York,Bronx,Wakefield,Jackie's West Indian Bakery,Caribbean Restaurant
3,New York,Bronx,Wakefield,Ali's Roti Shop,Caribbean Restaurant
4,New York,Bronx,Wakefield,Carvel Ice Cream,Ice Cream Shop


Concatenate the data from New York neighborhoods and Toronto neighborhoods dataset

In [52]:
allneighborhoods =  pd.concat([ny_neighborhoods,toronto_neighborhoods])
allneighborhoods = allneighborhoods.rename(columns={'Neighborhood': 'NeighborhoodName'})
allneighborhoods.head()

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude
0,New York,Bronx,Wakefield,40.894705,-73.847201
1,New York,Bronx,Co-op City,40.874294,-73.829939
2,New York,Bronx,Eastchester,40.887556,-73.827806
3,New York,Bronx,Fieldston,40.895437,-73.905643
4,New York,Bronx,Riverdale,40.890834,-73.912585


Organize all venue types into separate columns

In [55]:
allvenues_onehot = pd.get_dummies(allvenues, columns = ['VenueCategory'], prefix="", prefix_sep="")
allvenues_onehot = allvenues_onehot.drop('VenueName',axis = 1)

In [57]:
allvenues_grouped = allvenues_onehot.groupby('NeighborhoodName',axis = 0).sum().reset_index()
allvenues_grouped = allneighborhoods.join(allvenues_grouped.set_index('NeighborhoodName'), on='NeighborhoodName')

#drop the rows with NaN (no venues information)
allvenues_grouped.dropna(inplace=True)
allvenues_grouped.head()

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Lounge,...,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,New York,Bronx,Wakefield,40.894705,-73.847201,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,New York,Bronx,Co-op City,40.874294,-73.829939,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,New York,Bronx,Eastchester,40.887556,-73.827806,0,0,0,0,0,...,0,0,1,0,1,1,0,0,0,0
3,New York,Bronx,Fieldston,40.895437,-73.905643,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,New York,Bronx,Riverdale,40.890834,-73.912585,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


Create a function that will return the top 10 most common venues within each neighborhood

In [58]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

columns = ['City','Borough','NeighborhoodName','Latitude','Longitude','Total Number of Venues']
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues

for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
allvenues_sorted = pd.DataFrame(columns = columns)
#allvenues_sorted = allvenues_grouped[['City','Borough','NeighborhoodName']]

for ind in range(allvenues_grouped.shape[0]):
    allvenues_sorted.loc[ind, 'City'] = allvenues_grouped.iloc[ind].City
    allvenues_sorted.loc[ind, 'Borough'] = allvenues_grouped.iloc[ind].Borough
    allvenues_sorted.loc[ind, 'NeighborhoodName'] = allvenues_grouped.iloc[ind].NeighborhoodName
    allvenues_sorted.loc[ind, 'Latitude'] = allvenues_grouped.iloc[ind].Latitude
    allvenues_sorted.loc[ind, 'Longitude'] = allvenues_grouped.iloc[ind].Longitude
    allvenues_sorted.loc[ind, 'Total Number of Venues'] = allvenues_grouped.iloc[ind,5:].sum()
    allvenues_sorted.iloc[ind, 6:] = return_most_common_venues(allvenues_grouped.iloc[ind, 5:], num_top_venues)

allvenues_sorted.head()

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude,Total Number of Venues,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,New York,Bronx,Wakefield,40.8947,-73.8472,47,Pharmacy,Pizza Place,Fried Chicken Joint,Donut Shop,Supermarket,Caribbean Restaurant,Fast Food Restaurant,Gas Station,Mobile Phone Shop,Bank
1,New York,Bronx,Co-op City,40.8743,-73.8299,50,Shopping Mall,Mobile Phone Shop,Kids Store,Discount Store,Shoe Store,Pharmacy,Mexican Restaurant,Mattress Store,Supermarket,Donut Shop
2,New York,Bronx,Eastchester,40.8876,-73.8278,50,Caribbean Restaurant,Fast Food Restaurant,Shopping Mall,Diner,Asian Restaurant,Supplement Shop,Mobile Phone Shop,Discount Store,Burger Joint,Donut Shop
3,New York,Bronx,Fieldston,40.8954,-73.9056,46,Deli / Bodega,Pizza Place,Bar,Plaza,Bank,Sandwich Place,Mexican Restaurant,Coffee Shop,Medical Supply Store,Park
4,New York,Bronx,Riverdale,40.8908,-73.9126,44,Bar,Bank,Pizza Place,Playground,Plaza,Coffee Shop,Park,Mexican Restaurant,Japanese Restaurant,Deli / Bodega


Organize the neighborhoods into clusters

In [60]:
import scipy.cluster.hierarchy as shc
import matplotlib.pyplot as plt

data = allvenues_grouped.iloc[:,6:]
plt.figure(figsize=(10, 7))   
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Neighborhoods')
plt.ylabel('Distance')
plt.axhline(y=31, c='k')
dend = shc.dendrogram(shc.linkage(data, method='ward'))

In [61]:
from sklearn.cluster import AgglomerativeClustering

cluster = AgglomerativeClustering(n_clusters=9, affinity='euclidean', linkage='ward')  
clusterresult = cluster.fit_predict(data)

In [62]:
allvenues_sorted['Cluster_Labels'] = clusterresult

Create a function to convert the clusters into a visual map

In [71]:
def clustermap(cityname,countryname,dataframe):
    # create map
    address = cityname + ',' + countryname

    geolocator = Nominatim(user_agent="coursera_capstone_project")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    my_map = folium.Map(location=[latitude, longitude], zoom_start=10)

    # set color scheme for the Cluster_Labels
    colnum = 9
    x = np.arange(colnum)
    ys = [i+x+(i*x)**2 for i in range(colnum)]
    colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
    rainbow = [colors.rgb2hex(i) for i in colors_array]

    # add markers to the map

    for lat, lon, neighborhood, borough, cluster_labels in zip(dataframe['Latitude'], 
                                                               dataframe['Longitude'], 
                                                               dataframe['NeighborhoodName'], 
                                                               dataframe['Borough'],
                                                               dataframe['Cluster_Labels']):
        cluster = cluster_labels 
        label = '{}, {},Cluster:{}'.format(neighborhood, borough,cluster_labels)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=label,
            color=rainbow[cluster],
            fill=True,
            fill_color=rainbow[cluster],
            fill_opacity=0.7).add_to(my_map)
       
    
    return my_map

Determine the number of neighborhoods within each cluster

In [64]:
for ii in range(9):
    num = allvenues_sorted[allvenues_sorted['Cluster_Labels'] == ii].shape[0]
    print('Total number of neighborhoods in cluster {} is {}'.format(ii, num))

Total number of neighborhoods in cluster 0 is 69
Total number of neighborhoods in cluster 1 is 30
Total number of neighborhoods in cluster 2 is 6
Total number of neighborhoods in cluster 3 is 22
Total number of neighborhoods in cluster 4 is 69
Total number of neighborhoods in cluster 5 is 2
Total number of neighborhoods in cluster 6 is 2
Total number of neighborhoods in cluster 7 is 51
Total number of neighborhoods in cluster 8 is 60


Display the clusters as a table and visual map

In [69]:
allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 0]

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude,Total Number of Venues,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
1,New York,Bronx,Co-op City,40.8743,-73.8299,50,Shopping Mall,Mobile Phone Shop,Kids Store,Discount Store,Shoe Store,Pharmacy,Mexican Restaurant,Mattress Store,Supermarket,Donut Shop,0
3,New York,Bronx,Fieldston,40.8954,-73.9056,46,Deli / Bodega,Pizza Place,Bar,Plaza,Bank,Sandwich Place,Mexican Restaurant,Coffee Shop,Medical Supply Store,Park,0
4,New York,Bronx,Riverdale,40.8908,-73.9126,44,Bar,Bank,Pizza Place,Playground,Plaza,Coffee Shop,Park,Mexican Restaurant,Japanese Restaurant,Deli / Bodega,0
7,New York,Bronx,Woodlawn,40.8983,-73.8673,50,Bar,Pizza Place,Pub,Deli / Bodega,Pharmacy,Discount Store,Baseball Field,Bank,Donut Shop,Caribbean Restaurant,0
10,New York,Bronx,Baychester,40.8669,-73.8358,50,Clothing Store,Department Store,Shopping Mall,Discount Store,Cosmetics Shop,Pharmacy,Supermarket,Spanish Restaurant,Shoe Store,Donut Shop,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
262,New York,Brooklyn,Mill Basin,40.616,-73.9152,50,Clothing Store,Pizza Place,Snack Place,Japanese Restaurant,Lingerie Store,Deli / Bodega,Cosmetics Shop,Convenience Store,Italian Restaurant,Pet Store,0
264,New York,Queens,Utopia,40.7335,-73.7967,50,Pizza Place,Japanese Restaurant,Automotive Shop,Sandwich Place,Ice Cream Shop,Coffee Shop,Hotel,Mattress Store,Bank,Bar,0
265,New York,Queens,Pomonok,40.7349,-73.8049,44,Deli / Bodega,Pizza Place,Automotive Shop,Basketball Court,Bus Station,Chinese Restaurant,Food Truck,Bar,Halal Restaurant,Grocery Store,0
284,New York,Staten Island,Manor Heights,40.6018,-74.1206,34,Chinese Restaurant,Liquor Store,Deli / Bodega,Bagel Shop,Donut Shop,Bus Stop,Pharmacy,Go Kart Track,Ice Cream Shop,Gym / Fitness Center,0


The cluster maps of New York and Toronto must be displayed separately

In [84]:
cluster0 = allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 0]
clustermap('New York','USA',cluster0[cluster0['City'] == 'New York'])

In [85]:
clustermap('Toronto','Canada',cluster0[cluster0['City'] == 'Toronto'])

In [98]:
allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 6]

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude,Total Number of Venues,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
308,Toronto,North York,Don Mill,43.752,-79.4042,78,Coffee Shop,Park,Thai Restaurant,Gym,Restaurant,Intersection,Bubble Tea Shop,Bus Station,Fast Food Restaurant,Gym / Fitness Center,6
309,Toronto,North York,Don Mill,43.752,-79.4042,78,Coffee Shop,Park,Thai Restaurant,Gym,Restaurant,Intersection,Bubble Tea Shop,Bus Station,Fast Food Restaurant,Gym / Fitness Center,6


In [99]:
cluster6 = allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 6]
clustermap('New York','USA',cluster6[cluster6['City'] == 'New York'])

In [100]:
clustermap('Toronto','Canada',cluster6[cluster6['City'] == 'Toronto'])

In [74]:
allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 4]

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude,Total Number of Venues,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
12,New York,Bronx,City Island,40.8472,-73.7865,47,Harbor / Marina,Seafood Restaurant,Boat or Ferry,Italian Restaurant,Bar,Thrift / Vintage Store,American Restaurant,Park,Theater,History Museum,4
46,New York,Brooklyn,Bay Ridge,40.6258,-74.0306,50,Pizza Place,Spa,Italian Restaurant,Cosmetics Shop,Grocery Store,Greek Restaurant,American Restaurant,Hookah Bar,Bar,Bakery,4
58,New York,Brooklyn,Windsor Terrace,40.6569,-73.9801,50,Park,Café,Bar,Wine Shop,Italian Restaurant,Grocery Store,French Restaurant,Playground,Plaza,Deli / Bodega,4
61,New York,Brooklyn,Williamsburg,40.7071,-73.9581,50,Pizza Place,Coffee Shop,Wine Bar,Cocktail Bar,South American Restaurant,Liquor Store,Bike Shop,Taco Place,Steakhouse,Bagel Shop,4
64,New York,Brooklyn,Brooklyn Heights,40.6959,-73.9938,50,Park,Yoga Studio,Scenic Lookout,Pier,Coffee Shop,Wine Shop,Cocktail Bar,Gym,Italian Restaurant,Japanese Restaurant,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280,New York,Brooklyn,Vinegar Hill,40.7033,-73.9811,50,Park,Gym,Antique Shop,Yoga Studio,Café,Bakery,Bar,Boxing Gym,Art Gallery,Bookstore,4
283,New York,Brooklyn,Dumbo,40.7032,-73.9888,50,Park,Bakery,Boxing Gym,Art Gallery,Gym,Coffee Shop,Bookstore,Ice Cream Shop,Yoga Studio,Breakfast Spot,4
301,New York,Manhattan,Hudson Yards,40.7567,-74.0001,50,Gym / Fitness Center,Theater,Hotel,Coffee Shop,Peruvian Restaurant,American Restaurant,Gym,Italian Restaurant,Art Gallery,Thai Restaurant,4
304,New York,Queens,Queensbridge,40.7561,-73.9456,50,Coffee Shop,Hotel,Bar,Brewery,Café,Restaurant,Bubble Tea Shop,Park,Greek Restaurant,Italian Restaurant,4


In [86]:
cluster4= allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 4]
clustermap('New York','USA',cluster4[cluster4['City'] == 'New York'])

In [88]:
clustermap('Toronto','Canada',cluster4[cluster4['City'] == 'Toronto'])

In [76]:
allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 7]

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude,Total Number of Venues,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
19,New York,Bronx,High Bridge,40.8366,-73.9261,39,Lounge,Baseball Stadium,Plaza,Park,Sandwich Place,Historic Site,Baseball Field,Gym,BBQ Joint,History Museum,7
22,New York,Bronx,Port Morris,40.8017,-73.9132,40,Baseball Field,Donut Shop,Pizza Place,Furniture / Home Store,Gym,Grocery Store,Deli / Bodega,Health Food Store,Peruvian Restaurant,Performing Arts Venue,7
24,New York,Bronx,Hunts Point,40.8097,-73.8833,30,Park,Food,Nightclub,Grocery Store,Fast Food Restaurant,Gourmet Shop,Market,Bakery,Bank,Mexican Restaurant,7
27,New York,Bronx,Clason Point,40.8066,-73.8541,8,Park,River,Deli / Bodega,Gym / Fitness Center,Pool,Bus Station,Discount Store,Zoo Exhibit,Financial or Legal Service,Fish Market,7
40,New York,Bronx,Castle Hill,40.819,-73.848,38,Deli / Bodega,Supermarket,Pizza Place,Bus Station,Park,Bus Stop,Japanese Restaurant,Photography Studio,Baseball Field,Mexican Restaurant,7
72,New York,Brooklyn,East New York,40.6699,-73.8807,27,Pizza Place,Supermarket,Chinese Restaurant,Pharmacy,Deli / Bodega,Playground,Spanish Restaurant,Caribbean Restaurant,Grocery Store,Metro Station,7
76,New York,Brooklyn,Mill Island,40.6063,-73.9082,25,Pizza Place,Harbor / Marina,Gym,Stables,Park,Tourist Information Center,Hardware Store,Entertainment Service,Bakery,Golf Course,7
85,New York,Brooklyn,Sea Gate,40.5764,-74.0079,15,Beach,Supermarket,Donut Shop,Chinese Restaurant,Park,Pharmacy,Home Service,Pizza Place,Basketball Court,Spa,7
91,New York,Brooklyn,Bergen Beach,40.6151,-73.8986,10,Sushi Restaurant,American Restaurant,Stables,Bus Station,Peruvian Restaurant,Pizza Place,Liquor Store,Supermarket,Deli / Bodega,Italian Restaurant,7
148,New York,Queens,South Ozone Park,40.6685,-73.8099,44,Hotel,Pizza Place,Deli / Bodega,Donut Shop,Food,Gym,Park,Ice Cream Shop,Caribbean Restaurant,Intersection,7


In [89]:
cluster7= allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 7]
clustermap('New York','USA',cluster7[cluster7['City'] == 'New York'])

In [90]:
clustermap('Toronto','Canada',cluster7[cluster7['City'] == 'Toronto'])

In [78]:
allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 8]

Unnamed: 0,City,Borough,NeighborhoodName,Latitude,Longitude,Total Number of Venues,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
5,New York,Bronx,Kingsbridge,40.8817,-73.9028,50,Pizza Place,Mexican Restaurant,Japanese Restaurant,Donut Shop,Bar,Burger Joint,Spanish Restaurant,Coffee Shop,Bakery,Diner,8
6,New York,Manhattan,Marble Hill,40.8766,-73.9107,50,Pizza Place,Mexican Restaurant,Café,Sandwich Place,Coffee Shop,Gym,Discount Store,Park,Spanish Restaurant,Chinese Restaurant,8
8,New York,Bronx,Norwood,40.8772,-73.8794,50,Pizza Place,Park,Donut Shop,Sandwich Place,Pharmacy,Bank,Mexican Restaurant,Diner,Gym,Grocery Store,8
11,New York,Bronx,Pelham Parkway,40.8574,-73.8548,50,Pizza Place,Bank,Donut Shop,Italian Restaurant,Sandwich Place,Coffee Shop,Bakery,Cosmetics Shop,Pharmacy,Gas Station,8
13,New York,Bronx,Bedford Park,40.8702,-73.8855,50,Pizza Place,Diner,Park,Sandwich Place,Botanical Garden,Grocery Store,Gym,Garden,Mexican Restaurant,Chinese Restaurant,8
17,New York,Bronx,East Tremont,40.8427,-73.8874,50,Pizza Place,Donut Shop,Zoo Exhibit,Park,Sandwich Place,Bank,Breakfast Spot,Mobile Phone Shop,Lounge,Fast Food Restaurant,8
18,New York,Bronx,West Farms,40.8395,-73.8777,49,Pizza Place,Donut Shop,Park,Zoo,Zoo Exhibit,Fast Food Restaurant,Supermarket,Sandwich Place,Grocery Store,Lounge,8
20,New York,Bronx,Melrose,40.8198,-73.9094,50,Mexican Restaurant,Pizza Place,Sandwich Place,Pharmacy,Gym,Grocery Store,Kids Store,Fried Chicken Joint,Diner,Donut Shop,8
21,New York,Bronx,Mott Haven,40.8062,-73.9161,50,Pizza Place,Mexican Restaurant,Donut Shop,Gym,Chinese Restaurant,Discount Store,Furniture / Home Store,Italian Restaurant,Speakeasy,Grocery Store,8
30,New York,Bronx,Parkchester,40.8379,-73.856,50,Pizza Place,Latin American Restaurant,Caribbean Restaurant,Diner,Gym,Donut Shop,Kids Store,Mexican Restaurant,Spanish Restaurant,Supermarket,8


In [91]:
cluster8= allvenues_sorted[allvenues_sorted['Cluster_Labels'] == 8]
clustermap('New York','USA',cluster8[cluster8['City'] == 'New York'])

In [92]:
clustermap('Toronto','Canada',cluster8[cluster8['City'] == 'Toronto'])

## The results show that New York has significantly more neighborhoods per cluster compared to Toronto. However, Toronto has about the same, if not more, venues per neighborhood compared to New York. This shows that while New York does cover a wider range, Toronto may be equal in terms of business.