# Comparing Toronto and New York Neighborhoods

In [1]:
#Import Libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 
# converts an address into latitude and longitude 
from geopy.geocoders import Nominatim 
import requests 
 # Converts JSON file into a pandas dataframe
from pandas.io.json import json_normalize


# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib import pyplot as plt

#For cluster analysis 
from sklearn.cluster import KMeans

!pip install folium
import folium 

from IPython.display import HTML
import time



# Import and Clean the Data

In [2]:
#Obtain Postal Code, Borough, and Neighborhood information from Wikipedia
table = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header = 0)

#Obtain the first table
df_toronto = table[0]
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [3]:
df_toronto.rename(columns = {"Postcode": "PostalCode", "Neighbourhood": "Neighborhood"}, inplace = True)

#Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df_toronto.drop(df_toronto[df_toronto.Borough == 'Not assigned'].index, inplace=True)
#df.head()

#Combine the neighborhoods that exists in one postal code
df_toronto = df_toronto.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(lambda x: ','.join(x)).reset_index()
#df.head()

#Change unassigned Neighborhood to its Borough's name
df_toronto.loc[85,'Neighborhood'] = 'Queen\'s Park'

print (df_toronto.shape)

df_toronto.tail()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."
102,M9W,Etobicoke,Northwest


In [4]:
#Create a dataframe of the latitude and longitudes of the Toronto Neighborhoods
latlong = pd.read_csv("http://cocl.us/Geospatial_data")
latlong.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [5]:
latlong.rename(columns = {"Postal Code": "PostalCode"}, inplace = True)
latlong.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [6]:
latlong.tail()

Unnamed: 0,PostalCode,Latitude,Longitude
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437
102,M9W,43.706748,-79.594054


In [7]:
#Join the two tables
df_toronto.set_index("PostalCode")
latlong.set_index("PostalCode")
toronto_neighbor=pd.merge(df_toronto, latlong)
toronto_neighbor.tail()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.739416,-79.588437
102,M9W,Etobicoke,Northwest,43.706748,-79.594054


In [8]:
print('Toronto has {} boroughs and {} neighborhoods.'.format(
        len(toronto_neighbor['Borough'].unique()),
        toronto_neighbor.shape[0]
    )
)

Toronto has 11 boroughs and 103 neighborhoods.


In [9]:
#Next we will add the City to the Neighborhood to be able to identify it later from our New York List
toronto_neighbor['Neighborhood']=toronto_neighbor['Neighborhood'].add(', Toronto')
toronto_neighbor.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern, Toronto",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union, Toronto",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill, Toronto",43.763573,-79.188711
3,M1G,Scarborough,"Woburn, Toronto",43.770992,-79.216917
4,M1H,Scarborough,"Cedarbrae, Toronto",43.773136,-79.239476


In [10]:
address = 'Toronto, CA'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Toronto, Canada are 43.653963, -79.387207.


In [11]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_neighbor['Latitude'], toronto_neighbor['Longitude'], toronto_neighbor['Borough'], toronto_neighbor['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='Purple',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

# Utilize the Foursquare API and create a function to generate a litst of nearby Venues

In [12]:
#Exploring with API
{
    "tags": [
        "hide_input",
    ]
}

CLIENT_ID = '1TQX3BIGE0AVURRWD1YKVRLYVZZA5A3MQ52OZOEDFZBKVMJW' # your Foursquare ID
CLIENT_SECRET = 'E513XDA24031SHHX4EBAU4G2PY0CUYXAASQZKNHNZ4LRX1XQ' # your Foursquare Secret
VERSION = '20200217' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1TQX3BIGE0AVURRWD1YKVRLYVZZA5A3MQ52OZOEDFZBKVMJW
CLIENT_SECRET:E513XDA24031SHHX4EBAU4G2PY0CUYXAASQZKNHNZ4LRX1XQ


In [13]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            100)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    print('Found {} venues in {} neighborhoods.'.format(nearby_venues.shape[0], len(venues_list)))
    
    return(nearby_venues)

In [14]:
toronto_neighbor = getNearbyVenues(names=toronto_neighbor['Neighborhood'],
                                   latitudes=toronto_neighbor['Latitude'],
                                   longitudes=toronto_neighbor['Longitude'],
                                  )

Found 2236 venues in 103 neighborhoods.


In [15]:
#Remove duplicate neighborhoods
toronto_neighbor = toronto_neighbor.drop_duplicates('Neighborhood', keep='first')

In [16]:
print(toronto_neighbor.shape)
toronto_neighbor.head()

(100, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern, Toronto",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,"Highland Creek,Rouge Hill,Port Union, Toronto",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood,Morningside,West Hill, Toronto",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
12,"Woburn, Toronto",43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop
15,"Cedarbrae, Toronto",43.773136,-79.239476,Drupati's Roti & Doubles,43.775222,-79.241678,Caribbean Restaurant


In [18]:
#Venues per Neighborhood
toronto_neighbor.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond, Toronto",1,1,1,1,1,1
"Agincourt North,L'Amoreaux East,Milliken,Steeles East, Toronto",1,1,1,1,1,1
"Agincourt, Toronto",1,1,1,1,1,1
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown, Toronto",1,1,1,1,1,1
"Alderwood,Long Branch, Toronto",1,1,1,1,1,1
"Bathurst Manor,Downsview North,Wilson Heights, Toronto",1,1,1,1,1,1
"Bayview Village, Toronto",1,1,1,1,1,1
"Bedford Park,Lawrence Manor East, Toronto",1,1,1,1,1,1
"Berczy Park, Toronto",1,1,1,1,1,1
"Birch Cliff,Cliffside West, Toronto",1,1,1,1,1,1


In [19]:
print('There are {} distinct venues in {} categories.'.format(
    len(toronto_neighbor['Venue'].unique()),len(toronto_neighbor['Venue Category'].unique())))

#print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 93 distinct venues in 58 categories.


In [20]:
#analyzing the neighborhood

# one hot encoding
toronto_onehot = pd.get_dummies(toronto_neighbor[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_neighbor['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Airport,Arts & Crafts Store,Bakery,Bank,Bar,Baseball Field,Boutique,Breakfast Spot,Brewery,Cafeteria,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,Concert Hall,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Dog Run,Fast Food Restaurant,Field,Food,Garden,Gastropub,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Liquor Store,Massage Studio,Motel,Museum,Neighborhood,Park,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Sports Bar,Steakhouse,Tennis Court,Theme Restaurant,Toy / Game Store,Trail,Warehouse Store,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Rouge,Malvern, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Highland Creek,Rouge Hill,Port Union, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Guildwood,Morningside,West Hill, Toronto",0,1,0,0,0,0,0,0,0,0,0,0,0,0
12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Woburn, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0
15,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Cedarbrae, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Arts & Crafts Store,Bakery,Bank,Bar,Baseball Field,Boutique,Breakfast Spot,Brewery,Cafeteria,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,Concert Hall,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Dog Run,Fast Food Restaurant,Field,Food,Garden,Gastropub,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Liquor Store,Massage Studio,Motel,Museum,Park,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Sports Bar,Steakhouse,Tennis Court,Theme Restaurant,Toy / Game Store,Trail,Warehouse Store,Wings Joint
0,"Adelaide,King,Richmond, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Agincourt, Toronto",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
4,"Alderwood,Long Branch, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
5,"Bathurst Manor,Downsview North,Wilson Heights,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,"Bayview Village, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,"Bedford Park,Lawrence Manor East, Toronto",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,"Berczy Park, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
9,"Birch Cliff,Cliffside West, Toronto",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [22]:
num_top_venues = 10

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond, Toronto----
                  venue  freq
0          Concert Hall   1.0
1           Yoga Studio   0.0
2                Museum   0.0
3  Gym / Fitness Center   0.0
4          Hockey Arena   0.0
5          Home Service   0.0
6                 Hotel   0.0
7        Ice Cream Shop   0.0
8     Indian Restaurant   0.0
9    Italian Restaurant   0.0


----Agincourt North,L'Amoreaux East,Milliken,Steeles East, Toronto----
                  venue  freq
0                  Park   1.0
1         Grocery Store   0.0
2  Gym / Fitness Center   0.0
3          Hockey Arena   0.0
4          Home Service   0.0
5                 Hotel   0.0
6        Ice Cream Shop   0.0
7     Indian Restaurant   0.0
8    Italian Restaurant   0.0
9   Japanese Restaurant   0.0


----Agincourt, Toronto----
                  venue  freq
0        Breakfast Spot   1.0
1           Yoga Studio   0.0
2                  Park   0.0
3  Gym / Fitness Center   0.0
4          Hockey Arena   0.0
5          Home Se

In [23]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# List of Nearby Venues for each Toronto Neighborhood

In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
t_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
t_neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    t_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

t_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond, Toronto",Concert Hall,Wings Joint,Golf Course,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Wings Joint,Chinese Restaurant,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
2,"Agincourt, Toronto",Breakfast Spot,Wings Joint,Clothing Store,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Sandwich Place,Wings Joint,Chinese Restaurant,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
4,"Alderwood,Long Branch, Toronto",Pizza Place,Wings Joint,Chinese Restaurant,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
5,"Bathurst Manor,Downsview North,Wilson Heights,...",Deli / Bodega,Wings Joint,Golf Course,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
6,"Bayview Village, Toronto",Chinese Restaurant,Golf Course,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store,Deli / Bodega
7,"Bedford Park,Lawrence Manor East, Toronto",Café,Wings Joint,Clothing Store,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
8,"Berczy Park, Toronto",Steakhouse,Wings Joint,Chinese Restaurant,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store
9,"Birch Cliff,Cliffside West, Toronto",Café,Wings Joint,Clothing Store,Garden,Food,Field,Fast Food Restaurant,Dog Run,Dessert Shop,Department Store


In [25]:
t_neighborhoods_venues_sorted.iloc[11,]

Neighborhood              Brockton,Exhibition Place,Parkdale Village, To...
1st Most Common Venue                                                   Gym
2nd Most Common Venue                                           Wings Joint
3rd Most Common Venue                                        Clothing Store
4th Most Common Venue                                                Garden
5th Most Common Venue                                                  Food
6th Most Common Venue                                                 Field
7th Most Common Venue                                  Fast Food Restaurant
8th Most Common Venue                                               Dog Run
9th Most Common Venue                                          Dessert Shop
10th Most Common Venue                                     Department Store
Name: 11, dtype: object

# Pre-Process the New York Data before Clustering together with the Toronto Data 

In [26]:
# New York Data

!wget -q -O 'newyork_data.json' https://ibm.box.com/shared/static/fbpwbovar7lf8p5sgddm06cgipa2rxpe.json
print('Data downloaded!')

Data downloaded!


In [27]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [28]:
ny_neighborhoods_data = newyork_data['features']

In [29]:
ny_neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [30]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
ny_neighborhoods = pd.DataFrame(columns=column_names)

In [31]:
for data in ny_neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    ny_neighborhoods = ny_neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [32]:
ny_neighborhoods.tail()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
301,Manhattan,Hudson Yards,40.756658,-74.000111
302,Queens,Hammels,40.587338,-73.80553
303,Queens,Bayswater,40.611322,-73.765968
304,Queens,Queensbridge,40.756091,-73.945631
305,Staten Island,Fox Hills,40.617311,-74.08174


In [33]:
#Next we will add the City to the Neighborhood to be able to identify it later from our New York List
ny_neighborhoods['Neighborhood']=ny_neighborhoods['Neighborhood'].add(', New York')
ny_neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,"Wakefield, New York",40.894705,-73.847201
1,Bronx,"Co-op City, New York",40.874294,-73.829939
2,Bronx,"Eastchester, New York",40.887556,-73.827806
3,Bronx,"Fieldston, New York",40.895437,-73.905643
4,Bronx,"Riverdale, New York",40.890834,-73.912585


In [34]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(ny_neighborhoods['Borough'].unique()),
        ny_neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


In [35]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent = 'my-application')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [36]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(ny_neighborhoods['Latitude'], ny_neighborhoods['Longitude'], ny_neighborhoods['Borough'], ny_neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [37]:
neighborhood_lat = ny_neighborhoods.loc[10, 'Latitude'] # neighborhood latitude value
neighborhood_lon = ny_neighborhoods.loc[10, 'Longitude'] # neighborhood longitude value

neighborhood_name = ny_neighborhoods.loc[10, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_lat, 
                                                               neighborhood_lon))

Latitude and longitude values of Baychester, New York are 40.866858107252696, -73.83579759808117.


# Utilize the Foursquare API and create a function to generate a litst of nearby Venues

In [39]:
# type your answer here
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_lat, 
    neighborhood_lon, 
    radius, 
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=1TQX3BIGE0AVURRWD1YKVRLYVZZA5A3MQ52OZOEDFZBKVMJW&client_secret=E513XDA24031SHHX4EBAU4G2PY0CUYXAASQZKNHNZ4LRX1XQ&v=20200217&ll=40.866858107252696,-73.83579759808117&radius=500&limit=100'

In [40]:
#Send the GET request
results = requests.get(url).json()

In [41]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [42]:
venues = results['response']['groups'][0]['items']
    
ny_nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
ny_nearby_venues =ny_nearby_venues.loc[:, filtered_columns]

# filter the category for each row
ny_nearby_venues['venue.categories'] = ny_nearby_venues.apply(get_category_type, axis=1)

# clean columns
ny_nearby_venues.columns = [col.split(".")[-1] for col in ny_nearby_venues.columns]

ny_nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Caridad & Louie,Spanish Restaurant,40.865843,-73.837707
1,Dunkin',Donut Shop,40.8678,-73.833365
2,Planet Fitness,Gym / Fitness Center,40.863298,-73.835568
3,Chase Bank,Bank,40.866132,-73.838398
4,ALDI Food Market,Supermarket,40.86325,-73.835002


In [44]:
#Function 'getNearbyVenues' was defined earlier
ny_venues = getNearbyVenues(names=ny_neighborhoods['Neighborhood'],
                                   latitudes=ny_neighborhoods['Latitude'],
                                   longitudes=ny_neighborhoods['Longitude'])

Found 10287 venues in 306 neighborhoods.


In [45]:
ny_venues = ny_venues.drop_duplicates('Neighborhood', keep='first')

In [46]:
ny_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Wakefield, New York",40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
10,"Co-op City, New York",40.874294,-73.829939,Dollar Tree,40.870125,-73.828989,Discount Store
26,"Eastchester, New York",40.887556,-73.827806,Fish & Ting,40.885656,-73.829197,Caribbean Restaurant
45,"Fieldston, New York",40.895437,-73.905643,Fieldston Road Circle,40.894489,-73.905621,Plaza
49,"Riverdale, New York",40.890834,-73.912585,Riverdale Ave,40.890425,-73.910248,Plaza


In [47]:
#Venues per Neighborhood
ny_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Allerton, New York",1,1,1,1,1,1
"Annadale, New York",1,1,1,1,1,1
"Arden Heights, New York",1,1,1,1,1,1
"Arlington, New York",1,1,1,1,1,1
"Arrochar, New York",1,1,1,1,1,1
"Arverne, New York",1,1,1,1,1,1
"Astoria Heights, New York",1,1,1,1,1,1
"Astoria, New York",1,1,1,1,1,1
"Auburndale, New York",1,1,1,1,1,1
"Bath Beach, New York",1,1,1,1,1,1


In [48]:
# one hot encoding
ny_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ny_onehot['Neighborhood'] = ny_venues['Neighborhood'] 

# move neighborhood column to the first column

neighbor = ny_onehot['Neighborhood']
ny_onehot.drop(labels=['Neighborhood'], axis=1,inplace = True)
ny_onehot.insert(0, 'Neighborhood', neighbor)

ny_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,BBQ Joint,Bagel Shop,Bakery,Bar,Baseball Field,Beach,Beer Bar,Beer Store,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Bus Stop,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Casino,Cocktail Bar,Coffee Shop,College Basketball Court,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dog Run,Donut Shop,Dumpling Restaurant,Farm,Food,Food & Drink Shop,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,History Museum,Ice Cream Shop,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Lawyer,Liquor Store,Lounge,Market,Martial Arts Dojo,Mexican Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Museum,Nail Salon,New American Restaurant,Opera House,Outdoor Sculpture,Paper / Office Supplies Store,Park,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Polish Restaurant,Pool,Ramen Restaurant,Recreation Center,Rental Car Location,Rest Area,Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Skating Rink,Snack Place,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theater,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,"Wakefield, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10,"Co-op City, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
26,"Eastchester, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
45,"Fieldston, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
49,"Riverdale, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [49]:
ny_grouped = ny_onehot.groupby('Neighborhood').mean().reset_index()
ny_grouped

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,BBQ Joint,Bagel Shop,Bakery,Bar,Baseball Field,Beach,Beer Bar,Beer Store,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Bus Stop,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Casino,Cocktail Bar,Coffee Shop,College Basketball Court,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dog Run,Donut Shop,Dumpling Restaurant,Farm,Food,Food & Drink Shop,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,History Museum,Ice Cream Shop,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Lawyer,Liquor Store,Lounge,Market,Martial Arts Dojo,Mexican Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Museum,Nail Salon,New American Restaurant,Opera House,Outdoor Sculpture,Paper / Office Supplies Store,Park,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Polish Restaurant,Pool,Ramen Restaurant,Recreation Center,Rental Car Location,Rest Area,Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Skating Rink,Snack Place,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theater,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,"Allerton, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Annadale, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,"Arden Heights, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Arlington, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Arrochar, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,"Arverne, New York",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,"Astoria Heights, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,"Astoria, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,"Auburndale, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,"Bath Beach, New York",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [50]:
num_top_venues = 5

for hood in ny_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = ny_grouped[ny_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Allerton, New York----
                venue  freq
0         Pizza Place   1.0
1  African Restaurant   0.0
2          Nail Salon   0.0
3                Pool   0.0
4   Polish Restaurant   0.0


----Annadale, New York----
                venue  freq
0          Sports Bar   1.0
1  African Restaurant   0.0
2              Museum   0.0
3   Polish Restaurant   0.0
4               Plaza   0.0


----Arden Heights, New York----
                venue  freq
0         Pizza Place   1.0
1  African Restaurant   0.0
2          Nail Salon   0.0
3                Pool   0.0
4   Polish Restaurant   0.0


----Arlington, New York----
                     venue  freq
0                 Bus Stop   1.0
1       African Restaurant   0.0
2  New American Restaurant   0.0
3                     Pool   0.0
4        Polish Restaurant   0.0


----Arrochar, New York----
                venue  freq
0         Pizza Place   1.0
1  African Restaurant   0.0
2          Nail Salon   0.0
3                Pool   0.0
4   Polis

In [51]:
#Function to sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# Generate a list of nearby venues for New York neighborhoods

In [78]:
# Create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
ny_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
ny_neighborhoods_venues_sorted['Neighborhood'] = ny_grouped['Neighborhood']

for ind in np.arange(ny_grouped.shape[0]):
    ny_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped.iloc[ind, :], num_top_venues)

ny_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Allerton, New York",Pizza Place,Yoga Studio,Farm,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner
1,"Annadale, New York",Sports Bar,Yoga Studio,Food,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner
2,"Arden Heights, New York",Pizza Place,Yoga Studio,Farm,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner
3,"Arlington, New York",Bus Stop,Yoga Studio,Food,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store
4,"Arrochar, New York",Pizza Place,Yoga Studio,Farm,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner
5,"Arverne, New York",Beach,Yoga Studio,Food & Drink Shop,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store
6,"Astoria Heights, New York",Plaza,Yoga Studio,Farm,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner
7,"Astoria, New York",Brazilian Restaurant,Yoga Studio,Food,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store
8,"Auburndale, New York",Italian Restaurant,Food,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store
9,"Bath Beach, New York",Park,Yoga Studio,Farm,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Dessert Shop,Diner


# Next we will combine the NY and Toronto list and cluster them together. 

In [53]:
# Combine toronto_neighbor and ny_venues to create final_group
#Now that we've seen the top venues for NY and Toronto individually, let's create a combined table to cluster like neighborhoods.
frames = [toronto_neighbor, ny_venues]
group_df = pd.concat(frames)
group_df.tail()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
10164,"Hudson Yards, New York",40.756658,-74.000111,Bella Abzug Park,40.75558,-74.000344,Park
10246,"Hammels, New York",40.587338,-73.80553,Far Rockaway Beach - 81st Street,40.586176,-73.807452,Beach
10264,"Bayswater, New York",40.611322,-73.765968,Westbourne Playground,40.608445,-73.76448,Playground
10266,"Queensbridge, New York",40.756091,-73.945631,Queensboro Bridge Pedestrian & Bike Path,40.755201,-73.950445,Scenic Lookout
10282,"Fox Hills, New York",40.617311,-74.08174,SUBWAY,40.618939,-74.082881,Sandwich Place


In [54]:
# one hot encoding
combo_onehot = pd.get_dummies(group_df[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
combo_onehot['Neighborhood'] = group_df['Neighborhood'] 

# move neighborhood column to the first column

neighbor = combo_onehot['Neighborhood']
combo_onehot.drop(labels=['Neighborhood'], axis=1,inplace = True)
combo_onehot.insert(0, 'Neighborhood', neighbor)

combo_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,Airport,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beach,Beer Bar,Beer Store,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Bus Stop,Cafeteria,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Casino,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Basketball Court,Concert Hall,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dog Run,Donut Shop,Dumpling Restaurant,Farm,Fast Food Restaurant,Field,Food,Food & Drink Shop,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gastropub,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,History Museum,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Lawyer,Liquor Store,Lounge,Market,Martial Arts Dojo,Massage Studio,Mexican Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Nail Salon,New American Restaurant,Opera House,Outdoor Sculpture,Paper / Office Supplies Store,Park,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Polish Restaurant,Pool,Ramen Restaurant,Recreation Center,Rental Car Location,Rest Area,Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Skating Rink,Snack Place,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Steakhouse,Sushi Restaurant,Taco Place,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Rouge,Malvern, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek,Rouge Hill,Port Union, Toronto",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Guildwood,Morningside,West Hill, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12,"Woburn, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
15,"Cedarbrae, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [55]:
final_group = combo_onehot.groupby('Neighborhood').mean().reset_index()
final_group.head()

Unnamed: 0,Neighborhood,African Restaurant,Airport,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beach,Beer Bar,Beer Store,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Bus Stop,Cafeteria,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Casino,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Basketball Court,Concert Hall,Convenience Store,Cosmetics Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dog Run,Donut Shop,Dumpling Restaurant,Farm,Fast Food Restaurant,Field,Food,Food & Drink Shop,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gastropub,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,History Museum,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Lawyer,Liquor Store,Lounge,Market,Martial Arts Dojo,Massage Studio,Mexican Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Nail Salon,New American Restaurant,Opera House,Outdoor Sculpture,Paper / Office Supplies Store,Park,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Polish Restaurant,Pool,Ramen Restaurant,Recreation Center,Rental Car Location,Rest Area,Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Skating Rink,Snack Place,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Steakhouse,Sushi Restaurant,Taco Place,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Adelaide,King,Richmond, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Agincourt, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Alderwood,Long Branch, Toronto",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [56]:
num_top_venues = 5

for hood in final_group['Neighborhood']:
    print("----"+hood+"----")
    temp = final_group[final_group['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond, Toronto----
                     venue  freq
0             Concert Hall   1.0
1                 Pharmacy   0.0
2  New American Restaurant   0.0
3              Opera House   0.0
4        Outdoor Sculpture   0.0


----Agincourt North,L'Amoreaux East,Milliken,Steeles East, Toronto----
                     venue  freq
0                     Park   1.0
1       African Restaurant   0.0
2      Peruvian Restaurant   0.0
3               Nail Salon   0.0
4  New American Restaurant   0.0


----Agincourt, Toronto----
                     venue  freq
0           Breakfast Spot   1.0
1       African Restaurant   0.0
2      Peruvian Restaurant   0.0
3  New American Restaurant   0.0
4              Opera House   0.0


----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown, Toronto----
                     venue  freq
0           Sandwich Place   1.0
1       African Restaurant   0.0
2      Peruvian Restaurant   0.0
3         

In [57]:
# Create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = final_group['Neighborhood']

for ind in np.arange(final_group.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(final_group.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.tail()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
395,"Woodlawn, New York",Pizza Place,Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
396,"Woodrow, New York",Sushi Restaurant,Yoga Studio,Food,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner
397,"Woodside, New York",Thai Restaurant,Yoga Studio,Food,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner
398,"York Mills West, Toronto",Bank,Yoga Studio,Food & Drink Shop,Field,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store
399,"Yorkville, New York",Wine Shop,Yoga Studio,Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop


# Format the two dataframes to run our Cluster Test

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [58]:
#Evaluate the list of column heads to isolate the neighborhood, lat, and long
toronto_neighbor.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern, Toronto",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,"Highland Creek,Rouge Hill,Port Union, Toronto",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood,Morningside,West Hill, Toronto",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
12,"Woburn, Toronto",43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop
15,"Cedarbrae, Toronto",43.773136,-79.239476,Drupati's Roti & Doubles,43.775222,-79.241678,Caribbean Restaurant


In [59]:
toronto_edit = toronto_neighbor.filter(['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude'], axis=1)
toronto_edit.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude
0,"Rouge,Malvern, Toronto",43.806686,-79.194353
2,"Highland Creek,Rouge Hill,Port Union, Toronto",43.784535,-79.160497
4,"Guildwood,Morningside,West Hill, Toronto",43.763573,-79.188711
12,"Woburn, Toronto",43.770992,-79.216917
15,"Cedarbrae, Toronto",43.773136,-79.239476


In [60]:
#We'll need to drop borough 
newyork_edit = ny_neighborhoods.filter(['Neighborhood', 'Latitude', 'Longitude'], axis=1)
newyork_edit.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Wakefield, New York",40.894705,-73.847201
1,"Co-op City, New York",40.874294,-73.829939
2,"Eastchester, New York",40.887556,-73.827806
3,"Fieldston, New York",40.895437,-73.905643
4,"Riverdale, New York",40.890834,-73.912585


In [61]:
#Rename the column heads to match each other
toronto_edit.rename(columns={'Neighborhood Latitude':'Latitude',
                            'Neighborhood Longitude':'Longitude'},
                    inplace=True)
toronto_edit.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Rouge,Malvern, Toronto",43.806686,-79.194353
2,"Highland Creek,Rouge Hill,Port Union, Toronto",43.784535,-79.160497
4,"Guildwood,Morningside,West Hill, Toronto",43.763573,-79.188711
12,"Woburn, Toronto",43.770992,-79.216917
15,"Cedarbrae, Toronto",43.773136,-79.239476


In [62]:
#Combine toronto_edit and newyork_edit

frames = [toronto_edit, newyork_edit]
neighborhood_df = pd.concat(frames)
neighborhood_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Rouge,Malvern, Toronto",43.806686,-79.194353
2,"Highland Creek,Rouge Hill,Port Union, Toronto",43.784535,-79.160497
4,"Guildwood,Morningside,West Hill, Toronto",43.763573,-79.188711
12,"Woburn, Toronto",43.770992,-79.216917
15,"Cedarbrae, Toronto",43.773136,-79.239476


# Determining K cluster value

In [79]:
# Use Kmeans clustering to determine the cluster number 5

In [114]:
# Set the number of clusters
kclusters = 3

final_clustering = final_group.drop('Neighborhood', 1)

# Run the k-means clustering.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(final_clustering)

# Check the cluster labels generated for each row in the dataframe.
kmeans.labels_[0:10]

array([0, 1, 0, 0, 2, 2, 0, 2, 0, 2], dtype=int32)

In [115]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhood_merged = neighborhood_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
neighborhood_merged = neighborhood_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

In [116]:
neighborhood_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Rouge,Malvern, Toronto",43.806686,-79.194353,0.0,Fast Food Restaurant,Yoga Studio,Diner,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop,Food
2,"Highland Creek,Rouge Hill,Port Union, Toronto",43.784535,-79.160497,0.0,Bar,Yoga Studio,Food & Drink Shop,Field,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store
4,"Guildwood,Morningside,West Hill, Toronto",43.763573,-79.188711,2.0,Pizza Place,Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
12,"Woburn, Toronto",43.770992,-79.216917,0.0,Coffee Shop,Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop,Food
15,"Cedarbrae, Toronto",43.773136,-79.239476,0.0,Caribbean Restaurant,Yoga Studio,Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop


In [117]:
neighborhood_merged=neighborhood_merged.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)

In [118]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = [-1,0,1,2]
for lat, lon, poi, cluster in zip(neighborhood_merged['Latitude'], neighborhood_merged['Longitude'], neighborhood_merged['Neighborhood'], neighborhood_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-0],
        fill=True,
        fill_color=rainbow[int(cluster)-0],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Evaluate the Clusters 

In [119]:
neighborhood_merged.loc[neighborhood_merged['Cluster Labels'] == 0, neighborhood_merged.columns[[0] + list(range(5, neighborhood_merged.shape[1]))]]

Unnamed: 0,Neighborhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Rouge,Malvern, Toronto",Yoga Studio,Diner,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop,Food
2,"Highland Creek,Rouge Hill,Port Union, Toronto",Yoga Studio,Food & Drink Shop,Field,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store
12,"Woburn, Toronto",Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop,Food
15,"Cedarbrae, Toronto",Yoga Studio,Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop
24,"Scarborough Village, Toronto",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
26,"East Birchmount Park,Ionview,Kennedy Park, Tor...",Yoga Studio,Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop
31,"Clairlea,Golden Mile,Oakridge, Toronto",Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop,Food
41,"Cliffcrest,Cliffside,Scarborough Village West,...",Yoga Studio,Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop
44,"Birch Cliff,Cliffside West, Toronto",Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Yoga Studio,Food
48,"Dorset Park,Scarborough Town Centre,Wexford He...",Diner,Fast Food Restaurant,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Dessert Shop,Food


In [120]:
neighborhood_merged.loc[neighborhood_merged['Cluster Labels'] == 1, neighborhood_merged.columns[[0] + list(range(5, neighborhood_merged.shape[1]))]]

Unnamed: 0,Neighborhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
80,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
218,"Parkwoods, Toronto",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
373,"East Toronto, Toronto",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
479,"Lawrence Park, Toronto",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
482,"Davisville North, Toronto",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
1885,"Caledonia-Fairbanks, Toronto",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
2090,"Queen's Park, Toronto",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
2180,"The Kingsway,Montgomery Road,Old Mill North, T...",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
13,"Bedford Park, New York",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field
27,"Clason Point, New York",Yoga Studio,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Dessert Shop,Field


In [121]:
neighborhood_merged.loc[neighborhood_merged['Cluster Labels'] == 2, neighborhood_merged.columns[[0] + list(range(5, neighborhood_merged.shape[1]))]]

Unnamed: 0,Neighborhood,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,"Guildwood,Morningside,West Hill, Toronto",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
212,"Willowdale West, Toronto",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
2011,"The Junction North,Runnymede, Toronto",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
2171,"Alderwood,Long Branch, Toronto",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
2206,"Humber Summit, Toronto",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
6,"Marble Hill, New York",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
7,"Woodlawn, New York",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
8,"Norwood, New York",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
25,"Morrisania, New York",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store
32,"Van Nest, New York",Yoga Studio,Dessert Shop,Farm,Dumpling Restaurant,Donut Shop,Dog Run,Discount Store,Diner,Department Store


# Conclusion

### You can use the above cluster information to find comparable neighborhoods within New York Ciy and Toronto based on nearby venues.