In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

 #import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

import lxml
print('Libraries imported.')

Libraries imported.


# Toronto Data

In [2]:
urlwiki='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
toronto=pd.read_html(urlwiki)
df_toronto= pd.DataFrame(toronto[0])
df_torontoW = df_toronto.loc[df_toronto['Borough']!='Not assigned']
df_torontoW.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [3]:
df_grouped=df_torontoW.groupby(['Postcode','Borough']).agg(lambda x: ', '.join(x))
df_grouped.reset_index(level=['Postcode','Borough'], inplace=True)
for n in range(df_grouped.shape[0]):
    if df_grouped.loc[n,'Neighbourhood'] == 'Not assigned':
        df_grouped.loc[n,'Neighbourhood'] = df_grouped.loc[n,'Borough']
df_grouped

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [4]:
Cords=pd.read_csv('Geospatial_Coordinates.csv')
Cords.columns.values[0]='Postcode'
Toronto_data=pd.merge(df_grouped,Cords, on='Postcode')
Toronto_data.drop('Postcode',axis=1,inplace=True)
Toronto_data.columns.values[1]='Neighborhood'
Toronto_data['City']=['Toronto']*Toronto_data.shape[0]
fixed_columns = [Toronto_data.columns[-1]] + list(Toronto_data.columns[:-1])
Toronto_data = Toronto_data[fixed_columns]
Toronto_data

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude
0,Toronto,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,Toronto,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,Toronto,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Toronto,Scarborough,Woburn,43.770992,-79.216917
4,Toronto,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,Toronto,York,Weston,43.706876,-79.518188
99,Toronto,Etobicoke,Westmount,43.696319,-79.532242
100,Toronto,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,Toronto,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437


In [5]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="t_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [6]:
# create map of New York using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Borough'], Toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [7]:
CLIENT_ID = 'TXOUOT3UGXOZAQMIUNEJ1OES5C3ZOHZQUDL5E1BA5XE4GPAN' # your Foursquare ID
CLIENT_SECRET = 'JM4YAMR1YJKW1WBQHOO2I1NTJVOR515MI33DTJNNRUDB3SGN' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT=100
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    iCount=0
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
        iCount = iCount+1

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
            
    
    return(nearby_venues,iCount)

In [8]:
[Toronto_venues,Count] = getNearbyVenues(names=Toronto_data['Neighborhood'],
                                   latitudes=Toronto_data['Latitude'],
                                   longitudes=Toronto_data['Longitude']
                                  )
print(Count)

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

# Clustering Toronto alone with 5 clusters

In [11]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# add Postalcode column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
# set number of clusters
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# create a new dataframe
columns = ['Neighborhood']
Neighborhood_venues_sorted = pd.DataFrame(columns=columns)
Neighborhood_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)
# add clustering labels
Neighborhood_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Toronto_merged = Toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(Neighborhood_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
Toronto_merged['Cluster Labels'].fillna(0,inplace=True)
# create map

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Ney York Data

In [12]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
neighborhoods_data = newyork_data['features']
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
    
neighborhoods['City']=['Ney York']*neighborhoods.shape[0]
fixed_columns = [neighborhoods.columns[-1]] + list(neighborhoods.columns[:-1])
neighborhoods = neighborhoods[fixed_columns]
neighborhoods.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude
0,Ney York,Bronx,Wakefield,40.894705,-73.847201
1,Ney York,Bronx,Co-op City,40.874294,-73.829939
2,Ney York,Bronx,Eastchester,40.887556,-73.827806
3,Ney York,Bronx,Fieldston,40.895437,-73.905643
4,Ney York,Bronx,Riverdale,40.890834,-73.912585


In [13]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


In [14]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [15]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [16]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [17]:
neighborhoods_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

# Clustering Ney York alones with 5 clusters

In [18]:
# one hot encoding
neighborhoods_onehot = pd.get_dummies(neighborhoods_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
neighborhoods_onehot['Neighborhood'] = neighborhoods_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [neighborhoods_onehot.columns[-1]] + list(neighborhoods_onehot.columns[:-1])
neighborhoods_onehot = neighborhoods_onehot[fixed_columns]

neighborhoods_grouped = neighborhoods_onehot.groupby('Neighborhood').mean().reset_index()
# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = neighborhoods_grouped['Neighborhood']
# set number of clusters
kclusters = 5

neighborhoods_grouped_clustering = neighborhoods_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(neighborhoods_grouped_clustering)

## add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhoods_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
neighborhoods_merged = neighborhoods_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

neighborhoods_merged['Cluster Labels'].fillna(0,inplace=True)

address = 'New York City, NY'

NYgeolocator = Nominatim(user_agent="ny_explorer")
NYlocation = NYgeolocator.geocode(address)
NYlatitude = NYlocation.latitude
NYlongitude = NYlocation.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))
# create map
map_clusters = folium.Map(location=[NYlatitude, NYlongitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neighborhoods_merged['Latitude'], neighborhoods_merged['Longitude'], neighborhoods_merged['Neighborhood'], neighborhoods_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


# Mergeboth Dataframes

In [19]:
TNY_data=pd.concat([Toronto_data, neighborhoods],ignore_index=True)

In [20]:
TNY_data

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude
0,Toronto,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,Toronto,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,Toronto,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Toronto,Scarborough,Woburn,43.770992,-79.216917
4,Toronto,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
404,Ney York,Manhattan,Hudson Yards,40.756658,-74.000111
405,Ney York,Queens,Hammels,40.587338,-73.805530
406,Ney York,Queens,Bayswater,40.611322,-73.765968
407,Ney York,Queens,Queensbridge,40.756091,-73.945631


In [21]:
CLIENT_ID = 'TXOUOT3UGXOZAQMIUNEJ1OES5C3ZOHZQUDL5E1BA5XE4GPAN' # your Foursquare ID
CLIENT_SECRET = 'JM4YAMR1YJKW1WBQHOO2I1NTJVOR515MI33DTJNNRUDB3SGN' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT=100

In [39]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    iCount=0
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
        iCount = iCount+1

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
            
    
    return(nearby_venues,iCount)

In [42]:
[TNY_venues,count] = getNearbyVenues(names=TNY_data['Neighborhood'],
                                   latitudes=TNY_data['Latitude'],
                                   longitudes=TNY_data['Longitude'])
print(count)

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [43]:
print(TNY_venues.shape)
TNY_venues.head()

(12453, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [44]:
TNY_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,2,2,2,2,2
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",8,8,8,8,8,8
"Alderwood, Long Branch",8,8,8,8,8,8
...,...,...,...,...,...,...
Woodlawn,23,23,23,23,23,23
Woodrow,20,20,20,20,20,20
Woodside,75,75,75,75,75,75
York Mills West,4,4,4,4,4,4


In [45]:
print('There are {} uniques categories.'.format(len(TNY_venues['Venue Category'].unique())))
TNY_venues['Venue Category'].value_counts()


There are 461 uniques categories.


Pizza Place           482
Coffee Shop           468
Italian Restaurant    365
Deli / Bodega         282
Bakery                268
                     ... 
Outlet Mall             1
Theme Park              1
Roller Rink             1
Tex-Mex Restaurant      1
College Stadium         1
Name: Venue Category, Length: 461, dtype: int64

In [46]:
pd.set_option('display.max_rows', 460)

In [47]:
# one hot encoding
TNY_onehot = pd.get_dummies(TNY_venues[['Venue Category']], prefix="", prefix_sep="")
# add Postalcode column back to dataframe
TNY_onehot['Neighborhood'] = TNY_venues['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [TNY_onehot.columns[-1]] + list(TNY_onehot.columns[:-1])
TNY_onehot = TNY_onehot[fixed_columns]
TNY_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Volleyball Court,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
TNY_onehot.shape

(12453, 461)

In [49]:
TNY_grouped = TNY_onehot.groupby('Neighborhood').mean().reset_index()
TNY_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,...,Volleyball Court,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Annadale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Arden Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Arlington,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Arrochar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
TNY_grouped.shape

(400, 461)

In [51]:
num_top_venues = 5

for hood in TNY_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = TNY_grouped[TNY_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
          venue  freq
0   Coffee Shop  0.08
1          Café  0.05
2           Bar  0.04
3    Steakhouse  0.04
4  Burger Joint  0.03


----Agincourt----
                       venue  freq
0             Breakfast Spot  0.25
1  Latin American Restaurant  0.25
2                     Lounge  0.25
3             Sandwich Place  0.25
4        Peruvian Restaurant  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                   venue  freq
0             Playground   0.5
1                   Park   0.5
2            Yoga Studio   0.0
3     Persian Restaurant   0.0
4  Performing Arts Venue   0.0


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0         Grocery Store  0.25
1   Fried Chicken Joint  0.12
2  Fast Food Restaurant  0.12
3              Pharmacy  0.12
4        Sandwich Place  0.12


----Alderwood, Long Branch----
          venue  fr

In [52]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [53]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Neighborhood_venues_sorted = pd.DataFrame(columns=columns)
Neighborhood_venues_sorted['Neighborhood'] = TNY_grouped['Neighborhood']

for ind in np.arange(TNY_grouped.shape[0]):
    Neighborhood_venues_sorted.iloc[ind, 1:] = return_most_common_venues(TNY_grouped.iloc[ind, :], num_top_venues)
Neighborhood_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,Bar,Asian Restaurant,Burger Joint,Salad Place,Restaurant,Thai Restaurant,Bakery
1,Agincourt,Breakfast Spot,Sandwich Place,Latin American Restaurant,Lounge,Women's Store,Falafel Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Women's Store,Farm,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Fried Chicken Joint,Sandwich Place,Beer Store,Fast Food Restaurant,Pharmacy,Empanada Restaurant,Factory,English Restaurant
4,"Alderwood, Long Branch",Pizza Place,Pub,Skating Rink,Pharmacy,Sandwich Place,Coffee Shop,Gym,Event Space,Dumpling Restaurant,Duty-free Shop


## Cluster Neighborhoods

In [54]:
# set number of clusters
kclusters = 5

TNY_grouped_clustering = TNY_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(TNY_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 2, 0, 0, 0, 0, 0, 4, 4])

In [59]:
# add clustering labels
Neighborhood_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

TNY_merged = TNY_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
TNY_merged = TNY_merged.join(Neighborhood_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

TNY_merged.head() # check the last columns!

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Toronto,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0.0,Fast Food Restaurant,Women's Store,Farm,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
1,Toronto,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,1.0,Bar,Golf Course,Women's Store,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
2,Toronto,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,Breakfast Spot,Pizza Place,Electronics Store,Intersection,Rental Car Location,Medical Center,Mexican Restaurant,Egyptian Restaurant,Empanada Restaurant,English Restaurant
3,Toronto,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Korean Restaurant,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
4,Toronto,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Fried Chicken Joint,Gas Station,Athletics & Sports,Caribbean Restaurant,Lounge,Thai Restaurant,Bank,Bakery,Electronics Store


In [60]:
TNY_merged.shape
TNY_merged['Cluster Labels'].fillna(0,inplace=True)

In [61]:
address = 'Toronto, CA'

Tgeolocator = Nominatim(user_agent="t_explorer")
Tlocation = Tgeolocator.geocode(address)
Tlatitude = Tlocation.latitude
Tlongitude = Tlocation.longitude
# create map
map_clusters = folium.Map(location=[Tlatitude, Tlongitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(TNY_merged['Latitude'], TNY_merged['Longitude'], TNY_merged['Neighborhood'], TNY_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [62]:
address = 'New York City, NY'

NYgeolocator = Nominatim(user_agent="ny_explorer")
NYlocation = NYgeolocator.geocode(address)
NYlatitude = NYlocation.latitude
NYlongitude = NYlocation.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="t_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[NYlatitude, NYlongitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(TNY_merged['Latitude'], TNY_merged['Longitude'], TNY_merged['Neighborhood'], TNY_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


### Cluster 1

In [63]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 0, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].shape

(109, 12)

In [73]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 0, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].head(10)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0.0,Fast Food Restaurant,Women's Store,Farm,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
2,Scarborough,0.0,Breakfast Spot,Pizza Place,Electronics Store,Intersection,Rental Car Location,Medical Center,Mexican Restaurant,Egyptian Restaurant,Empanada Restaurant,English Restaurant
13,Scarborough,0.0,Pizza Place,Chinese Restaurant,Italian Restaurant,Fast Food Restaurant,Noodle House,Pharmacy,Fried Chicken Joint,Rental Car Location,Thai Restaurant,Bank
15,Scarborough,0.0,Fast Food Restaurant,Chinese Restaurant,Breakfast Spot,Camera Store,Pharmacy,Sandwich Place,Coffee Shop,Discount Store,Pizza Place,Grocery Store
16,Scarborough,0.0,,,,,,,,,,
24,North York,0.0,Home Service,Pizza Place,Pharmacy,Coffee Shop,Discount Store,Butcher,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
34,North York,0.0,Hockey Arena,French Restaurant,Coffee Shop,Pizza Place,Portuguese Restaurant,Intersection,Egyptian Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant
35,East York,0.0,Fast Food Restaurant,Pizza Place,Breakfast Spot,Pet Store,Pharmacy,Athletics & Sports,Café,Gastropub,Gym / Fitness Center,Bank
47,Central Toronto,0.0,Pizza Place,Sandwich Place,Dessert Shop,Gym,Italian Restaurant,Coffee Shop,Café,Sushi Restaurant,Farmers Market,Park
72,North York,0.0,Pizza Place,Japanese Restaurant,Asian Restaurant,Park,Pub,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Farm


### Cluster 2

In [65]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 1, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].shape

(248, 12)

In [74]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 1, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].head(10)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,1.0,Bar,Golf Course,Women's Store,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
3,Scarborough,1.0,Coffee Shop,Korean Restaurant,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
4,Scarborough,1.0,Hakka Restaurant,Fried Chicken Joint,Gas Station,Athletics & Sports,Caribbean Restaurant,Lounge,Thai Restaurant,Bank,Bakery,Electronics Store
6,Scarborough,1.0,Discount Store,Coffee Shop,Department Store,Women's Store,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
7,Scarborough,1.0,Bus Line,Metro Station,Soccer Field,Bakery,Intersection,Park,Fast Food Restaurant,Eye Doctor,Falafel Restaurant,Factory
8,Scarborough,1.0,Motel,American Restaurant,Women's Store,Farmers Market,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
9,Scarborough,1.0,Skating Rink,College Stadium,Café,General Entertainment,Women's Store,Falafel Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
10,Scarborough,1.0,Indian Restaurant,Chinese Restaurant,Vietnamese Restaurant,Brewery,Pet Store,Eye Doctor,Exhibit,Event Space,Ethiopian Restaurant,English Restaurant
11,Scarborough,1.0,Middle Eastern Restaurant,Breakfast Spot,Auto Garage,Bakery,Shopping Mall,Sandwich Place,Falafel Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant
12,Scarborough,1.0,Breakfast Spot,Sandwich Place,Latin American Restaurant,Lounge,Women's Store,Falafel Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant


### Cluster 3

In [67]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 2, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].shape

(15, 12)

In [76]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 2, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].head(10)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,2.0,Park,Playground,Women's Store,Farm,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
23,North York,2.0,Park,Convenience Store,Bank,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
25,North York,2.0,Construction & Landscaping,Food & Drink Shop,Park,Falafel Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
30,North York,2.0,Park,Airport,Women's Store,Farmers Market,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
40,East York,2.0,Park,Convenience Store,Metro Station,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
44,Central Toronto,2.0,Park,Swim School,Bus Line,Women's Store,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
74,York,2.0,Park,Women's Store,Market,Fast Food Restaurant,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
79,North York,2.0,Construction & Landscaping,Park,Bakery,Women's Store,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
90,Etobicoke,2.0,Park,Pool,River,Women's Store,Falafel Restaurant,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant
98,York,2.0,Convenience Store,Park,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space


### Cluster 4

In [69]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 3, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].shape

(4, 12)

In [70]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 3, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
188,Brooklyn,3.0,Beach,Dog Run,Lighthouse,Spa,Bus Line,Bus Station,Women's Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
275,Queens,3.0,Beach,Trail,Monument / Landmark,Women's Store,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space
282,Queens,3.0,Beach,Women's Store,Farmers Market,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Exhibit
405,Queens,3.0,Beach,Diner,Southern / Soul Food Restaurant,Dog Run,Café,Fast Food Restaurant,Shoe Store,Bus Stop,Bus Station,Deli / Bodega


### Cluster 5 

In [71]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 4, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].shape

(33, 12)

In [75]:
TNY_merged.loc[TNY_merged['Cluster Labels'] == 4, TNY_merged.columns[[1] + list(range(5, TNY_merged.shape[1]))]].head(10)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,4.0,Convenience Store,Playground,Food & Drink Shop,Falafel Restaurant,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
48,Central Toronto,4.0,Tennis Court,Playground,Women's Store,Farm,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
91,Etobicoke,4.0,Baseball Field,Deli / Bodega,Women's Store,Fast Food Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Exhibit
144,Bronx,4.0,Caribbean Restaurant,Deli / Bodega,Chinese Restaurant,Basketball Court,Laundromat,Food,Liquor Store,Fried Chicken Joint,Supermarket,Eye Doctor
192,Brooklyn,4.0,Deli / Bodega,Playground,Food,Convenience Store,Southern / Soul Food Restaurant,Supermarket,Grocery Store,Salad Place,Donut Shop,Park
194,Brooklyn,4.0,Harbor / Marina,Donut Shop,Baseball Field,Athletics & Sports,Playground,Falafel Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant
251,Queens,4.0,Park,Deli / Bodega,Fast Food Restaurant,Gym / Fitness Center,Hotel,Bar,Donut Shop,Sandwich Place,Exhibit,Event Space
253,Queens,4.0,Candy Store,Bubble Tea Shop,Dance Studio,Deli / Bodega,Women's Store,Farmers Market,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant
262,Queens,4.0,Deli / Bodega,Indian Restaurant,Plaza,Arts & Crafts Store,Coffee Shop,Mexican Restaurant,Fast Food Restaurant,Sushi Restaurant,Playground,Gym
285,Queens,4.0,Donut Shop,Supermarket,Mobile Phone Shop,Playground,Women's Store,Farm,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
