In [5]:
import pandas as pd
import numpy as np
import requests

#### We get our data for New York similar to the labs from the https://cocl.us/new_york_dataset

In [16]:
data =requests.get('https://cocl.us/new_york_dataset').json()
data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

#### The data that we are looking for is in the features, so we extract that portion

In [17]:
data = data['features']
data

[{'type': 'Feature',
  'id': 'nyu_2451_34572.1',
  'geometry': {'type': 'Point',
   'coordinates': [-73.84720052054902, 40.89470517661]},
  'geometry_name': 'geom',
  'properties': {'name': 'Wakefield',
   'stacked': 1,
   'annoline1': 'Wakefield',
   'annoline2': None,
   'annoline3': None,
   'annoangle': 0.0,
   'borough': 'Bronx',
   'bbox': [-73.84720052054902,
    40.89470517661,
    -73.84720052054902,
    40.89470517661]}},
 {'type': 'Feature',
  'id': 'nyu_2451_34572.2',
  'geometry': {'type': 'Point',
   'coordinates': [-73.82993910812398, 40.87429419303012]},
  'geometry_name': 'geom',
  'properties': {'name': 'Co-op City',
   'stacked': 2,
   'annoline1': 'Co-op',
   'annoline2': 'City',
   'annoline3': None,
   'annoangle': 0.0,
   'borough': 'Bronx',
   'bbox': [-73.82993910812398,
    40.87429419303012,
    -73.82993910812398,
    40.87429419303012]}},
 {'type': 'Feature',
  'id': 'nyu_2451_34572.3',
  'geometry': {'type': 'Point',
   'coordinates': [-73.82780644716412, 

#### The columns we are interested are 'Borough', 'Neighborhood', 'Latitude', 'Longitude'. We use a for loop to gather this information from our data set

In [22]:
columns = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

ny_data = pd.DataFrame(columns=columns)
    
for i in data:
    borough = i['properties']['borough'] 
    neighborhood = i['properties']['name']     
    coordinates = i['geometry']['coordinates']
    longitude = coordinates[0]
    latitude = coordinates[1]
    
    ny_data = ny_data.append({'Borough': borough,'Neighborhood': neighborhood,'Latitude': latitude,'Longitude': longitude}, ignore_index=True)

In [23]:
ny_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


#### Now let's find the location of newyork using geocoder library

In [2]:
import geocoder
from geopy.geocoders import Nominatim 

address = 'New York'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York are 40.7127281, -74.0060152.


#### Let's build a map with the information we have using the folium library

In [26]:
import folium
map_ny = folium.Map(location=[latitude, longitude], zoom_start=10)

# adding markers to map
for latitude, longitude, borough, neighbourhood in zip(ny_data['Latitude'], ny_data['Longitude'], ny_data['Borough'], ny_data['Neighborhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='red',
        fill=True
        ).add_to(map_ny)  
    
map_ny

#### Entering credentials

In [27]:
CLIENT_ID = 'E245K2Z5NCNHDNCIPPKUK3NRNMIMOCT0IW03FLFCCIY5RIRM' 
CLIENT_SECRET = 'FB21LFZ4HNSTXKVAMMLDC2HHUWKLUGVLNCW1DVOGSBL4WNXR'
VERSION = '20180605'

#### Borrowing the 'getNearbyVenues' function from the lab

In [33]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius
            )
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

#### Apply the function to New York

In [34]:
ny_venues = getNearbyVenues(ny_data['Neighborhood'], ny_data['Latitude'], ny_data['Longitude'])

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

In [35]:
print(ny_venues.shape)
ny_venues.head()

(6120, 5)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,Dessert Shop
1,Wakefield,40.894705,-73.847201,Rite Aid,Pharmacy
2,Wakefield,40.894705,-73.847201,Carvel Ice Cream,Ice Cream Shop
3,Wakefield,40.894705,-73.847201,Walgreens,Pharmacy
4,Wakefield,40.894705,-73.847201,Dunkin',Donut Shop


#### One-hot Encoding

In [46]:
ny_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")

ny_onehot['Neighborhood'] = ny_venues['Neighborhood'] 

fixed_columns = [ny_onehot.columns[-1]] + list(ny_onehot.columns[:-1])
ny_onehot = ny_onehot[fixed_columns]

ny_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,...,Vietnamese Restaurant,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Checking the venue categories in our data to see which of them could be of use

In [56]:
for i in ny_onehot.columns:
    print(i)

Yoga Studio
Accessories Store
Adult Boutique
Afghan Restaurant
African Restaurant
Airport Terminal
American Restaurant
Antique Shop
Arcade
Arepa Restaurant
Argentinian Restaurant
Art Gallery
Art Museum
Arts & Crafts Store
Arts & Entertainment
Asian Restaurant
Athletics & Sports
Auditorium
Australian Restaurant
Austrian Restaurant
Automotive Shop
BBQ Joint
Bagel Shop
Bakery
Bank
Bar
Baseball Field
Baseball Stadium
Basketball Court
Beach
Beach Bar
Beer Bar
Beer Garden
Beer Store
Big Box Store
Bike Rental / Bike Share
Bike Shop
Bike Trail
Bistro
Board Shop
Boarding House
Boat or Ferry
Bookstore
Boutique
Bowling Alley
Boxing Gym
Brazilian Restaurant
Breakfast Spot
Brewery
Bridge
Bubble Tea Shop
Buffet
Building
Burger Joint
Burrito Place
Bus Line
Bus Station
Bus Stop
Business Service
Butcher
Cafeteria
Café
Cajun / Creole Restaurant
Campground
Candy Store
Cantonese Restaurant
Caribbean Restaurant
Carpet Store
Caucasian Restaurant
Cha Chaan Teng
Check Cashing Service
Cheese Shop
Chinese Resta

In [57]:
ny_grouped = ny_onehot.groupby('Neighborhood').mean().reset_index()
ny_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Antique Shop,Arcade,...,Vietnamese Restaurant,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Annadale,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Arden Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arlington,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Arrochar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [74]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_grouped['Neighborhood']

for ind in np.arange(ny_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Pizza Place,Supermarket,Deli / Bodega,Chinese Restaurant,Breakfast Spot,Used Auto Dealership,Spa,Fried Chicken Joint,Fast Food Restaurant,Bike Trail
1,Annadale,Pizza Place,American Restaurant,Bakery,Park,Train Station,Pharmacy,Liquor Store,Restaurant,Diner,Field
2,Arden Heights,Coffee Shop,Deli / Bodega,Lawyer,Bus Stop,Pizza Place,Business Service,Pharmacy,Women's Store,Ethiopian Restaurant,Event Service
3,Arlington,Intersection,Grocery Store,American Restaurant,Deli / Bodega,Bus Stop,Women's Store,Fast Food Restaurant,Ethiopian Restaurant,Event Service,Event Space
4,Arrochar,Bus Stop,Pizza Place,Deli / Bodega,Italian Restaurant,Bagel Shop,Sandwich Place,Athletics & Sports,Middle Eastern Restaurant,Pharmacy,Supermarket


#### K-means clustering

In [75]:
from sklearn.cluster import KMeans

kclusters = 5

ny_grouped_clustering = ny_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)
kmeans

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=0, tol=0.0001, verbose=0)

In [76]:
kmeans.labels_

array([4, 4, 0, 0, 0, 3, 3, 0, 3, 4, 3, 3, 4, 4, 3, 1, 4, 3, 4, 3, 3, 0,
       0, 4, 3, 3, 3, 3, 4, 2, 0, 3, 0, 4, 4, 3, 0, 4, 4, 3, 3, 4, 3, 3,
       3, 4, 4, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 3, 4, 3, 4, 3, 4, 4, 3,
       4, 1, 4, 4, 3, 4, 4, 3, 3, 3, 4, 4, 3, 4, 4, 3, 3, 4, 4, 3, 3, 3,
       0, 3, 4, 3, 4, 4, 3, 3, 4, 3, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 4,
       3, 4, 0, 3, 3, 4, 3, 4, 4, 4, 3, 4, 3, 3, 3, 2, 4, 4, 4, 4, 3, 4,
       4, 4, 3, 4, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 4,
       3, 4, 3, 4, 3, 0, 3, 3, 4, 3, 3, 4, 4, 0, 0, 4, 4, 4, 3, 3, 3, 4,
       4, 3, 3, 4, 4, 4, 4, 4, 4, 3, 2, 0, 4, 0, 4, 4, 3, 0, 4, 3, 4, 4,
       3, 0, 4, 4, 0, 4, 3, 3, 3, 4, 4, 4, 4, 4, 3, 3, 3, 4, 4, 3, 3, 4,
       4, 4, 3, 0, 3, 3, 4, 4, 4, 3, 4, 4, 3, 4, 3, 4, 3, 4, 4, 4, 3, 4,
       3, 4, 3, 3, 0, 3, 3, 3, 4, 0, 4, 3, 3, 4, 3, 4, 3, 4, 4, 3, 3, 4,
       3, 4, 3, 0, 3, 3, 0, 3, 3, 3, 3, 4, 4, 3, 3, 0, 0, 3, 4, 3, 4, 3,
       4, 3, 4, 4, 0, 3, 3, 4, 3, 4, 4, 0, 4, 3, 3]

In [77]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
ny_merged = ny_data
ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
ny_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,4.0,Pharmacy,Ice Cream Shop,Donut Shop,Deli / Bodega,Sandwich Place,Dessert Shop,Laundromat,Women's Store,Falafel Restaurant,Entertainment Service
1,Bronx,Co-op City,40.874294,-73.829939,4.0,Bus Station,Park,Pharmacy,Fast Food Restaurant,Restaurant,Pizza Place,Discount Store,Bagel Shop,Grocery Store,Post Office
2,Bronx,Eastchester,40.887556,-73.827806,4.0,Bus Station,Caribbean Restaurant,Deli / Bodega,Diner,Food & Drink Shop,Automotive Shop,Donut Shop,Bus Stop,Juice Bar,Bowling Alley
3,Bronx,Fieldston,40.895437,-73.905643,3.0,Plaza,Bus Station,River,Women's Store,Farmers Market,Empanada Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space
4,Bronx,Riverdale,40.890834,-73.912585,3.0,Park,Bus Station,Bank,Medical Supply Store,Gym,Plaza,Playground,Baseball Field,Empanada Restaurant,Entertainment Service


#### Removing NaN values

In [78]:
ny_merged = ny_merged.dropna(subset=['Cluster Labels'])

#### Creating a map using folium library

In [83]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_merged['Latitude'], ny_merged['Longitude'], ny_merged['Neighborhood'], ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Checking the clusters

In [85]:
ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Woodlawn,Deli / Bodega,Pizza Place,Pub,Playground,Cosmetics Shop,Pharmacy,Food & Drink Shop,Food Truck,Liquor Store,Donut Shop
28,Throgs Neck,Bar,Italian Restaurant,Sports Bar,Coffee Shop,Asian Restaurant,Pizza Place,Mobile Phone Shop,Liquor Store,American Restaurant,Deli / Bodega
32,Van Nest,Pizza Place,Deli / Bodega,Hookah Bar,BBQ Joint,Coffee Shop,Spa,Supermarket,Shop & Service,Donut Shop,Board Shop
34,Belmont,Italian Restaurant,Pizza Place,Deli / Bodega,Dessert Shop,Bakery,Food & Drink Shop,Grocery Store,Liquor Store,Bar,Market
41,Olinville,Supermarket,Deli / Bodega,Caribbean Restaurant,Chinese Restaurant,Basketball Court,Food,Bus Station,Pizza Place,Laundromat,Farm
83,Marine Park,Chinese Restaurant,Pizza Place,Gym,Athletics & Sports,Coffee Shop,Basketball Court,Soccer Field,Baseball Field,Deli / Bodega,Fish & Chips Shop
89,Ocean Hill,Deli / Bodega,Fried Chicken Joint,Southern / Soul Food Restaurant,Bakery,Food,Supermarket,Mexican Restaurant,Dry Cleaner,Donut Shop,Pharmacy
144,Glendale,Brewery,Food & Drink Shop,Deli / Bodega,Pizza Place,Arts & Crafts Store,Women's Store,Fast Food Restaurant,Ethiopian Restaurant,Event Service,Event Space
150,Whitestone,Bubble Tea Shop,Dance Studio,Deli / Bodega,Candy Store,Women's Store,Fast Food Restaurant,Event Service,Event Space,Exhibit,Factory
156,Bellerose,Deli / Bodega,Pizza Place,Chinese Restaurant,Italian Restaurant,Grocery Store,Liquor Store,Bank,Donut Shop,Seafood Restaurant,Mobile Phone Shop


In [86]:
ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Country Club,Sandwich Place,Flea Market,Athletics & Sports,Playground,Women's Store,Falafel Restaurant,Empanada Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service
303,Bayswater,Playground,Women's Store,Farmers Market,Empanada Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Factory


In [87]:
ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
172,Breezy Point,Beach,Supermarket,Monument / Landmark,Trail,Bus Stop,Women's Store,Fast Food Restaurant,Event Service,Event Space,Exhibit
179,Neponsit,Beach,Women's Store,Fountain,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Factory,Falafel Restaurant
302,Hammels,Beach,Gym / Fitness Center,Diner,Building,Fried Chicken Joint,Bus Station,Bus Stop,Fast Food Restaurant,Shoe Store,Dog Run


In [88]:
ny_merged.loc[ny_merged['Cluster Labels'] == 3, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Fieldston,Plaza,Bus Station,River,Women's Store,Farmers Market,Empanada Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space
4,Riverdale,Park,Bus Station,Bank,Medical Supply Store,Gym,Plaza,Playground,Baseball Field,Empanada Restaurant,Entertainment Service
9,Williamsbridge,Bar,Caribbean Restaurant,Nightclub,Playground,Soup Place,Farmers Market,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space
12,City Island,Seafood Restaurant,Deli / Bodega,Thrift / Vintage Store,Park,Baseball Field,Arts & Crafts Store,Grocery Store,Boat or Ferry,Bar,Bank
16,Fordham,Mobile Phone Shop,Plaza,Shoe Store,Gym,Clothing Store,Video Game Store,Diner,Supplement Shop,Liquor Store,Chinese Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
288,Roxbury,Baseball Field,Fast Food Restaurant,Irish Pub,Beach,Trail,Deli / Bodega,Ethiopian Restaurant,Event Service,Event Space,Exhibit
292,Lighthouse Hill,Art Museum,Italian Restaurant,Spa,Trail,Café,Fast Food Restaurant,Ethiopian Restaurant,Event Service,Event Space,Exhibit
294,Malba,Rock Club,Tennis Court,Rest Area,Women's Store,Empanada Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space,Exhibit
301,Hudson Yards,American Restaurant,Gym / Fitness Center,Hotel,Cycle Studio,Roof Deck,Residential Building (Apartment / Condo),Pedestrian Plaza,Park,Grocery Store,Salad Place


In [89]:
ny_merged.loc[ny_merged['Cluster Labels'] == 4, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Wakefield,Pharmacy,Ice Cream Shop,Donut Shop,Deli / Bodega,Sandwich Place,Dessert Shop,Laundromat,Women's Store,Falafel Restaurant,Entertainment Service
1,Co-op City,Bus Station,Park,Pharmacy,Fast Food Restaurant,Restaurant,Pizza Place,Discount Store,Bagel Shop,Grocery Store,Post Office
2,Eastchester,Bus Station,Caribbean Restaurant,Deli / Bodega,Diner,Food & Drink Shop,Automotive Shop,Donut Shop,Bus Stop,Juice Bar,Bowling Alley
5,Kingsbridge,Pizza Place,Mexican Restaurant,Spanish Restaurant,Bakery,Latin American Restaurant,Ice Cream Shop,Coffee Shop,Supermarket,Candy Store,Caribbean Restaurant
6,Marble Hill,Gym,Discount Store,Sandwich Place,Coffee Shop,Yoga Studio,Pizza Place,Steakhouse,Shopping Mall,Seafood Restaurant,Pharmacy
...,...,...,...,...,...,...,...,...,...,...,...
297,Bronxdale,Breakfast Spot,Bank,Paper / Office Supplies Store,Mexican Restaurant,Pizza Place,Gym,Italian Restaurant,Supermarket,Chinese Restaurant,Spanish Restaurant
298,Allerton,Pizza Place,Supermarket,Deli / Bodega,Chinese Restaurant,Breakfast Spot,Used Auto Dealership,Spa,Fried Chicken Joint,Fast Food Restaurant,Bike Trail
299,Kingsbridge Heights,Pizza Place,Bus Station,Food Truck,Latin American Restaurant,Coffee Shop,Chinese Restaurant,Food,School,Lake,Grocery Store
300,Erasmus,Caribbean Restaurant,Grocery Store,Juice Bar,Bus Line,Music Venue,Food Truck,Mobile Phone Shop,Furniture / Home Store,Chinese Restaurant,School


#### We can do a more detailed analysis by choosing venues that are of interest for a Tourist(Turkish)

In [112]:
ny_interest = ny_grouped[['Neighborhood','Turkish Restaurant', 'Middle Eastern Restaurant', 'Rental Car Location',
                        'Tourist Information Center','Theme Park', 'Shopping Mall', 'Nightclub', 'Metro Station', 'Bar']]
ny_interest

Unnamed: 0,Neighborhood,Turkish Restaurant,Middle Eastern Restaurant,Rental Car Location,Tourist Information Center,Theme Park,Shopping Mall,Nightclub,Metro Station,Bar
0,Allerton,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
1,Annadale,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
2,Arden Heights,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
3,Arlington,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
4,Arrochar,0.0,0.047619,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...
296,Woodhaven,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.038462,0.038462
297,Woodlawn,0.0,0.000000,0.034483,0.0,0.0,0.0,0.0,0.000000,0.034483
298,Woodrow,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
299,Woodside,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.066667


#### Let's look at each venue individually to get a better understanding of our data

In [115]:
ny_turkish = ny_grouped[['Neighborhood','Turkish Restaurant']]
ny_turkish = ny_turkish[ny_turkish['Turkish Restaurant'] != 0]
ny_turkish

Unnamed: 0,Neighborhood,Turkish Restaurant
9,Bath Beach,0.033333
245,Sheepshead Bay,0.12
264,Sunnyside Gardens,0.033333
274,Turtle Bay,0.033333
278,Upper West Side,0.033333


In [116]:
ny_middle = ny_grouped[['Neighborhood','Middle Eastern Restaurant']]
ny_middle = ny_middle[ny_middle['Middle Eastern Restaurant'] != 0]
ny_middle

Unnamed: 0,Neighborhood,Middle Eastern Restaurant
4,Arrochar,0.047619
6,Astoria,0.033333
11,Bay Ridge,0.033333
27,Boerum Hill,0.033333
60,Cobble Hill,0.033333
96,Flatbush,0.05
111,Glen Oaks,0.047619
145,Kew Gardens Hills,0.058824
150,Lenox Hill,0.033333
155,Little Neck,0.033333


In [117]:
ny_car = ny_grouped[['Neighborhood','Rental Car Location']]
ny_car = ny_car[ny_car['Rental Car Location'] != 0]
ny_car

Unnamed: 0,Neighborhood,Rental Car Location
76,East Elmhurst,0.090909
211,Pleasant Plains,0.052632
215,Port Richmond,0.166667
261,Steinway,0.130435
269,Tompkinsville,0.045455
297,Woodlawn,0.034483


In [118]:
ny_tourist = ny_grouped[['Neighborhood','Tourist Information Center']]
ny_tourist = ny_tourist[ny_tourist['Tourist Information Center'] != 0]
ny_tourist

Unnamed: 0,Neighborhood,Tourist Information Center
274,Turtle Bay,0.033333


In [119]:
ny_theme = ny_grouped[['Neighborhood','Theme Park']]
ny_theme = ny_theme[ny_theme['Theme Park'] != 0]
ny_theme

Unnamed: 0,Neighborhood,Theme Park
26,Bloomfield,0.25


In [120]:
ny_shop = ny_grouped[['Neighborhood','Shopping Mall']]
ny_shop = ny_shop[ny_shop['Shopping Mall'] != 0]
ny_shop

Unnamed: 0,Neighborhood,Shopping Mall
10,Battery Park City,0.033333
12,Bay Terrace,0.025641
13,Baychester,0.052632
64,Concourse Village,0.033333
73,Downtown,0.033333
109,Georgetown,0.034483
130,Hollis,0.153846
142,Jamaica Hills,0.05
149,Lefrak City,0.045455
166,Marble Hill,0.041667


In [121]:
ny_night = ny_grouped[['Neighborhood','Nightclub']]
ny_night = ny_night[ny_night['Nightclub'] != 0]
ny_night

Unnamed: 0,Neighborhood,Nightclub
33,Broadway Junction,0.0625
39,Bushwick,0.033333
41,Cambria Heights,0.076923
49,Chelsea,0.028571
98,Flatlands,0.055556
200,Ocean Parkway,0.05
291,Williamsbridge,0.2


In [122]:
ny_bar = ny_grouped[['Neighborhood','Bar']]
ny_bar = ny_bar[ny_bar['Bar'] != 0]
ny_bar

Unnamed: 0,Neighborhood,Bar
6,Astoria,0.033333
8,Auburndale,0.055556
10,Battery Park City,0.033333
11,Bay Ridge,0.033333
14,Bayside,0.033333
...,...,...
292,Williamsburg,0.033333
294,Windsor Terrace,0.040000
296,Woodhaven,0.038462
297,Woodlawn,0.034483


In [123]:
ny_metro = ny_grouped[['Neighborhood','Metro Station']]
ny_metro = ny_metro[ny_metro['Metro Station'] != 0]
ny_metro

Unnamed: 0,Neighborhood,Metro Station
5,Arverne,0.1
63,Concourse,0.04
69,Cypress Hills,0.033333
79,East New York,0.071429
85,Edgemere,0.133333
93,Far Rockaway,0.033333
128,Highland Park,0.066667
170,Melrose,0.035714
181,Morrisania,0.041667
190,New Lots,0.052632


### We will calculate a score for each neighborhood depending on their values in the selected venues. To be able to calculate this score, we first need to normalize each columns to be fairer to each venue. Note that some of these venues are rarer than others,which results in low mean values in neighborhood. The normalization should give enough recognition to these rare venues as well.

In [125]:
cols_to_norm = ['Turkish Restaurant', 'Middle Eastern Restaurant', 'Rental Car Location',
                        'Tourist Information Center','Theme Park', 'Shopping Mall', 'Nightclub', 'Metro Station', 'Bar']
ny_interest[cols_to_norm] = ny_interest[cols_to_norm].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
ny_interest

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,Neighborhood,Turkish Restaurant,Middle Eastern Restaurant,Rental Car Location,Tourist Information Center,Theme Park,Shopping Mall,Nightclub,Metro Station,Bar
0,Allerton,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
1,Annadale,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
2,Arden Heights,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
3,Arlington,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
4,Arrochar,0.0,0.809524,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...
296,Woodhaven,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.288462,0.153846
297,Woodlawn,0.0,0.000000,0.206897,0.0,0.0,0.0,0.0,0.000000,0.137931
298,Woodrow,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000
299,Woodside,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.266667


#### Sanity check to see that the columns are normalized

In [126]:
ny_interest['Turkish Restaurant'].max()

1.0

### Now we are going to calculate the score of each neighborhood by summing up their normalized venue scores. This is going to give every category in our list an equal weight. It is hard to evaluate the relative importance of each category in the list, so we will stick to equal weight. Depending on the individuals, these weights can be modified.

In [130]:
scores = ny_interest[cols_to_norm]
ny_interest["Score"] = scores.sum(axis=1)
ny_interest

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Neighborhood,Turkish Restaurant,Middle Eastern Restaurant,Rental Car Location,Tourist Information Center,Theme Park,Shopping Mall,Nightclub,Metro Station,Bar,Score
0,Allerton,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
1,Annadale,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
2,Arden Heights,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
3,Arlington,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
4,Arrochar,0.0,0.809524,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.809524
...,...,...,...,...,...,...,...,...,...,...,...
296,Woodhaven,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.288462,0.153846,0.442308
297,Woodlawn,0.0,0.000000,0.206897,0.0,0.0,0.0,0.0,0.000000,0.137931,0.344828
298,Woodrow,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
299,Woodside,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.266667,0.266667


#### Sorting based on our score criterion

In [176]:
score_ny = ny_interest[["Neighborhood","Score"]]
score_ny.sort_values(by="Score", ascending=False)

Unnamed: 0,Neighborhood,Score
291,Williamsbridge,1.800000
215,Port Richmond,1.666667
274,Turtle Bay,1.277778
264,Sunnyside Gardens,1.244444
245,Sheepshead Bay,1.000000
...,...,...
136,Hunters Point,0.000000
137,Hunts Point,0.000000
139,Jackson Heights,0.000000
140,Jamaica Center,0.000000


#### Combining our scores with the location data

In [172]:
combined = pd.merge(score_ny, ny_data, on='Neighborhood')
combined

Unnamed: 0,Neighborhood,Score,Borough,Latitude,Longitude
0,Allerton,0.000000,Bronx,40.865788,-73.859319
1,Annadale,0.000000,Staten Island,40.538114,-74.178549
2,Arden Heights,0.000000,Staten Island,40.549286,-74.185887
3,Arlington,0.000000,Staten Island,40.635325,-74.165104
4,Arrochar,0.809524,Staten Island,40.596313,-74.067124
...,...,...,...,...,...
300,Woodhaven,0.442308,Queens,40.689887,-73.858110
301,Woodlawn,0.344828,Bronx,40.898273,-73.867315
302,Woodrow,0.000000,Staten Island,40.541968,-74.205246
303,Woodside,0.266667,Queens,40.746349,-73.901842


### Drawing a new folium map based on our score. The redder the markers, the more atractive the locations are for Turkish tourists. We see that the Brooklyn area is possibly more interesting for Turkish tourists.

In [175]:
map_ny = folium.Map(location=[latitude, longitude], zoom_start=10)

for latitude, longitude, borough, neighbourhood, score in zip(combined['Latitude'], combined['Longitude'], combined['Borough'], combined['Neighborhood'], combined['Score']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    if score >= 1.5:
        the_color = 'darkred'
    elif score >= 1:
        the_color = 'red'
    elif score >= 0.75:
        the_color = 'lightred'
    elif score >= 0.5:
        the_color = 'orange'
    elif score >= 0.25:
        the_color = 'yellow'
    else:
        the_color = 'white'
         
    folium.CircleMarker(
            [latitude, longitude],
            radius=5,
            popup=label,
            color=the_color,
            fill=True
            ).add_to(map_ny)  
 
map_ny

### More colored dots around the Brooklyn area suggests that Turkish tourists may enjoy this part of NY more.