# 1. Import Modules

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


Now instead of looking for the user's current location for simplicity we shall explore two boroughs of New York first and look for the hotels in those areas. 

# 2. Getting json data of New York

In [2]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

# 3. Creating the dataframe

In [3]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [4]:
neighborhoods_data = newyork_data['features']
for data in neighborhoods_data:
    borough = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


We want our neighborhoods to be as far as possible for the reasons of comparison. So we shall plot the neighborhoods on the map of New York.

# 4. Map of New York and mark the neighborhoods

In [5]:
address = 'New York City, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of New York City are 40.7308619, -73.9871558.


In [6]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

Let's take **Staten Island** and **Manhattan** as our target Boroughs.

In [7]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688
5,Manhattan,Manhattanville,40.816934,-73.957385
6,Manhattan,Central Harlem,40.815976,-73.943211
7,Manhattan,East Harlem,40.792249,-73.944182
8,Manhattan,Upper East Side,40.775639,-73.960508
9,Manhattan,Yorkville,40.77593,-73.947118


In [8]:
staten_data = neighborhoods[neighborhoods['Borough'] == 'Staten Island'].reset_index(drop=True)
staten_data.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Staten Island,St. George,40.644982,-74.079353
1,Staten Island,New Brighton,40.640615,-74.087017
2,Staten Island,Stapleton,40.626928,-74.077902
3,Staten Island,Rosebank,40.615305,-74.069805
4,Staten Island,West Brighton,40.631879,-74.107182
5,Staten Island,Grymes Hill,40.624185,-74.087248
6,Staten Island,Todt Hill,40.597069,-74.111329
7,Staten Island,South Beach,40.580247,-74.079553
8,Staten Island,Port Richmond,40.633669,-74.129434
9,Staten Island,Mariner's Harbor,40.632546,-74.150085


# 5. Explore the neighborhoods in those two boroughs one by one

In [9]:
CLIENT_ID = 'J5D0K1WE2SCLGSCHLQQNNJSCW0VTWEPQ3SOCO5FTQ00U3NMU' # your Foursquare ID
CLIENT_SECRET = 'B3S2Q3MWGMBYO4ASTYTTGDIZMZRISSTXZF1WTPZBGJJVZX14' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 50

In [10]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [11]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500, LIMIT = 100):
    
    venues_list=[]
    dist_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng,
            v['venue']['id'],
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],
            v['venue']['location']['distance'],
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue Id',  
                  'Venue Name',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Distance',
                  'Venue Category']
    
    return(nearby_venues)

### Explore the neighborhoods of Manhattan!!!

In [12]:
manhattan_venues = getNearbyVenues(names = manhattan_data['Neighborhood'], 
                                   latitudes = manhattan_data['Latitude'], 
                                   longitudes = manhattan_data['Longitude'])

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [13]:
manhattan_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,Marble Hill,40.876551,-73.91066,4b4429abf964a52037f225e3,Arturo's,40.874412,-73.910271,240,Pizza Place
1,Marble Hill,40.876551,-73.91066,4baf59e8f964a520a6f93be3,Bikram Yoga,40.876844,-73.906204,376,Yoga Studio
2,Marble Hill,40.876551,-73.91066,4b79cc46f964a520c5122fe3,Tibbett Diner,40.880404,-73.908937,452,Diner
3,Marble Hill,40.876551,-73.91066,4bb114c4f964a520b9783ce3,Sam's Pizza,40.879435,-73.905859,516,Pizza Place
4,Marble Hill,40.876551,-73.91066,4d9a37d4d97ba1430a07346b,Loeser's Delicatessen,40.879242,-73.905471,529,Sandwich Place


## Filter out only those venues that are Hotels

In [14]:
manhattan_hotels = manhattan_venues[manhattan_venues['Venue Category'] == 'Hotel'].reset_index(drop=True)
manhattan_hotels.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,Chinatown,40.715618,-73.994279,578692f4498e1054905dbde7,Hotel 50 Bowery NYC,40.715936,-73.996789,214,Hotel
1,Chinatown,40.715618,-73.994279,4ac7e4a7f964a52065ba20e3,Crosby Street Hotel,40.723035,-73.997416,866,Hotel
2,Manhattanville,40.816934,-73.957385,4c87b045d8086dcb7337a452,Aloft Harlem,40.8091,-73.951505,1003,Hotel
3,Central Harlem,40.815976,-73.943211,4c87b045d8086dcb7337a452,Aloft Harlem,40.8091,-73.951505,1036,Hotel
4,Upper East Side,40.775639,-73.960508,4b37853af964a520c54125e3,The Mark Hotel,40.775201,-73.963351,244,Hotel


In [15]:
columns_names = ['Neighborhood Name', 
                  'Venue Name',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Like Counter',
                  'Disliked',
                  'Tip Counter',
                  'Rating']
manhattan_hotels_data = pd.DataFrame(columns=columns_names)
manhattan_hotels_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating


###  Fetching data about the hotels

In [16]:
for v_id,name,v_name,lat,lon in zip(manhattan_hotels['Venue Id'],manhattan_hotels['Neighborhood'],manhattan_hotels['Venue Name'],manhattan_hotels['Venue Latitude'],manhattan_hotels['Venue Longitude']):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        v_id, 
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION)
    
    # make the GET request
    results = requests.get(url).json()['response']['venue']
    try :
        r = results['rating']
    except :
        r = 'Not Rated yet'
    
    manhattan_hotels_data = manhattan_hotels_data.append({'Neighborhood Name' : name, 
                                          'Venue Name':v_name,
                                          'Venue Latitude':lat, 
                                          'Venue Longitude':lon, 
                                          'Like Counter':results['likes']['count'],
                                          'Disliked':results['dislike'],
                                          'Tip Counter':results['tips']['count'],
                                          'Rating' : r}, ignore_index=True)

## Hotel Data

In [17]:
manhattan_hotels_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating
0,Chinatown,Hotel 50 Bowery NYC,40.715936,-73.996789,66,False,8,8.9
1,Chinatown,Crosby Street Hotel,40.723035,-73.997416,378,False,134,9.3
2,Manhattanville,Aloft Harlem,40.8091,-73.951505,61,False,36,8.1
3,Central Harlem,Aloft Harlem,40.8091,-73.951505,61,False,36,8.1
4,Upper East Side,The Mark Hotel,40.775201,-73.963351,87,False,30,9.0
5,Upper East Side,The Carlyle,40.774413,-73.963301,149,False,59,8.9
6,Lenox Hill,The Carlyle,40.774413,-73.963301,149,False,59,8.9
7,Lenox Hill,The Mark Hotel,40.775201,-73.963351,87,False,30,9.0
8,Lenox Hill,Loews Regency Hotel,40.76455,-73.969218,132,False,25,8.8
9,Lincoln Square,Mandarin Oriental,40.768987,-73.983017,307,False,87,9.4


# User Interface

In [18]:
user_sees = manhattan_hotels.drop(['Neighborhood Latitude','Neighborhood Longitude','Venue Id','Venue Latitude','Venue Longitude','Venue Category'],axis = 1)
user_sees.columns = ['Neighborhood','Hotel Name','Distance']
user_sees['Rating'] = manhattan_hotels_data['Rating']
user_sees

Unnamed: 0,Neighborhood,Hotel Name,Distance,Rating
0,Chinatown,Hotel 50 Bowery NYC,214,8.9
1,Chinatown,Crosby Street Hotel,866,9.3
2,Manhattanville,Aloft Harlem,1003,8.1
3,Central Harlem,Aloft Harlem,1036,8.1
4,Upper East Side,The Mark Hotel,244,9.0
5,Upper East Side,The Carlyle,272,8.9
6,Lenox Hill,The Carlyle,795,8.9
7,Lenox Hill,The Mark Hotel,875,9.0
8,Lenox Hill,Loews Regency Hotel,959,8.8
9,Lincoln Square,Mandarin Oriental,542,9.4


### Explore the neighborhoods of Staten Islands!!!

In [19]:
staten_venues = getNearbyVenues(names = staten_data['Neighborhood'], 
                                   latitudes = staten_data['Latitude'], 
                                   longitudes = staten_data['Longitude'])
staten_venues.head()

St. George
New Brighton
Stapleton
Rosebank
West Brighton
Grymes Hill
Todt Hill
South Beach
Port Richmond
Mariner's Harbor
Port Ivory
Castleton Corners
New Springville
Travis
New Dorp
Oakwood
Great Kills
Eltingville
Annadale
Woodrow
Tottenville
Tompkinsville
Silver Lake
Sunnyside
Park Hill
Westerleigh
Graniteville
Arlington
Arrochar
Grasmere
Old Town
Dongan Hills
Midland Beach
Grant City
New Dorp Beach
Bay Terrace
Huguenot
Pleasant Plains
Butler Manor
Charleston
Rossville
Arden Heights
Greenridge
Heartland Village
Chelsea
Bloomfield
Bulls Head
Richmond Town
Shore Acres
Clifton
Concord
Emerson Hill
Randall Manor
Howland Hook
Elm Park
Manor Heights
Willowbrook
Sandy Ground
Egbertville
Prince's Bay
Lighthouse Hill
Richmond Valley
Fox Hills


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,St. George,40.644982,-74.079353,4bf9c5c08d30d13a6bce0218,Staten Island September 11 Memorial,40.646767,-74.07651,311,Monument / Landmark
1,St. George,40.644982,-74.079353,4a214841f964a520cd7c1fe3,Beso,40.643306,-74.076508,304,Tapas Restaurant
2,St. George,40.644982,-74.079353,4e62c75a483bd9a9747d8cd8,Richmond County Bank Ballpark,40.645056,-74.076864,210,Baseball Stadium
3,St. George,40.644982,-74.079353,4a271f0cf964a5205c911fe3,Enoteca Maria,40.641941,-74.07732,379,Italian Restaurant
4,St. George,40.644982,-74.079353,4b6da712f964a52080832ce3,St. George Theatre,40.642253,-74.077496,341,Theater


In [20]:
staten_hotels = staten_venues[staten_venues['Venue Category'] == 'Hotel'].reset_index(drop=True)
staten_hotels

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,Travis,40.586314,-74.190737,59fe09012079557b03858a2d,Staten Island New York Hotel,40.586781,-74.190973,55,Hotel
1,Travis,40.586314,-74.190737,4c0055bfad15a5931dbe8d73,Comfort Inn,40.586191,-74.190216,46,Hotel
2,Arrochar,40.596313,-74.067124,4c96c79482b56dcbd0bde4aa,Staten Island Navy Lodge,40.598734,-74.062288,489,Hotel
3,Grasmere,40.598268,-74.076674,4c96c79482b56dcbd0bde4aa,Staten Island Navy Lodge,40.598734,-74.062288,1217,Hotel
4,Pleasant Plains,40.524699,-74.219831,4bc41a1ddce4eee165f0719d,West Shore Motor Lodge,40.532809,-74.225245,1012,Hotel
5,Charleston,40.530531,-74.232158,4bc41a1ddce4eee165f0719d,West Shore Motor Lodge,40.532809,-74.225245,637,Hotel
6,Chelsea,40.594726,-74.18956,4c0055bfad15a5931dbe8d73,Comfort Inn,40.586191,-74.190216,951,Hotel
7,Bloomfield,40.605779,-74.187256,4ba43a73f964a520f08d38e3,Hilton Garden Inn Staten Island,40.614832,-74.176646,1348,Hotel
8,Bloomfield,40.605779,-74.187256,4d9a4ae41829a0938baebf68,Nicotra's Ballroom,40.614842,-74.1761,1380,Hotel
9,Bloomfield,40.605779,-74.187256,4bbe9fe8b083a593ba63a2e9,Hampton Inn & Suites Staten Island,40.613095,-74.178888,1078,Hotel


In [21]:
columns_names = ['Neighborhood Name', 
                  'Venue Name',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Like Counter',
                  'Disliked',
                  'Tip Counter',
                  'Rating']
staten_hotel_data = pd.DataFrame(columns=columns_names)
staten_hotel_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating


## Fetching data about the hotels of Staten Islands

In [22]:
for v_id,name,v_name,lat,lon in zip(staten_hotels['Venue Id'],staten_hotels['Neighborhood'],staten_hotels['Venue Name'],staten_hotels['Venue Latitude'],staten_hotels['Venue Longitude']):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        v_id, 
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION)
    
    # make the GET request
    results = requests.get(url).json()['response']['venue']
    try :
        r = results['rating']
    except :
        r = 'Not Rated yet'
    
    staten_hotel_data = staten_hotel_data.append({'Neighborhood Name' : name, 
                                          'Venue Name':v_name,
                                          'Venue Latitude':lat, 
                                          'Venue Longitude':lon, 
                                          'Like Counter':results['likes']['count'],
                                          'Disliked':results['dislike'],
                                          'Tip Counter':results['tips']['count'],
                                          'Rating' : r}, ignore_index=True)

## Hotel Data

In [23]:
staten_hotel_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating
0,Travis,Staten Island New York Hotel,40.586781,-74.190973,0,False,0,Not Rated yet
1,Travis,Comfort Inn,40.586191,-74.190216,4,False,11,4.8
2,Arrochar,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9
3,Grasmere,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9
4,Pleasant Plains,West Shore Motor Lodge,40.532809,-74.225245,0,False,3,Not Rated yet
5,Charleston,West Shore Motor Lodge,40.532809,-74.225245,0,False,3,Not Rated yet
6,Chelsea,Comfort Inn,40.586191,-74.190216,4,False,11,4.8
7,Bloomfield,Hilton Garden Inn Staten Island,40.614832,-74.176646,64,False,19,8.1
8,Bloomfield,Nicotra's Ballroom,40.614842,-74.1761,14,False,4,7.2
9,Bloomfield,Hampton Inn & Suites Staten Island,40.613095,-74.178888,18,False,9,6.6


# User Interface

In [24]:
user_sees = staten_hotels.drop(['Neighborhood Latitude','Neighborhood Longitude','Venue Id','Venue Latitude','Venue Longitude','Venue Category'],axis = 1)
user_sees.columns = ['Neighborhood','Hotel Name','Distance']
user_sees['Rating'] = staten_hotel_data['Rating']
user_sees

Unnamed: 0,Neighborhood,Hotel Name,Distance,Rating
0,Travis,Staten Island New York Hotel,55,Not Rated yet
1,Travis,Comfort Inn,46,4.8
2,Arrochar,Staten Island Navy Lodge,489,5.9
3,Grasmere,Staten Island Navy Lodge,1217,5.9
4,Pleasant Plains,West Shore Motor Lodge,1012,Not Rated yet
5,Charleston,West Shore Motor Lodge,637,Not Rated yet
6,Chelsea,Comfort Inn,951,4.8
7,Bloomfield,Hilton Garden Inn Staten Island,1348,8.1
8,Bloomfield,Nicotra's Ballroom,1380,7.2
9,Bloomfield,Hampton Inn & Suites Staten Island,1078,6.6


# So we can straightaway tell that Manhattan has many more good hotels than Staten Island has.

## Just out of curiosity we would like to Cluster hotels with similar statistics.

### Only take the necessary columns from the dataframe containing the hotel data of Manhattan.

In [25]:
manhattan_hotels_cluster = manhattan_hotels_data.drop(['Neighborhood Name','Venue Name', 'Venue Latitude', 'Venue Longitude', 'Disliked'], axis = 1)

In [26]:
manhattan_hotels_cluster

Unnamed: 0,Like Counter,Tip Counter,Rating
0,66,8,8.9
1,378,134,9.3
2,61,36,8.1
3,61,36,8.1
4,87,30,9.0
5,149,59,8.9
6,149,59,8.9
7,87,30,9.0
8,132,25,8.8
9,307,87,9.4


# 6. Clustering the hotels of Manhattan!!

In [27]:
# set number of clusters
kclusters = 3

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_hotels_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 1, 2, 0, 2, 0, 0,
       2, 1, 1, 2, 2, 2, 0, 2, 0, 2, 2, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0,
       0, 2, 0, 0, 2, 0, 0, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 2, 0, 0,
       0], dtype=int32)

In [28]:
manhattan_hotels_data['Cluster Labels'] = kmeans.labels_

In [29]:
manhattan_hotels_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
0,Chinatown,Hotel 50 Bowery NYC,40.715936,-73.996789,66,False,8,8.9,0
1,Chinatown,Crosby Street Hotel,40.723035,-73.997416,378,False,134,9.3,2
2,Manhattanville,Aloft Harlem,40.8091,-73.951505,61,False,36,8.1,0
3,Central Harlem,Aloft Harlem,40.8091,-73.951505,61,False,36,8.1,0
4,Upper East Side,The Mark Hotel,40.775201,-73.963351,87,False,30,9.0,0
5,Upper East Side,The Carlyle,40.774413,-73.963301,149,False,59,8.9,0
6,Lenox Hill,The Carlyle,40.774413,-73.963301,149,False,59,8.9,0
7,Lenox Hill,The Mark Hotel,40.775201,-73.963351,87,False,30,9.0,0
8,Lenox Hill,Loews Regency Hotel,40.76455,-73.969218,132,False,25,8.8,0
9,Lincoln Square,Mandarin Oriental,40.768987,-73.983017,307,False,87,9.4,2


# Let's see the hotels in the map of Manhattan

In [30]:
address = 'Manhattan, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.


In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, nei, ven, cluster in zip(manhattan_hotels_data['Venue Latitude'], manhattan_hotels_data['Venue Longitude'], manhattan_hotels_data['Neighborhood Name'], manhattan_hotels_data['Venue Name'], manhattan_hotels_data['Cluster Labels']):
    label = folium.Popup(str(ven) + ',' + str(nei) + ',' + ' Cluster = ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Let's analyze our clusters of hotels

## Cluster 1

In [32]:
manhattan_hotels_data.loc[manhattan_hotels_data['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
0,Chinatown,Hotel 50 Bowery NYC,40.715936,-73.996789,66,False,8,8.9,0
2,Manhattanville,Aloft Harlem,40.8091,-73.951505,61,False,36,8.1,0
3,Central Harlem,Aloft Harlem,40.8091,-73.951505,61,False,36,8.1,0
4,Upper East Side,The Mark Hotel,40.775201,-73.963351,87,False,30,9.0,0
5,Upper East Side,The Carlyle,40.774413,-73.963301,149,False,59,8.9,0
6,Lenox Hill,The Carlyle,40.774413,-73.963301,149,False,59,8.9,0
7,Lenox Hill,The Mark Hotel,40.775201,-73.963351,87,False,30,9.0,0
8,Lenox Hill,Loews Regency Hotel,40.76455,-73.969218,132,False,25,8.8,0
10,Clinton,Kimpton Ink48 Hotel,40.764505,-73.995987,190,False,91,8.9,0
11,Clinton,Hampton Inn Manhattan/Times Square Central,40.755607,-73.988663,97,False,13,9.1,0


## Cluster 2

In [33]:
manhattan_hotels_data.loc[manhattan_hotels_data['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
12,Clinton,W New York - Times Square,40.759356,-73.98569,980,False,221,9.1,1
16,Midtown,W New York - Times Square,40.759356,-73.98569,980,False,221,9.1,1
23,Chelsea,Soho House,40.740563,-74.005902,1099,False,198,9.4,1
24,Chelsea,"The Standard, High Line",40.740966,-74.008021,1058,False,255,9.3,1
33,West Village,Soho House,40.740563,-74.005902,1099,False,198,9.4,1
34,West Village,"The Standard, High Line",40.740966,-74.008021,1058,False,255,9.3,1
56,Sutton Place,The Plaza Hotel,40.764519,-73.974488,1352,False,229,9.5,1


## Cluster 3

In [34]:
manhattan_hotels_data.loc[manhattan_hotels_data['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
1,Chinatown,Crosby Street Hotel,40.723035,-73.997416,378,False,134,9.3,2
9,Lincoln Square,Mandarin Oriental,40.768987,-73.983017,307,False,87,9.4,2
17,Midtown,Lotte New York Palace,40.758082,-73.975235,447,False,104,9.1,2
19,Midtown,citizenM Hotel New York Times Square,40.761691,-73.984953,309,False,80,9.2,2
22,Murray Hill,The NoMad Hotel,40.744981,-73.988819,349,False,90,9.5,2
25,Greenwich Village,Crosby Street Hotel,40.723035,-73.997416,378,False,134,9.3,2
26,Greenwich Village,The Bowery Hotel,40.726186,-73.991611,493,False,90,9.4,2
27,East Village,The Bowery Hotel,40.726186,-73.991611,493,False,90,9.4,2
29,Lower East Side,PUBLIC,40.722923,-73.991717,302,False,33,9.0,2
31,Little Italy,Crosby Street Hotel,40.723035,-73.997416,378,False,134,9.3,2


# Detailed Analysis :
|Cluster Labels|Analysis|
|:------------:|:------|
|0|The number of ***'Likes'*** and ***'Tips'*** are high but not as high as those in Cluster 2. The ratings of the hotels are mixed but they are mostly high.|
|1|The number of ***'Likes'*** and ***'Tips'*** are comparatively much lower than the hotels in the other clusters. The ratings are moderate and not as good as the hotels in the other clusters.|
|2|These hotels have a very high number of ***'Likes'*** and ***'Tips'***. Although the ratings of any of the hotels are not out of the charts they are really high.|

### Only take the necessary columns from the dataframe containing the hotel data of Staten Island. Remove the hotels that has not been rated.

In [35]:
staten_hotel_data1 = staten_hotel_data[staten_hotel_data['Rating'] != 'Not Rated yet']

In [36]:
staten_hotel_cluster = staten_hotel_data1.drop(['Neighborhood Name','Venue Name', 'Venue Latitude', 'Venue Longitude', 'Disliked'], axis = 1)

In [37]:
staten_hotel_cluster

Unnamed: 0,Like Counter,Tip Counter,Rating
1,4,11,4.8
2,7,4,5.9
3,7,4,5.9
6,4,11,4.8
7,64,19,8.1
8,14,4,7.2
9,18,9,6.6
10,3,3,5.5
11,7,4,5.9
12,3,3,5.5


# 6. Clustering the hotels of Staten Island!!

In [38]:
# set number of clusters
kclusters = 3

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(staten_hotel_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([2, 2, 2, 2, 1, 0, 0, 2, 2, 2], dtype=int32)

In [47]:
staten_hotel_data1['Cluster Labels'] = kmeans.labels_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [40]:
staten_hotel_data1

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
1,Travis,Comfort Inn,40.586191,-74.190216,4,False,11,4.8,2
2,Arrochar,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9,2
3,Grasmere,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9,2
6,Chelsea,Comfort Inn,40.586191,-74.190216,4,False,11,4.8,2
7,Bloomfield,Hilton Garden Inn Staten Island,40.614832,-74.176646,64,False,19,8.1,1
8,Bloomfield,Nicotra's Ballroom,40.614842,-74.1761,14,False,4,7.2,0
9,Bloomfield,Hampton Inn & Suites Staten Island,40.613095,-74.178888,18,False,9,6.6,0
10,Bulls Head,Ramada Staten Island,40.608577,-74.146589,3,False,3,5.5,2
11,Shore Acres,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9,2
12,Willowbrook,Ramada Staten Island,40.608577,-74.146589,3,False,3,5.5,2


In [41]:
address = 'Staten Island, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Manhattan are 40.5834557, -74.1496048.


In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, nei, ven, cluster in zip(staten_hotel_data1['Venue Latitude'], staten_hotel_data1['Venue Longitude'], staten_hotel_data1['Neighborhood Name'], staten_hotel_data1['Venue Name'], staten_hotel_data1['Cluster Labels']):
    label = folium.Popup(str(ven) + ',' + str(nei) + ',' + ' Cluster = ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Cluster 1

In [43]:
staten_hotel_data1.loc[staten_hotel_data1['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
8,Bloomfield,Nicotra's Ballroom,40.614842,-74.1761,14,False,4,7.2,0
9,Bloomfield,Hampton Inn & Suites Staten Island,40.613095,-74.178888,18,False,9,6.6,0


## Cluster 2

In [44]:
staten_hotel_data1.loc[staten_hotel_data1['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
7,Bloomfield,Hilton Garden Inn Staten Island,40.614832,-74.176646,64,False,19,8.1,1


## Cluster 3

In [45]:
staten_hotel_data1.loc[staten_hotel_data1['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
1,Travis,Comfort Inn,40.586191,-74.190216,4,False,11,4.8,2
2,Arrochar,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9,2
3,Grasmere,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9,2
6,Chelsea,Comfort Inn,40.586191,-74.190216,4,False,11,4.8,2
10,Bulls Head,Ramada Staten Island,40.608577,-74.146589,3,False,3,5.5,2
11,Shore Acres,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,5.9,2
12,Willowbrook,Ramada Staten Island,40.608577,-74.146589,3,False,3,5.5,2


# Detailed Analysis :
|Cluster Labels|Analysis|
|:------------:|:------|
|0|These hotels have a higher number of ***'Likes'*** and ***'Tips'*** than other clusters. Although the ratings of any of the hotels are not out of the charts they are really high.|
|1|The number of ***'Likes'*** and ***'Tips'*** are comparatively much lower than the hotels in the other clusters. The ratings are moderate and not as good as the hotels in the other clusters.|
|2|The number of ***'Likes'*** and ***'Tips'*** are high but not as high as those in Cluster 0. The ratings of the hotels are mixed but they are mostly high.|