# Recommend Hotels in Manhattan, NY for Visitors

## Step 0: Download all the libraries needed in this project

In [1]:
import numpy as np 
import pandas as pd 
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
from   folium.plugins import MarkerCluster

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors

#import k-means from clustering stage
from sklearn.cluster import KMeans

#pd.set_option("display.max_rows", None, "display.max_columns", None)


## Step 1: Let's download and explore [NYC neighbourhood geography data]("https://geo.nyu.edu/catalog/nyu_2451_34572") 

In [2]:
with open('ny-geojson.json') as json_data:
    ny_geometry = json.load(json_data)
#ny_geometry

##### It looks we the "features" data is what we wanted.

In [3]:
nbh_geometry = ny_geometry['features']
#nbh_geometry[0]

##### Transform nbh_geometry into a pandas dataframe

In [4]:
# Instantiate the dataframe
ny_nbhs = pd.DataFrame(columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'])

# Go through the data and fill the dataframe one row at a time.
for data in nbh_geometry:
    borough = data['properties']['borough'] 
    nbh_name = data['properties']['name']
        
    nbh_coordinates = data['geometry']['coordinates']
    nbh_latitude = nbh_coordinates[1]
    nbh_lontitude = nbh_coordinates[0]
    
    ny_nbhs = ny_nbhs.append({'Borough': borough,
                              'Neighborhood': nbh_name,
                              'Latitude': nbh_latitude,
                              'Longitude': nbh_lontitude}, ignore_index=True)
print(ny_nbhs.shape)
ny_nbhs.head(10)

(306, 4)


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [5]:
# Let's see how many boroughs and neighborhoods in our data
print(" There are {} Boroughs and {} Neighborhoods in New York city. ".format(len(ny_nbhs['Borough'].unique()), ny_nbhs.shape[0]))


 There are 5 Boroughs and 306 Neighborhoods in New York city. 


#####  Here we choose Manhattan to explore in this project as an example

In [6]:
borough='Manhattan'
Brgh_df = ny_nbhs[ny_nbhs['Borough']==borough].reset_index(drop=True)
Brgh_df.drop(['Borough'], axis=1, inplace = True)
print(" There are {} Neighborhoods in {}. ".format(Brgh_df.shape[0],borough))
Brgh_df.head(10)

 There are 40 Neighborhoods in Manhattan. 


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Marble Hill,40.876551,-73.91066
1,Chinatown,40.715618,-73.994279
2,Washington Heights,40.851903,-73.9369
3,Inwood,40.867684,-73.92121
4,Hamilton Heights,40.823604,-73.949688
5,Manhattanville,40.816934,-73.957385
6,Central Harlem,40.815976,-73.943211
7,East Harlem,40.792249,-73.944182
8,Upper East Side,40.775639,-73.960508
9,Yorkville,40.77593,-73.947118


In [7]:
geolocator = Nominatim(user_agent="nyc_agent")
Manh_loc = geolocator.geocode('Manhattan, NY')
# create a map of Manhattan
manh_map = folium.Map(location=[Manh_loc.latitude,Manh_loc.longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(Brgh_df['Latitude'], Brgh_df['Longitude'], Brgh_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='#66a3ff',
        fill=True,
        fill_color='#0066ff',
        fill_opacity=0.7,
        parse_html=False).add_to(manh_map)  
    
manh_map

## Step 2:  Let's leverage the Foursquare API to explore the selected neighborhoods and segment them.

##### Step 2.1  Set up the URL for Foursquare request

In [8]:
LIMIT = 500
radius = 1000

USER_ID = 'VBJRAO5IMFVHJ15014BHKY1WXSLLA1JVQ0L0HQGMAIF4PQLH' 
USER_SECRET = 'GFNHX055SNPUJUVPMUENX3JQKZ5MGXQ5TWQKBHEROMXTR5L3' 
VERSION = '20180605' 


##### Step 2.2  Define functions to extract the category of the venue and venues

In [9]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [10]:
def get_Nbh_Venues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):        
        
        #create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            USER_ID, 
            USER_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)            
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Venue', 'Latitude', 'Longitude', 'Category']
    
    return(nearby_venues)

##### Step 2.3  Explore venues in burough Manhattan 

In [11]:
# mnhtn_venues stands for manhattan venues
mnhtn_venues = get_Nbh_Venues(names=Brgh_df['Neighborhood'],
                            latitudes=Brgh_df['Latitude'],
                            longitudes=Brgh_df['Longitude'])

print(mnhtn_venues.shape)
mnhtn_venues.head(6)

(3180, 5)


Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category
0,Marble Hill,Arturo's,40.874412,-73.910271,Pizza Place
1,Marble Hill,Bikram Yoga,40.876844,-73.906204,Yoga Studio
2,Marble Hill,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,Starbucks,40.877531,-73.905582,Coffee Shop
4,Marble Hill,Astral Fitness & Wellness Center,40.876705,-73.906372,Gym
5,Marble Hill,Dunkin',40.877136,-73.906666,Donut Shop


In [12]:
#print(mnhtn_venues['Category'].unique())

In [13]:
#extract all restaurant venues and saved as rst_df
rst_df = mnhtn_venues[mnhtn_venues['Category'].str.contains('Restaurant')].reset_index(drop=True)
rst_df['Regional_Style'] = rst_df['Category']
rst_df

Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category,Regional_Style
0,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant,Seafood Restaurant
1,Marble Hill,Boston Market,40.877430,-73.905412,American Restaurant,American Restaurant
2,Chinatown,Kiki's,40.714476,-73.992036,Greek Restaurant,Greek Restaurant
3,Chinatown,Spicy Village,40.717010,-73.993530,Chinese Restaurant,Chinese Restaurant
4,Chinatown,Wah Fung Number 1 Fast Food 華豐快飯店,40.717278,-73.994177,Chinese Restaurant,Chinese Restaurant
...,...,...,...,...,...,...
883,Hudson Yards,Il Punto Ristorante,40.756079,-73.994594,Italian Restaurant,Italian Restaurant
884,Hudson Yards,Spanish Diner,40.752394,-74.001491,Spanish Restaurant,Spanish Restaurant
885,Hudson Yards,Treadwell,40.759964,-73.996284,Restaurant,Restaurant
886,Hudson Yards,EDEN Local,40.759909,-73.996301,Restaurant,Restaurant


In [14]:
# Let's see how many restaurant categories in this dataframe.
print('There are {} uniques categories.'.format(len(rst_df['Category'].unique())))

#Let's see all of categories name before reorganize some restaurant categories
# print(rst_df['Category'].unique())

There are 72 uniques categories.


In [15]:
#reogrizate some restaurant categories
rst_df.loc[(rst_df.Regional_Style == 'New American Restaurant'),'Regional_Style']='American Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Sushi Restaurant'),'Regional_Style']='Japanese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Japanese Curry Restaurant'),'Regional_Style']='Japanese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Hotpot Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Dim Sum Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Dumpling Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Shanghai Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Taiwanese Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Cantonese Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Ramen Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Tapas Restaurant'),'Regional_Style']='Chinese Restaurant'
rst_df.loc[(rst_df.Regional_Style == 'Szechuan Restaurant'),'Regional_Style']='Chinese Restaurant'
print(rst_df.shape)
rst_df.head(6)

(888, 6)


Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category,Regional_Style
0,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant,Seafood Restaurant
1,Marble Hill,Boston Market,40.87743,-73.905412,American Restaurant,American Restaurant
2,Chinatown,Kiki's,40.714476,-73.992036,Greek Restaurant,Greek Restaurant
3,Chinatown,Spicy Village,40.71701,-73.99353,Chinese Restaurant,Chinese Restaurant
4,Chinatown,Wah Fung Number 1 Fast Food 華豐快飯店,40.717278,-73.994177,Chinese Restaurant,Chinese Restaurant
5,Chinatown,The Fat Radish,40.715323,-73.99195,English Restaurant,English Restaurant


In [16]:
# Let's see how many categories after recognization
print('There are {} uniques categories.'.format(len(rst_df['Regional_Style'].unique())))
#print(rst_df['Regional_Style'].unique())

There are 61 uniques categories.


### Step 3, Sort the neighborhoods and the most common restaurant avenues

##### Step 3.1  Count and sort the total restaurant venues by descending order

In [17]:
nbh_group = rst_df.groupby(['Neighborhood']).count()
nbh_group.reset_index(inplace=True)
nbh_group.sort_values(by=['Venue'], inplace=True, ascending=False)
nbh_group = nbh_group[['Neighborhood','Venue']]
nbh_group.columns = ['Neighborhood','Venues Sum']
print(nbh_group.shape)
nbh_group.head(6)

(39, 2)


Unnamed: 0,Neighborhood,Venues Sum
12,Greenwich Village,44
4,Chinatown,37
33,Turtle Bay,37
8,East Village,36
35,Upper West Side,35
26,Murray Hill,35


##### Step 3.2  Count and sort the selected restaurant venues by descending order

In [18]:
# prfr_opts stands for preferred restaurant options
# prfr_rsts preferred restaurant restaurants 
prfr_opts = ["Chinese Restaurant","Seafood Restaurant","Japanese Restaurant"]
#prfr_rsts = rst_df.loc[rst_df['Neighborhood'].isin(sltd_nbhs)]
prfr_rsts = rst_df.loc[rst_df['Regional_Style'].isin(prfr_opts)].reset_index(drop='True')
print(prfr_rsts.shape)
prfr_rsts

(201, 6)


Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category,Regional_Style
0,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant,Seafood Restaurant
1,Chinatown,Spicy Village,40.717010,-73.993530,Chinese Restaurant,Chinese Restaurant
2,Chinatown,Wah Fung Number 1 Fast Food 華豐快飯店,40.717278,-73.994177,Chinese Restaurant,Chinese Restaurant
3,Chinatown,Da Yu Hot Pot 大渝火锅,40.716735,-73.995752,Hotpot Restaurant,Chinese Restaurant
4,Chinatown,Xi'an Famous Foods,40.715232,-73.997263,Chinese Restaurant,Chinese Restaurant
...,...,...,...,...,...,...
196,Tudor City,Hiroshi Japanese Fusion,40.748571,-73.976009,Sushi Restaurant,Japanese Restaurant
197,Flatiron,HALL,40.740260,-73.992324,Japanese Restaurant,Japanese Restaurant
198,Flatiron,Sugarfish,40.738951,-73.988955,Japanese Restaurant,Japanese Restaurant
199,Flatiron,Sushi By Bou,40.740883,-73.993525,Sushi Restaurant,Japanese Restaurant


In [19]:
# htl_df stands for hotel dataframe
# Retrieve all of hotel venues into a new dataframe htl_df
htl_df = mnhtn_venues.loc[mnhtn_venues["Category"]=="Hotel"].reset_index(drop=True)
print(htl_df.shape)
htl_df

(63, 5)


Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category
0,Chinatown,Hotel 50 Bowery NYC,40.715936,-73.996789,Hotel
1,Upper East Side,The Carlyle,40.774413,-73.963301,Hotel
2,Upper East Side,The Surrey,40.774415,-73.963889,Hotel
3,Upper West Side,The Lucerne Hotel,40.783427,-73.978495,Hotel
4,Lincoln Square,The Phillips Club,40.774473,-73.983349,Hotel
...,...,...,...,...,...
58,Tudor City,Millennium Hilton New York One UN Plaza,40.750399,-73.969050,Hotel
59,Flatiron,The New York EDITION,40.741286,-73.987358,Hotel
60,Hudson Yards,Equinox Hotel - Hudson Yards,40.754768,-74.001986,Hotel
61,Hudson Yards,YOTEL New York,40.759171,-73.995268,Hotel


### Finding the recommended hotels

In [20]:
# Caculate the distance between hotels and preferred restaurants
# Then save restaurants within distance_m into new dataframe
from geopy.distance import distance
distance_m = 400

dtypes = np.dtype([
          ('Hotel', str),
          ('Count', int),
          ('Restaurants', str),
          ('Latitude', float),
          ('Longitude', float)
          ])
data = np.empty(0, dtype=dtypes)
sltd_htls = pd.DataFrame(data)
sltd_htls['Hotel'] = htl_df['Venue']
sltd_htls.set_index('Hotel', inplace=True)


for htl_lat, htl_lng, htl_venue in zip(htl_df['Latitude'], htl_df['Longitude'],htl_df['Venue']):
       
    rst_venue_str = ""
    num = 0
    for rst_lat, rst_lng, rst_venue in zip(prfr_rsts['Latitude'], prfr_rsts['Longitude'],prfr_rsts['Venue']):
        d = distance((htl_lat, htl_lng), (rst_lat, rst_lng)).m
        if (d < distance_m):
            rst_venue_str = rst_venue_str +','+rst_venue
            num = num + 1
    
    sltd_htls.loc[htl_venue] = [num, rst_venue_str, htl_lat, htl_lng]
    
sltd_htls.sort_values('Count', ascending=False, inplace = True)
sltd_htls.drop_duplicates(inplace = True)
sltd_htls.reset_index(inplace = True)

##### Set up the number of prefered restaurants arounding a hotel which should be recommended.

In [21]:
# Senario 1, if the least number is greater than  5
least_num1 = 5
rcmd_htls = sltd_htls[sltd_htls['Count']>least_num1] 
print(rcmd_htls.shape)
rcmd_htls

(13, 5)


Unnamed: 0,Hotel,Count,Restaurants,Latitude,Longitude
0,Hotel 50 Bowery NYC,24.0,",Spicy Village,Wah Fung Number 1 Fast Food 華豐快...",40.715936,-73.996789
1,"The Renwick Hotel, Curio Collection by Hilton",15.0,",Café Zaiya,Kajitsu,Sushi Ryusei,Tempura Matsu...",40.750184,-73.977604
2,citizenM Bowery,14.0,",Spicy Village,Wah Fung Number 1 Fast Food 華豐快...",40.720599,-73.993574
3,Shelburne Hotel & Suites by Affinia,12.0,",Kajitsu,Sushi Ryusei,Tempura Matsui,Momosan R...",40.748419,-73.97794
4,Mercer Hotel,10.0,",Tomoe Sushi,Blue Ribbon Sushi,Lure Fishbar,Bo...",40.724828,-73.998553
5,AKA United Nations,10.0,",KaoruMC,Crave Fishbar,Yama 49,Aburiya Kinnosu...",40.75264,-73.97134
6,"The Langham, New York, Fifth Avenue",9.0,",Koi New York,Café Zaiya,Zuma New York,Café Ch...",40.750144,-73.983532
7,The William,9.0,",Koi New York,Café Zaiya,Kajitsu,Momosan Ramen...",40.750673,-73.980077
8,"The Bernic Hotel, Tapestry Collection by Hilton",9.0,",KaoruMC,Crave Fishbar,Yama 49,Aburiya Kinnosu...",40.754432,-73.972685
9,Millennium Hilton New York One UN Plaza,8.0,",KaoruMC,Yama 49,Aburiya Kinnosuke,Pescatore S...",40.750399,-73.96905


In [22]:
# Senario 2, if we choose the least number is greater than  3
least_num2 = 3
rcmd_htls = sltd_htls[sltd_htls['Count']> least_num2] 
print(rcmd_htls.shape)
rcmd_htls.head(10)

(21, 5)


Unnamed: 0,Hotel,Count,Restaurants,Latitude,Longitude
0,Hotel 50 Bowery NYC,24.0,",Spicy Village,Wah Fung Number 1 Fast Food 華豐快...",40.715936,-73.996789
1,"The Renwick Hotel, Curio Collection by Hilton",15.0,",Café Zaiya,Kajitsu,Sushi Ryusei,Tempura Matsu...",40.750184,-73.977604
2,citizenM Bowery,14.0,",Spicy Village,Wah Fung Number 1 Fast Food 華豐快...",40.720599,-73.993574
3,Shelburne Hotel & Suites by Affinia,12.0,",Kajitsu,Sushi Ryusei,Tempura Matsui,Momosan R...",40.748419,-73.97794
4,Mercer Hotel,10.0,",Tomoe Sushi,Blue Ribbon Sushi,Lure Fishbar,Bo...",40.724828,-73.998553
5,AKA United Nations,10.0,",KaoruMC,Crave Fishbar,Yama 49,Aburiya Kinnosu...",40.75264,-73.97134
6,"The Langham, New York, Fifth Avenue",9.0,",Koi New York,Café Zaiya,Zuma New York,Café Ch...",40.750144,-73.983532
7,The William,9.0,",Koi New York,Café Zaiya,Kajitsu,Momosan Ramen...",40.750673,-73.980077
8,"The Bernic Hotel, Tapestry Collection by Hilton",9.0,",KaoruMC,Crave Fishbar,Yama 49,Aburiya Kinnosu...",40.754432,-73.972685
9,Millennium Hilton New York One UN Plaza,8.0,",KaoruMC,Yama 49,Aburiya Kinnosuke,Pescatore S...",40.750399,-73.96905


In [23]:
# create a map of NYC centered based on Manhantten's latitude and longitude.  
manh_map = folium.Map(location=[Manh_loc.latitude,Manh_loc.longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(Brgh_df['Latitude'], Brgh_df['Longitude'], Brgh_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='#66a3ff',
        fill=True,
        fill_color='#0066ff',
        fill_opacity=0.7,
        parse_html=False).add_to(manh_map)  

# add selected restaurants as orange circle markers to map
for lat, lng, venue in zip(prfr_rsts['Latitude'], prfr_rsts['Longitude'],prfr_rsts['Venue']):
    
    label = folium.Popup('Restaurant :'+venue, parse_html=True)
    folium.CircleMarker(
        location =[lat, lng],
        radius=7,
        popup=label,
        color='#ff6600',
        fill=True,
        fill_color='#ffd1b3',
        fill_opacity=0.7,
        parse_html=False).add_to(manh_map) 

# add selected hotels as green circle markers to map
for cnt, lat, lng, venue in zip(sltd_htls['Count'], sltd_htls['Latitude'], sltd_htls['Longitude'],sltd_htls['Hotel']):
    
    if(cnt >= least_num2):
        icon = folium.Icon(color='green',icon='bed',prefix ='fa')
    else:
        icon = folium.Icon(color='lightgray',icon='bed',prefix='fa')                         
    
    folium.Marker([lat, lng], popup= "@" + venue,  icon= icon).add_to(manh_map)

manh_map
 