In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense,Activation,Dropout
from keras.layers import LSTM
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from keras.callbacks import EarlyStopping
import math
import os
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer


In [3]:
# Read the three raw CSV exports from the local 'hotel/' folder.
# NOTE(review): hotel_cost is loaded here but is not referenced by any other cell shown in this notebook.
hotel_details = pd.read_csv('hotel/Hotel_details.csv', sep=',')
hotel_rooms = pd.read_csv('hotel/Hotel_Room_attributes.csv', sep=',')
hotel_cost = pd.read_csv('hotel/hotels_RoomPrice.csv', sep=',')

In [4]:
# Preview the first rows of the hotel details table (head() defaults to 5 rows).
hotel_details.head()

Unnamed: 0,id,hotelid,hotelname,address,city,country,zipcode,propertytype,starrating,latitude,longitude,Source,url,curr
0,46406,1771651,Mediteran Bungalow Galeb,Vukovarska 7,Omis,Croatia,21310.0,Holiday parks,4,43.440124,16.682505,2,https://www.booking.com/hotel/hr/bungalow-luxu...,EUR
1,46407,177167,Hotel Polonia,Plac Teatralny 5,Torun,Poland,,Hotels,3,53.012329,18.6038,5,https://www.agoda.com/en-gb/hotel-polonia/hote...,EUR
2,46408,1771675,Rifugio Sass Bece,"Belvedere del Pordoi,1",Canazei,Italy,38032.0,Hotels,3,46.47792,11.81335,2,http://www.booking.com/hotel/it/rifugio-sass-b...,EUR
3,46409,177168,Madalena Hotel,Mykonos,Mykonos,Greece,84600.0,Hotels,3,37.452316,25.329849,5,https://www.agoda.com/en-gb/madalena-hotel/hot...,EUR
4,46410,1771718,Pension Morenfeld,Mair im Korn Strasse 2,Lagundo,Italy,39022.0,Hotels,3,46.68278,11.131736,2,http://www.booking.com/hotel/it/pension-morenf...,EUR


In [5]:
# Preview the first rows of the room attributes table (head() defaults to 5 rows).
hotel_rooms.head()

Unnamed: 0,id,hotelcode,roomamenities,roomtype,ratedescription
0,50677497,634876,Air conditioning: ;Alarm clock: ;Carpeting: ;C...,Double Room,"Room size: 15 m²/161 ft², Shower, 1 king bed"
1,50672149,8328096,Air conditioning: ;Closet: ;Fireplace: ;Free W...,Vacation Home,"Shower, Kitchenette, 2 bedrooms, 1 double bed ..."
2,50643430,8323442,Air conditioning: ;Closet: ;Dishwasher: ;Firep...,Vacation Home,"Shower, Kitchenette, 2 bedrooms, 1 double bed ..."
3,50650317,7975,Air conditioning: ;Clothes rack: ;Coffee/tea m...,Standard Triple Room,"Room size: 20 m²/215 ft², Shower, 3 single beds"
4,50650318,7975,Air conditioning: ;Clothes rack: ;Coffee/tea m...,Standard Triple Room,"Room size: 20 m²/215 ft², Shower, 3 single beds"


# Data Cleaning and transformations

In [6]:
# Remove the 'id' column from both tables and 'zipcode' from the details table.
# NOTE(review): zipcode shows NaN in the preview; presumably it is dropped so that
# the dropna() in the next cell does not discard those hotels — confirm.
hotel_details = hotel_details.drop(columns=['id', 'zipcode'])
hotel_rooms = hotel_rooms.drop(columns=['id'])

In [7]:
# Keep only fully populated rows: a row with at least one missing value is discarded.
hotel_details = hotel_details.dropna(axis=0, how='any')
hotel_rooms = hotel_rooms.dropna(axis=0, how='any')

In [8]:
# keep=False removes *every* row whose hotelid occurs more than once, so no
# representative of a duplicated hotel survives.
# NOTE(review): confirm this is intended rather than keep='first'.
hotel_details = hotel_details.drop_duplicates(subset='hotelid', keep=False)

In [9]:
# Attach hotel-level details to every room row; the inner join keeps only rooms
# whose hotelcode has a matching hotelid in the details table.
hotel = pd.merge(
    left=hotel_rooms,
    right=hotel_details,
    how='inner',
    left_on='hotelcode',
    right_on='hotelid',
)

In [10]:
# Inspect the column names of the merged frame.
hotel.columns

Index(['hotelcode', 'roomamenities', 'roomtype', 'ratedescription', 'hotelid',
       'hotelname', 'address', 'city', 'country', 'propertytype', 'starrating',
       'latitude', 'longitude', 'Source', 'url', 'curr'],
      dtype='object')

In [11]:
# hotelid duplicates hotelcode after the join (they are the join keys); url, curr
# and Source are not used by any of the recommender functions in this notebook.
hotel = hotel.drop(columns=['hotelid', 'url', 'curr', 'Source'])

In [12]:
# Inspect the columns again after removing hotelid, url, curr and Source.
hotel.columns

Index(['hotelcode', 'roomamenities', 'roomtype', 'ratedescription',
       'hotelname', 'address', 'city', 'country', 'propertytype', 'starrating',
       'latitude', 'longitude'],
      dtype='object')

# Recommender system based only on the city and the hotel's star rating

In [13]:
def citybased(city):
    """Return the best-rated hotels in ``city``.

    Side effect: the module-level ``hotel['city']`` column is lower-cased in
    place so that the city comparison is case-insensitive.

    Parameters
    ----------
    city : str
        City name; matched case-insensitively against ``hotel['city']``.

    Returns
    -------
    pandas.DataFrame or None
        Up to five rows (one per ``hotelcode``) ordered by ``starrating``
        descending, restricted to the name, rating, address, amenities and
        rate description columns. When no hotel matches, a message is printed
        and ``None`` is returned.
    """
    hotel['city'] = hotel['city'].str.lower()
    wanted_city = city.lower()

    ranked = hotel.loc[hotel['city'] == wanted_city].sort_values(by='starrating', ascending=False)
    # A hotel appears once per room row; keep only its first (highest-sorted) row.
    ranked = ranked.drop_duplicates(subset='hotelcode', keep='first')

    if ranked.empty:
        print('No Hotels Available')
        return None

    shown_columns = ['hotelname', 'starrating', 'address', 'roomamenities', 'ratedescription']
    return ranked[shown_columns].head()

In [14]:
# Print a caption, then show the frame returned by citybased for London as the cell output.
print('Top 5 hotels')
citybased('London')

Top 5 hotels


Unnamed: 0,hotelname,starrating,address,roomamenities,ratedescription
69848,Park Plaza London Park Royal,4,628 Western Avenue,Air conditioning: ;Alarm clock: ;Bathrobes: ;C...,"Room size: 24 m²/258 ft², Shower, 1 king bed a..."
120285,Novotel London Excel Hotel,4,7 Western Gateway,Air conditioning: ;Carpeting: ;Closet: ;Coffee...,"Room size: 25 m²/269 ft², Non-smoking, Shower ..."
63905,Hard Rock Hotel London,4,Great Cumberland Place,Air conditioning: ;Bathrobes: ;Carpeting: ;Clo...,"Room size: 28 m²/301 ft², Shower and bathtub, ..."
69711,Holiday Inn London Elstree,4,Barnet By Pass,Additional toilet: ;Air conditioning: ;Carpeti...,"Room size: 25 m²/269 ft², City view, Non-smoki..."
64612,DoubleTree by Hilton Hotel London - Westminster,4,30 John Islip Street,Air conditioning: ;Alarm clock: ;Bathrobes: ;C...,"Room size: 21 m²/226 ft², Non-smoking, Shower,..."


In [15]:
# (keyword, guests) pairs: the number of guests a room is taken to hold when its
# room type contains that keyword. calc() compares every whitespace-separated,
# lower-cased word of a room type against these keywords, so a hyphenated key such
# as 'double-double' matches only when the room type contains that exact word.
room_no = [
    ('king', 2),
    ('queen', 2),
    ('triple', 3),
    ('master', 3),
    ('family', 4),
    ('murphy', 2),
    ('quad', 4),
    ('double-double', 4),
    ('mini', 2),
    ('studio', 1),
    ('junior', 2),
    ('apartment', 4),
    ('double', 2),
    ('twin', 2),
    ('double-twin', 4),
    ('single', 1),
    ('disabled', 1),  # was misspelt 'diabled', which could never match a "Disabled" room
    ('accessible', 1),
    ('suite', 2),
    ('one', 2),
]

In [16]:
def calc():
    """Derive a guest capacity for every row of ``hotel`` and store it in ``hotel['guests_no']``.

    Each room type is lower-cased and split on whitespace. The first word (in
    room-type order) that equals a keyword in ``room_no`` decides the capacity;
    rows with no matching word get the default of 2 guests.

    Reads the module-level ``hotel`` frame and ``room_no`` list, and adds or
    overwrites the ``guests_no`` column on ``hotel``. Returns ``None``.
    """
    # Build an O(1) lookup; setdefault keeps the first entry for a repeated
    # keyword, matching a front-to-back scan of room_no.
    capacity_by_keyword = {}
    for keyword, guests in room_no:
        capacity_by_keyword.setdefault(keyword, guests)

    default_guests = 2
    guests_no = []
    # Iterate over the values rather than hotel['roomtype'][i]: the latter is a
    # label lookup and fails (or reads the wrong row) when the index is not 0..n-1.
    for room_type in hotel['roomtype']:
        words = room_type.lower().split()
        guests_no.append(
            next((capacity_by_keyword[w] for w in words if w in capacity_by_keyword), default_guests)
        )
    hotel['guests_no'] = guests_no

# Run calc() so that hotel gains the guests_no column used by the recommenders below.
calc()

In [17]:
def pop_citybased(city,number):
    """Return the best-rated hotels in ``city`` that have a room for ``number`` guests.

    Side effect: the module-level ``hotel['city']`` column is lower-cased in
    place so that the city comparison is case-insensitive.

    Parameters
    ----------
    city : str
        City name; matched case-insensitively against ``hotel['city']``.
    number : int
        Required value of the ``guests_no`` column.

    Returns
    -------
    pandas.DataFrame or None
        Up to ten rows (one per ``hotelcode``) ordered by ``starrating``
        descending. When nothing matches, a message is printed and ``None``
        is returned.
    """
    hotel['city'] = hotel['city'].str.lower()

    in_city = hotel.loc[hotel['city'] == city.lower()]
    matches = in_city.loc[in_city['guests_no'] == number]
    matches = matches.sort_values(by='starrating', ascending=False)
    # One row per hotel: keep the first row left after sorting.
    matches = matches.drop_duplicates(subset='hotelcode', keep='first')

    if not matches.empty:
        shown_columns = ['hotelname', 'roomtype', 'guests_no', 'starrating',
                         'address', 'roomamenities', 'ratedescription']
        return matches[shown_columns].head(10)

    print('Sorry No Hotels Available\n tune your constraints')
    
    

In [18]:
# Example query: London hotels with a room whose derived guests_no is 4 (at most 10 rows are returned).
pop_citybased('London',4)

Unnamed: 0,hotelname,roomtype,guests_no,starrating,address,roomamenities,ratedescription
968,Holiday Inn London - Heathrow T5,Family Room,4,4,Old Bath Road,Air conditioning: ;Alarm clock: ;Blackout curt...,"Room size: 25 m²/269 ft², Non-smoking, Separat..."
129097,Best Western Plus London Croydon Aparthotel,Quad,4,4,Dunheved Road South,Air conditioning: ;Alarm clock: ;Closet: ;Clot...,"Room size: 20 m²/215 ft², Street view, Non-smo..."
53437,Wimbledon Boutique Hotel,Family Room,4,4,78 Worple Road,Air conditioning: ;Carpeting: ;Closet: ;Coffee...,"Room size: 22 m²/237 ft², Street view, Non-smo..."
114332,Greenland Villa,Quad Room,4,4,9 Charlton Road,Air conditioning: ;Carpeting: ;Closet: ;Clothe...,"Room size: 15 m²/161 ft², Non-smoking, Shower,..."
115037,DoubleTree by Hilton Hotel London - Docklands ...,Two Bedroom Family Room,4,4,265 Rotherhithe Street,Air conditioning: ;Closet: ;Coffee/tea maker: ...,"Room size: 60 m²/646 ft², Lake view, Non-smoki..."
48488,The Park City Grand Plaza Kensington Hotel,Family Room - Room Only,4,4,18-30 Lexham Gardens,Air conditioning: ;Free Wi-Fi in all rooms!: ;...,"Free Wi-Fi, Extra low price! (non-refundable)"
112773,The Cleveland Hotel,Family Studio,4,4,39-40 Cleveland Square,Air conditioning: ;Free Wi-Fi in all rooms!: ;...,"Free Wi-Fi, Extra low price! (non-refundable)"
108956,Mercure London Hyde Park Hotel,Family Room,4,4,8-14 Talbot Square,Air conditioning: ;Desk: ;Free Wi-Fi in all ro...,"Room size: 31 m²/334 ft², Non-smoking, 1 singl..."
110049,Sheraton Skyline Hotel London Heathrow,"Family Room, Guest room, 2 Double",4,4,"Heathrow Airport, Bath Road",Air conditioning: ;Alarm clock: ;Bathrobes: ;C...,"Non-smoking, Shower and bathtub, 2 double beds"
54264,Pestana Chelsea Bridge Hotel And Spa,Deluxe Family Room,4,4,354 Queenstown Road,Air conditioning: ;Alarm clock: ;Bathrobes: ;B...,"Room size: 30 m²/323 ft², Non-smoking, Separat..."


In [19]:
# Amenities arrive joined by the literal text ': ;'; turn that separator into a comma.
# regex=False makes the literal match explicit (the pattern has no regex metacharacters).
hotel['roomamenities'] = hotel['roomamenities'].str.replace(': ;', ',', regex=False)

In [22]:
# Display the working frame; the rendered output elides the middle rows.
hotel

Unnamed: 0,hotelcode,roomamenities,roomtype,ratedescription,hotelname,address,city,country,propertytype,starrating,latitude,longitude,guests_no
0,634876,"air conditioning,alarm clock,carpeting,closet,...",Double Room,"Room size: 15 m²/161 ft², Shower, 1 king bed",The Old Cider House,25 Castle Street,nether stowey,United Kingdom,Hotels,4,51.150921,-3.158470,2
1,7975,"air conditioning,clothes rack,coffee/tea maker...",Standard Triple Room,"Room size: 20 m²/215 ft², Shower, 3 single beds",Apollo Hotel London,64-66 Queensborough Terrace,london,United Kingdom,Hotels,2,51.511234,-0.185117,3
2,7975,"air conditioning,clothes rack,coffee/tea maker...",Standard Triple Room,"Room size: 20 m²/215 ft², Shower, 3 single beds",Apollo Hotel London,64-66 Queensborough Terrace,london,United Kingdom,Hotels,2,51.511234,-0.185117,3
3,7975,"air conditioning,clothes rack,coffee/tea maker...",Standard Triple Room,"Room size: 20 m²/215 ft², Shower, 3 single beds",Apollo Hotel London,64-66 Queensborough Terrace,london,United Kingdom,Hotels,2,51.511234,-0.185117,3
4,7975,"air conditioning,clothes rack,heating,in-room ...",Deluxe Triple Room,"Shower, 1 single bed and 1 double bed",Apollo Hotel London,64-66 Queensborough Terrace,london,United Kingdom,Hotels,2,51.511234,-0.185117,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145420,268416,"air conditioning,alarm clock,carpeting,closet,...",Twin Room,"Room size: 17 m²/183 ft², Non-smoking, Shower,...",Sure Hotel by Best Western Newcastle,358 Westgate Road,newcastle-upon-tyne,United Kingdom,Hotels,3,54.972672,-1.634396,2
145421,268416,"air conditioning,alarm clock,carpeting,closet,...",Twin Room,"Room size: 17 m²/183 ft², Non-smoking, Shower,...",Sure Hotel by Best Western Newcastle,358 Westgate Road,newcastle-upon-tyne,United Kingdom,Hotels,3,54.972672,-1.634396,2
145422,268416,"air conditioning,alarm clock,carpeting,closet,...",Twin Room,"Room size: 17 m²/183 ft², Non-smoking, Shower,...",Sure Hotel by Best Western Newcastle,358 Westgate Road,newcastle-upon-tyne,United Kingdom,Hotels,3,54.972672,-1.634396,2
145423,268416,"air conditioning,free wi-fi in all rooms!,in-r...",Twin Standard,"Pay nothing until November 21, 2019, Free Wi-F...",Sure Hotel by Best Western Newcastle,358 Westgate Road,newcastle-upon-tyne,United Kingdom,Hotels,3,54.972672,-1.634396,2


In [25]:
def requirementbased(city,number,features):
    """Rank hotels in ``city`` with a room for ``number`` guests by how many requested keywords their amenities contain.

    ``features`` and every candidate's ``roomamenities`` text are reduced to a
    set of keywords (tokenised, English stop words removed, punctuation-only
    tokens removed, lemmatised). A row's ``similarity`` is the number of
    keywords it shares with ``features``.

    Side effect: the module-level ``hotel`` frame has its ``city`` and
    ``roomamenities`` columns lower-cased in place.

    Parameters
    ----------
    city : str
        City name; matched case-insensitively against ``hotel['city']``.
    number : int
        Required value of the ``guests_no`` column.
    features : str
        Free-text description of the wanted amenities.

    Returns
    -------
    pandas.DataFrame
        Up to five rows (one per ``hotelcode``) ordered by ``similarity``
        descending; empty when no hotel matches ``city`` and ``number``.
    """
    hotel['city'] = hotel['city'].str.lower()
    hotel['roomamenities'] = hotel['roomamenities'].str.lower()

    # A set gives O(1) membership tests; the stop-word list is consulted once per token.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    def _keywords(text):
        """Return the lemmatised, non-stop-word word tokens of ``text`` as a set."""
        return {
            lemmatizer.lemmatize(token)
            for token in word_tokenize(text)
            # The tokenizer emits punctuation such as ',' as separate tokens; they must
            # not count as matches, otherwise a comma in the request would add one to
            # the score of every comma-separated amenity list.
            if token not in stop_words and any(ch.isalnum() for ch in token)
        }

    wanted = _keywords(features.lower())

    candidates = hotel[hotel['city'] == city.lower()]
    candidates = candidates[candidates['guests_no'] == number]
    candidates = candidates.set_index(np.arange(candidates.shape[0]))

    candidates['similarity'] = [
        len(_keywords(amenities) & wanted) for amenities in candidates['roomamenities']
    ]
    candidates = candidates.sort_values(by='similarity', ascending=False)
    # One row per hotel: keep its best-matching room.
    candidates = candidates.drop_duplicates(subset='hotelcode', keep='first')
    return candidates[['hotelname','roomtype','guests_no','starrating','address','roomamenities','ratedescription','similarity']].head(5)

# Requirement- and special-needs-based recommender

In [21]:
# Example query: single-guest rooms in London ranked against a free-text wish list.
# NOTE(review): requirementbased ends with .head(5), yet the output recorded below has
# 10 rows, and this cell's execution count (21) precedes that of the definition (25).
# The stored output appears stale; re-run the notebook top to bottom to refresh it.
requirementbased('London',1,'I need a extra toilet and room should be completely air conditioned.I should have a bathrobe.')

Unnamed: 0,hotelname,roomtype,guests_no,starrating,address,roomamenities,ratedescription,similarity
245,The Colonnade London Hotel,Studio,1,4,2 Warrington Crescent,"additional toilet,air conditioning,alarm clock...","Room size: 28 m²/301 ft², Balcony/terrace, Non...",4
256,Park International Hotel,Standard Single Room,1,4,117-129 Cromwell Road,"additional toilet,air conditioning,blackout cu...","Room size: 11 m²/118 ft², Non-smoking, Shower,...",3
1018,Holiday Inn London - Heathrow Ariel,1 Single Bed 1 Per Non-Smoking,1,3,118 Bath Road,"additional toilet,air conditioning,carpeting,c...","Room size: 25 m²/269 ft², Non-smoking, Shower ...",3
485,Saba Hotel,Single En Suite,1,3,101-103 Shepherds Bush Road,"additional toilet,air conditioning,carpeting,c...","Non-smoking, Shower, 1 single bed",3
487,Holiday Inn Express Dunstable,1 Bedroom Mobility Accessible Tub Non-Smoking,1,4,Vinci Site London Road,"additional toilet,air conditioning,carpeting,c...","Room size: 25 m²/269 ft², Non-smoking, Shower,...",3
233,Best Western Palm Hotel,Standard Room With Three Single Beds,1,3,64-76 Hendon Way,"additional toilet,air conditioning,carpeting,c...","Room size: 28 m²/301 ft², Bathtub, 3 single beds",3
460,Park Plaza London Waterloo,Executive Studio Twin,1,4,6 Hercules Road,"air conditioning,alarm clock,bathrobes,carpeti...","Room size: 25 m²/269 ft², City view, Non-smoki...",3
285,The Springfield Hotel,Single Room,1,2,154 Sussex Gardens,"additional toilet,air conditioning,alarm clock...","Street view, Shower, 1 single bed",3
538,The Collective Canary Wharf,Comfy Studio - High Floor,1,4,20 Crossharbour Plaza,"air conditioning,bathrobes,free wi-fi in all r...","Room size: 20 m²/215 ft², 1 double bed",3
1025,Holiday Inn Express Park Royal,2 Single Beds Non-smoking,1,3,Victoria RoadNorth Acton,"air conditioning,alarm clock,carpeting,closet,...","Room size: 25 m²/269 ft², Non-smoking, Shower,...",3


In [24]:
import pickle

# Persist the prepared hotel frame (including guests_no and the normalised text columns).
# NOTE: only unpickle this file from a trusted location; pickle.load can execute arbitrary code.
with open("hotel/hotel.pkl", 'wb') as pkl_out:
    pickle.dump(hotel, pkl_out)