In [1]:
import pandas as pd
import numpy as np
import joblib
import enchant
import spacy
import os

In [2]:
# Base directory for the CSV inputs; later cells join file names onto it
# with os.path.join. Relative, so it resolves against the kernel's working
# directory.
data_path = "./data"

In [3]:
# Read the per-hotel feature table from <data_path>/hotel_features.csv.
# This frame is the default `hotel_feats_df` argument of suggest(), which
# expects at least the columns "Address", "Hotel_Name" and "Average_Score".
hotel_feats_path = os.path.join(data_path, "hotel_features.csv")
hotel_feats_df = pd.read_csv(hotel_feats_path)
# Last expression of the cell: shows the first rows as a sanity check.
hotel_feats_df.head()

Unnamed: 0,Address,Average_Score,Hotel_Name,Hotel_Location,Hotel_Service,Hotel_Room,Hotel_F&A,Hotel_Meal
0,163 Marsh Wall Docklands Tower Hamlets London ...,7.1,Britannia International Hotel Canary Wharf,-0.399103,-0.336323,-0.103139,0.130045,0.03139
1,372 Strand Westminster Borough London WC2R 0JJ UK,8.1,Strand Palace Hotel,-0.268657,-0.109453,-0.124378,-0.174129,-0.323383
2,Westminster Bridge Road Lambeth London SE1 7UT UK,8.7,Park Plaza Westminster Bridge London,-0.318538,0.015666,-0.263708,0.174935,0.227154
3,Scarsdale Place Kensington Kensington and Chel...,8.1,Copthorne Tara Hotel London Kensington,0.264925,-0.298507,-0.014925,0.130597,-0.291045
4,7 Pepys Street City of London London EC3N 4AF UK,8.7,DoubleTree by Hilton Hotel London Tower of London,0.336245,0.122271,-0.318777,-0.082969,-0.139738


In [4]:
# --- NLP resources -------------------------------------------------------
# Ask spaCy to use a GPU if it can; the call's return value is ignored, so
# the cell proceeds the same way either way.
spacy.prefer_gpu()
nlp = spacy.load("en_core_web_sm")

# --- Aspect vocabulary ---------------------------------------------------
# dict() over the frame's rows turns each row into one key/value pair, so
# the CSV must have exactly two columns. review_feats_extract() looks nouns
# up as keys and uses the values as 1-based aspect indices.
vocab_path = os.path.join(data_path, "aspect_nouns.csv")
vocab_df = pd.read_csv(vocab_path)
aspect_nouns = dict(vocab_df.to_numpy())

# Spell-check dictionary used by extractNoun() to filter lemmas.
en_dict = enchant.Dict("en_us")

# --- Trained model -------------------------------------------------------
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files from a trusted source.
model_path = "./models/xgb_suggestion.joblib"
model = joblib.load(model_path)

In [5]:
def extractNoun(sent):
  """Return the noun lemmas of `sent` as one space-separated string.

  The text is parsed with the module-level spaCy pipeline `nlp`. A token is
  kept when its tag contains "NN", its lemma is longer than two characters
  and the lemma passes `en_dict.check`. Kept lemmas are lower-cased and
  joined with single spaces; an input with no such tokens yields "".
  """
  def _is_wanted(tok):
    # Checks run in the same order as a chained `and`: tag first, then
    # length, and the dictionary lookup only for lemmas that got this far.
    if "NN" not in tok.tag_:
      return False
    lemma = tok.lemma_
    if len(lemma) <= 2:
      return False
    return en_dict.check(lemma)

  return " ".join(tok.lemma_.lower() for tok in nlp(sent) if _is_wanted(tok))

def review_feats_extract(review, aspect_nouns):
  """Turn a space-separated noun string into normalised aspect shares.

  Args:
    review: nouns separated by single spaces (the string is split on " ").
    aspect_nouns: mapping from noun to an integer aspect label. A label is
      used as the 1-based position in the output, and the output length is
      the number of distinct labels in the mapping -- so this assumes the
      labels are the contiguous range 1..k (TODO confirm against the
      vocabulary file).

  Returns:
    A list with one entry per aspect: the count of nouns mapped to that
    aspect divided by the total number of matched nouns. When nothing
    matches, the divisor is 1 and every entry is 0.
  """
  n_aspects = np.unique(list(aspect_nouns.values())).size
  counts = np.zeros(n_aspects)

  # Resolve every known word to its zero-based slot, then tally.
  slots = [aspect_nouns[word] - 1 for word in review.split(" ") if word in aspect_nouns]
  for slot in slots:
    counts[slot] += 1

  total = np.sum(counts)
  if total == 0:
    total = 1  # avoid 0/0 when no noun was recognised
  return list(counts / total)

In [22]:
# Tag vocabulary shown to the user by the interactive cell. Key order
# matters: that cell iterates this dict to build the user tag list, and the
# order matches the last five customer columns used in suggest().
# For every key except "stay_time" the user's choice is encoded from its
# position in the list; "stay_time" is a number typed by the user, and its
# single entry only serves as the default shown in the prompt.
tags = {
  "trip_type": ["business trip", "leisure trip"],
  "room_type": ["standard", "classic", "club", "junior", "superior", 
               "deluxe", "suite", "luxury"],
  "travel_type": ["solo", "couple", "group", "family"],
  "stay_time": [1],
  "room_size": ["single", "double", "twin", "large", "king", "queen"]
}

In [36]:
def suggest(description, user_tags, top_n=10, hotel_feats_df=hotel_feats_df,
            aspect_nouns=aspect_nouns, model=model):
  """Print the `top_n` hotels with the highest predicted score for one user.

  The description is reduced to aspect shares (extractNoun followed by
  review_feats_extract), concatenated with `user_tags`, and that single
  user vector is attached to every hotel row. Every column except
  "Address" and "Hotel_Name" is passed to `model.predict`, and the hotels
  are printed in descending order of the prediction.

  Args:
    description: free-text description of what the user wants.
    user_tags: five numeric codes in the order trip_type, room_type,
      travel_type, stay_time, room_size.
    top_n: number of rows to print.
    hotel_feats_df: hotel feature table; must contain "Address",
      "Hotel_Name" and "Average_Score". All remaining columns are fed to
      the model as-is.
    aspect_nouns: noun -> aspect label mapping for review_feats_extract.
    model: object with a `predict(2-D array)` method.

  Returns:
    None. The ranking is printed as plain text.

  Raises:
    ValueError: if description features plus `user_tags` do not add up to
      the ten customer columns.

  Note: the keyword defaults are bound when this cell is executed; re-run
  it after reloading the hotel table, vocabulary or model.
  """
  customer_columns = ["Customer_Location", "Customer_Service", "Customer_Room",
                      "Customer_F&A", "Customer_Meal",
                      "trip_type", "room_type", "travel_type", "stay_time", "room_size"]

  nouns = extractNoun(description)
  des_feats = review_feats_extract(nouns, aspect_nouns)
  user_feats = np.concatenate([des_feats, user_tags])
  if len(user_feats) != len(customer_columns):
    raise ValueError(
      f"expected {len(customer_columns)} customer features, got {len(user_feats)}")

  # Repeat the user vector once per hotel. The customer frame reuses the
  # hotel frame's index because concat(axis=1) aligns on index labels: with
  # a fresh 0..n-1 index, a filtered or re-indexed hotel table would be
  # padded with NaN rows instead of being paired row by row.
  customer_df = pd.DataFrame(np.tile(user_feats, (len(hotel_feats_df), 1)),
                             index=hotel_feats_df.index, columns=customer_columns)
  suggest_df = pd.concat([hotel_feats_df, customer_df], axis=1)

  x_predict = suggest_df.drop(columns=["Address", "Hotel_Name"]).to_numpy()
  suggest_df["predict"] = model.predict(x_predict)
  suggest_df = suggest_df.sort_values(by="predict", ascending=False)

  # Select the report columns by name rather than by position so the
  # output does not depend on the column order of the hotel table.
  report_columns = ["predict", "Hotel_Name", "Average_Score"]
  print(suggest_df[report_columns].iloc[:top_n].to_string(index=False))

In [38]:
# Smoke test of suggest() with a fixed description and hand-written tags.
userDes = "great food and good bar"
# Codes in the order trip_type, room_type, travel_type, stay_time, room_size.
# Under the encoding used by the interactive cell this would read as
# leisure trip / classic / couple / 5 (stay_time) / double -- presumably the
# same encoding the model was trained with; verify against the training data.
userTags = [1, 2, 2, 5, 2]
suggest(userDes, userTags)

 predict                           Hotel_Name  Average_Score
9.183137     Camperio House Suites Apartments            8.9
9.172785                 Le Relais Montmartre            9.3
9.172544                        Phileas Hotel            8.7
9.156624                         Hotel Silver            8.4
9.154266       Holiday Inn London Whitechapel            8.5
9.153229                  Kingsway Hall Hotel            8.5
9.149500                  Blakemore Hyde Park            8.5
9.149500 Park Inn by Radisson Uno City Vienna            8.6
9.148552      Tryp Barcelona Condal Mar Hotel            8.1
9.147636                     Room Mate Giulia            9.3


In [39]:
# Interactive front end: ask for a free-text description and one value per
# entry of `tags`, encode the answers as numbers, then print suggestions.
userDes = input("Description: ")
userTags = []
print("Please input following tag:")
for tag, item in tags.items():
  print(f"{tag} (default: {item[0]})")
  if tag == "stay_time":
    # stay_time is typed as a number. Empty or non-numeric input falls back
    # to the advertised default instead of raising ValueError, and values
    # below 1 are clamped to 1.
    try:
      userStay = int(input("You stay in:"))
    except ValueError:
      userStay = item[0]
    userTags.append(max(userStay, 1))
    continue
  print(item)
  # Ignore surrounding whitespace and letter case so that e.g. "Couple "
  # still matches the lower-case choices listed above.
  userTag = input("Your tag:").strip().lower()
  if userTag in item:
    idx = item.index(userTag)
    # trip_type is encoded 0-based; every other tag is 1-based.
    idx = idx+1 if tag != "trip_type" else idx
    userTags.append(idx)
  else:
    # Unrecognised answers are encoded as 0.
    # NOTE(review): for trip_type, 0 is also the code of the first choice;
    # for the other tags 0 is not the code of the advertised default
    # (which would be 1) -- confirm what 0 meant in the training data.
    userTags.append(0)

print()
suggest(userDes, userTags)

Please input following tag:
trip_type (default: business trip)
['business trip', 'leisure trip']
room_type (default: standard)
['standard', 'classic', 'club', 'junior', 'superior', 'deluxe', 'suite', 'luxury']
travel_type (default: solo)
['solo', 'couple', 'group', 'family']
stay_time (default: 1)
room_size (default: single)
['single', 'double', 'twin', 'large', 'king', 'queen']
 predict                         Hotel_Name  Average_Score
9.349676               Le Relais Montmartre            9.3
9.325933          City Hotel Deutschmeister            8.0
9.325933        Hotel Mercure Milano Centro            8.4
9.323000                    Vincci Maritimo            8.3
9.318494      Grand Royale London Hyde Park            7.7
9.313035        Park Grand London Hyde Park            7.5
9.313035        Hotel Tour d Auvergne Opera            8.3
9.312585                   Acevi Villarroel            7.8
9.310282 Holiday Inn Amsterdam Arena Towers            8.4
9.309271                   U