## Data Loading

In [1]:
# Importing needed libraries
import numpy as np
import pandas as pd
from ast import literal_eval
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer

In [2]:
# Reading in the data
df = pd.read_csv('data/Hotel_review_feature_engineering.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Hotel_Address,Additional_Number_of_Scoring,Review_Date,Average_Score,Hotel_Name,Reviewer_Nationality,Negative_Review,Review_Total_Negative_Word_Counts,Total_Number_of_Reviews,...,wi_fi,air_conditioning,breakfast,booking_com,room_problem,location,staff,bed_and_room,month,year
0,0,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,8/3/2017,7.7,Hotel Arena,Russia,I am so angry that i made this post available...,397,1403,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,8,2017
1,1,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,8/3/2017,7.7,Hotel Arena,Ireland,No Negative,0,1403,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,8,2017
2,2,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/31/2017,7.7,Hotel Arena,Australia,Rooms are nice but for elderly a bit difficul...,42,1403,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,7,2017
3,3,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/31/2017,7.7,Hotel Arena,UK,My room was dirty and I was afraid to walk ba...,210,1403,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,7,2017
4,4,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/24/2017,7.7,Hotel Arena,New Zealand,You When I booked with your company on line y...,140,1403,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7,2017


In [3]:
# Dropping unneeded columns
df.drop(['Unnamed: 0', 'Additional_Number_of_Scoring',
       'Review_Date','Reviewer_Nationality',
       'Negative_Review', 'Review_Total_Negative_Word_Counts',
       'Total_Number_of_Reviews', 'Positive_Review',
       'Review_Total_Positive_Word_Counts',
       'Total_Number_of_Reviews_Reviewer_Has_Given', 'Reviewer_Score',
       'days_since_review', 'lat', 'lng', 'City', 'tourist',
       'Trip_type', 'Travelling_Status', 'stay_duration', 'room_small',
       'wi_fi', 'air_conditioning', 'breakfast', 'booking_com', 'room_problem',
       'location', 'staff', 'bed_and_room', 'month', 'year'],1,inplace=True)

In [4]:
df.head()

Unnamed: 0,Hotel_Address,Average_Score,Hotel_Name,Tags,Country
0,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,"[' Leisure trip ', ' Couple ', ' Duplex Double...",Netherlands
1,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,"[' Leisure trip ', ' Couple ', ' Duplex Double...",Netherlands
2,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,"[' Leisure trip ', ' Family with young childre...",Netherlands
3,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,"[' Leisure trip ', ' Solo traveler ', ' Duplex...",Netherlands
4,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,7.7,Hotel Arena,"[' Leisure trip ', ' Couple ', ' Suite ', ' St...",Netherlands


## Defining the recommender and constructing the model

In [5]:
class Tags_Country_recommender():
    def __init__(self,data=df):
        self.data = data
        # Seperating the tags components to make it clearer
        self.data['Tags'] = self.data['Tags'].apply(lambda x:"".join(literal_eval(x)))
        # Making these columns lowercase
        self.data['Country']=self.data['Country'].str.lower()
        self.data['Tags']=self.data['Tags'].str.lower()
      
    def recommend(self,location,description):   
        # Dividing the texts into small tokens (sentences into words)
        description = description.lower()
        description_tokens=word_tokenize(description)  
        sw = stopwords.words('english')  # List of predefined english stopwords to be used for computing
        lemm = WordNetLemmatizer() # This groups similar words so that it can be analyzed as a single word
        
        # We now define the functions below connecting these imported packages
        filtered_sen = {w for w in description_tokens if not w in sw}
        f_set=set()
        for fs in filtered_sen:
            f_set.add(lemm.lemmatize(fs))
        
        
        # Defining a new variable that takes in the location inputted and bring out the features defined below
        country_feat = self.data[self.data['Country']==location.lower()]
        country_feat = country_feat.set_index(np.arange(country_feat.shape[0]))
        cos=[]
        for i in range(country_feat.shape[0]):
            country_tokens=word_tokenize(country_feat['Tags'][i])
            filtered_set={w for w in country_tokens if not w in sw}
            t_set=set()
            for te in filtered_set:
                t_set.add(lemm.lemmatize(te))
            rvector = t_set.intersection(f_set)
            cos.append(len(rvector))
        country_feat['similarity']=cos
        country_feat=country_feat.sort_values(by='similarity',ascending=False)
        country_feat.drop_duplicates(subset='Hotel_Name',keep='first',inplace=True)
        country_feat.sort_values('Average_Score',ascending=False,inplace=True)
        country_feat.reset_index(inplace=True)
        # Printing top 10 recommendations based on the country and descriptions given
        # Prints out both the hotel name and its location
        for i in range(10):
            print (f'We recommend {country_feat.iloc[i,3]} located at {country_feat.iloc[i,1]}')

## Running the model

In [6]:
recommender = Tags_Country_recommender()

In [7]:
recommender.recommend('Netherlands','I am going on a business trip, I need a standard room and i am staying for two nights ')

We recommend Waldorf Astoria Amsterdam located at Herengracht 542 556 Amsterdam City Center 1017 CG Amsterdam Netherlands
We recommend The Toren located at Keizersgracht 164 Amsterdam City Center 1015 CZ Amsterdam Netherlands
We recommend Pillows Anna van den Vondel Amsterdam located at Anna van den Vondelstraat 6 Oud West 1054 GZ Amsterdam Netherlands
We recommend Luxury Suites Amsterdam located at Oudeschans 75 Amsterdam City Center 1011 KW Amsterdam Netherlands
We recommend The Hoxton Amsterdam located at Herengracht 255 Amsterdam City Center 1016 BJ Amsterdam Netherlands
We recommend Ambassade Hotel located at Herengracht 341 Amsterdam City Center 1016 AZ Amsterdam Netherlands
We recommend Canal House located at Keizersgracht 148 Amsterdam City Center 1015 CX Amsterdam Netherlands
We recommend Andaz Amsterdam Prinsengracht A Hyatt Hotel located at Prinsengracht 587 Amsterdam City Center 1067 HT Amsterdam Netherlands
We recommend Banks Mansion All Inclusive Hotel located at Herengra

In [8]:
# Saving the model as a joblib file
from joblib import load,dump

dump(recommender,'model/recommender_based_tags_and_countries.joblib')

['model/recommender_based_tags_and_countries.joblib']

In [9]:
# Loading the saved model for use
model = load('model/recommender_based_tags_and_countries.joblib')

In [10]:
model.recommend('UK','I want to go on a business trip with my employees')

We recommend 41 located at 41 Buckingham Palace Road Westminster Borough London SW1W 0PS UK
We recommend Haymarket Hotel located at 1 Suffolk Place Westminster Borough London SW1Y 4HX UK
We recommend Charlotte Street Hotel located at 15 17 Charlotte Street Hotel Westminster Borough London W1T 1RJ UK
We recommend Taj 51 Buckingham Gate Suites and Residences located at Buckingham Gate Westminster Borough London SW1E 6AF UK
We recommend The Soho Hotel located at 4 Richmond Mews Westminster Borough London W1D 3DH UK
We recommend Milestone Hotel Kensington located at 1 Kensington Court Kensington and Chelsea London W8 5DL UK
We recommend Ham Yard Hotel located at One Ham Yard Westminster Borough London W1D 7DT UK
We recommend The Lanesborough located at Hyde Park Corner Westminster Borough London SW1X 7TA UK
We recommend Lansbury Heritage Hotel located at 117 Poplar High Street Tower Hamlets London E14 0AE UK
We recommend The Goring located at 15 Beeston Place Westminster Borough London SW1