In [3]:
from Contexts import *
import json
from Intents import *
import random
import os
import re
import pandas as pd

In [4]:
# completed functions
def check_actions(current_intent, attributes, context):
    '''This function performs the action for the intent
    as mentioned in the intent config file'''
    '''Performs actions pertaining to current intent'''

    context = IntentComplete()
    
    if current_intent.action == 'BookRestaurant':
        #print(current_intent.action)
        return BookRestaurant(attributes), context
    if current_intent.action == 'BookHotel':
        return BookHotel(attributes), context

    return 'action: ' + current_intent.action, context

def check_required_params(current_intent, attributes, context):
    '''Collects attributes pertaining to the current intent'''
    #print("params of current indent:")
    for para in current_intent.params:
        #print(para.name)
        if para.required:
            if para.name not in attributes:
                if para.name=='Tariff':
                    context = GetTariff()
                return random.choice(para.prompts), context

    return None, context

In [18]:
# here are the actions, you do not need to change these

def BookHotel(attributes):
    #constants
    HOTEL_DB_COL_MAP = {
        "hloc": "LOCATION",
        "starrating": "STAR RATING",
        "Tariff": "TARIFF"
    }

    db = pd.read_csv("./db/db_hotel.csv")
    
    for attr, val in attributes.items():
        if attr not in HOTEL_DB_COL_MAP:
            continue
        col_name = HOTEL_DB_COL_MAP[attr]
        if col_name == "TARIFF":
            for v in val:
                if ">" in v:
                    v = v.replace(">", "").strip()
                    db = db[db[col_name] > int(v)]
                if "<" in v:
                    v = v.replace("<", "").strip()
                    db = db[db[col_name] < int(v)]
                if "<=" in v:
                    v = v.replace("<=", "").strip()
                    db = db[db[col_name] <= int(v)]
                if ">=" in v:
                    v = v.replace(">=", "").strip()
                    db = db[db[col_name] >= int(v)]
                if "-" in v:
                    v = v.split("-")
                    db = db[db[col_name] < int(v[1].strip())]
                    db = db[db[col_name] > int(v[0].strip())]
        else:       
            db = db[db[col_name] == val]

    hotel_names = db["HOTEL NAME"]
    output = ["List of Hotels matching your criterio\n"]

    if len(hotel_names):
        for i, hotel in enumerate(hotel_names):
            output.append("{}. {}".format(i + 1, hotel))
        
        return output
    else:
        return "No Hotels found for the provided parameters"
        
def BookRestaurant(attributes):
    #constants
    RESTAURANT_DB_COL_MAP = {
        "rloc": "LOCATION",
        "cuisine": "CUISINE",
        "cost": "COST"
    }

    db = pd.read_csv("./db/db_restaurant.csv")
    
    for attr, val in attributes.items():
        if attr not in RESTAURANT_DB_COL_MAP:
            continue
        col_name = RESTAURANT_DB_COL_MAP[attr]
        db = db[db[col_name] == val]

    restaurant_names = db["RESTAURANT NAME"]
    output = ["List of Restaurants matching your criterion:\n"]

    if len(restaurant_names):
        for i, restaurant in enumerate(restaurant_names):
            output.append("{}. {}\n".format(i + 1, restaurant))

        return output
    else:
        return "No Restaurant found for the provided parameters"

In [19]:
#!pip install pyspellchecker 
from spellchecker import SpellChecker

def input_processor(user_input, context, attributes, intent, stopwords, lv_dst=2):
    
    tokens = user_input.split()
    #remove stop words (they are not necessary for classifying the intent)
    tokens = [t for t in tokens if not t in stopwords]
    
    #spell checking
    spell = SpellChecker(distance = lv_dst)
    misspelled = spell.unknown(tokens)
    
    for w in misspelled:
        i = tokens.index(w)
        tokens[i] = spell.correction(w)
        print("[log] misspelled:", w, "correct:", tokens[i])
        
    user_input = " ".join(tokens)
    
    attributes, cleaned_input = getattributes(user_input, context, attributes)
    
    #print("cleaned input:", cleaned_input)
    
    return attributes, cleaned_input

def getattributes(uinput,context,attributes):
    '''This function marks the entities in user input, and updates
    the attributes dictionary'''
    
    #Can use context to to context specific attribute fetching
    #TODO replace
    
    if context.name.startswith('IntentComplete'):
        return attributes, uinput
    else:
        
        files = os.listdir('./entities/')
        entities = {}
        
        for f in files:  
            lines = open('./entities/'+f).readlines()
            for i, line in enumerate(lines):
                lines[i] = line[:-1]
            entities[f[:-4]] = '|'.join(lines)
            
        #then you need to match them with the input and extract the attributes
        #regular expressions will save you here!

        #uinput here should be modified in order to contain the relevant placeholders
        #e.g. i would prefer cheap --> should become i would prefer $cost
        #at the same time you save in attributes the relevant info
        #e.g. {'cost:cheap'}
        #in the end you return the attributes and the modified input
        #this is specific for the tariff context and processes it separately.
        #you need to make the process_tariff method
        for entity in entities:
            for i in entities[entity].split('|'):
                if i.lower() in uinput.lower():
                    attributes[entity] = i
                    #print("attribute found",attributes[entity])
                
        for entity in entities:
            uinput = re.sub(entities[entity],r'$'+entity,uinput,flags=re.IGNORECASE)
        
        
        if context.name=='GetTariff' and context.active:
            #print('Tariff')
            match = process_tariff(uinput)
            uinput = re.sub(r'[<|>|<=|>=]?\s*[0-9]+\s*[-]?\s*[0-9]+', '$tariff', uinput)
            attributes['Tariff'] = match
            context.active = False
            
        #print("adapted user input:",uinput)
        #print("[log] Attributes (slots) filled:",attributes) #check which attributes/slots have been filled
        return attributes, uinput
    
def process_tariff(uinput):
    
    #people can write: ">200 and <300"
    #or
    #"less than 300"
    #or
    #more than 200
    #for more input i either need to define more cases or train a classifier on a dataset
    
    vals = uinput.split('and')
    for v in vals:
        v.replace("less than", "<")
        v.replace("more than", ">")

    return vals


In [22]:
def loadIntent(path, intent):
    with open(path) as fil:
        dat = json.load(fil)
        intent = dat[intent]
        #print("Intent:",intent['intentname'])
    return Intent(intent['intentname'],intent['Parameters'], intent['actions'])

def intentIdentifier(clean_input, context,current_intent, clf, tf_idf, count_vect, treshold = 0.70):
  
    #perhaps lower-case the input (or do any other slight processing here)
    clean_input = [clean_input.lower()]
    
    x_count = count_vect.transform(clean_input)
    x_tf = tf_idf.transform(x_count)
    
    #only switch intent when certainty is higher than treshold value
    probs = clf.predict_proba(x_tf)[0]
    
    if np.max(probs) > treshold and current_intent is None:
        print("[log] hotel booking probability:",probs[0], "restaurant booking prob", probs[1])
        predicted = np.argmax(probs)
        if predicted==1: 
            return loadIntent('params/newparams.cfg', 'RestaurantBooking')
        else:
            return loadIntent('params/newparams.cfg','HotelBooking')
    else:
        if current_intent is None:
            print("[log] classifier certainty under treshold", probs[0], probs[1])
        return current_intent
    
    
    

In [14]:
from sklearn.naive_bayes import MultinomialNB, ComplementNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#creates and trains a binary tf-idf classifier
def create_classifier(training_data,stopwords):
    #validation set size
    test_size = int((len(training_data[0])+len(training_data[1]))/4)
    #print("test size", test_size)
    
    #merge the corpi for classification
    y_train = np.concatenate((np.zeros(len(training_data[0])-int(test_size/2))
                      ,np.ones(len(training_data[1])-int(test_size/2))
                      ))
    X_train = np.concatenate((training_data[0][:-int(test_size/2)],
                              training_data[1][:-int(test_size/2)]))
                              
    y_test = np.concatenate((np.zeros((int(test_size/2)))
                            ,np.ones((int(test_size/2)))
                      ))
    X_test = np.concatenate((training_data[0][-int(test_size/2):],
                              training_data[1][-int(test_size/2):]
                            ))
    
    #print("training shape x:",X_train.shape, "y:", y_train.shape)
    #print("test shape x:", X_test.shape, "y:", y_test.shape)
    
    #tf-idf
    vect = CountVectorizer(stop_words = stopwords)
    X_train_counts = vect.fit_transform(X_train)
    tf_transformer = TfidfTransformer(use_idf=False).fit(X_train_counts)
    X_tf = tf_transformer.transform(X_train_counts)
    
    #classifier
    clf = ComplementNB().fit(X_tf,y_train)
    X_test_count = vect.transform(X_test)
    X_test_tf = tf_transformer.transform(X_test_count)
    predicted = clf.predict(X_test_tf)
    print("[log] complement naive bayes accuracy on validation set:",
          np.mean(predicted==y_test), "perfect!")
    
    #fit classifier on full data set
    #TODO
    
    return clf, tf_transformer, vect

In [26]:
import nltk
from nltk.corpus import stopwords
import numpy as np

class Session:
    
    
    def __init__(self, attributes=None, active_contexts=[FirstGreeting(), IntentComplete()]):
        
        '''Initialise a default session'''

        #Contexts are flags which control dialogue flow, see Contexts.py
        self.active_contexts = active_contexts
        self.context = FirstGreeting()

        #Intent tracks the current state of dialogue
        #self.current_intent = First_Greeting()
        self.current_intent = None
    
        #attributes hold the information collected over the conversation
        self.attributes = {}
        
        #list of stop words to remove
        additional_stopwords = {"want"} #because I figured that "want" only appears in one dataset of intents, so is classified wrongly
        self.stopwords = set(stopwords.words('english')).union(additional_stopwords)
        
        #training data
        hotel_data = []
        with open('intents/HotelBooking.dat') as hotel_file:
            for line in hotel_file:
                line = hotel_file.readline().rstrip("\n")
                hotel_data.append(line)
        
        rest_data =[]
        with open('intents/RestaurantBooking.dat') as rest_file:
            for line in rest_file:
                line = rest_file.readline().rstrip("\n")
                rest_data.append(line)  
            
        training_data = [hotel_data, rest_data]
        
        #intent classifier
        self.intent_clf, self.tf_idf, self.count_vect = create_classifier(training_data,self.stopwords)
        
    def update_contexts(self):
        '''Not used yet, but is intended to maintain active contexts'''
        for context in self.active_contexts:
            if context.active:
                context.decrease_lifespan()

    def reply(self, user_input):
        '''Generate response to user input'''
        self.attributes, clean_input = input_processor(user_input, self.context, self.attributes, self.current_intent, self.stopwords)

        self.current_intent = intentIdentifier(clean_input, self.context, self.current_intent, self.intent_clf, self.tf_idf, self.count_vect)
        
        #ask again if intent was not confidently identified
        if self.current_intent is None:
            return "Sorry, I didn't understand you. Do you want to book a restaurant or a hotel?"
        
        prompt, self.context = check_required_params(self.current_intent, self.attributes, self.context)
        
        
        #prompt being None means all parameters satisfied, perform the intent action
        if prompt is None:
            if self.context.name!='IntentComplete':
               # print("context with none prompt", self.context.name)
                prompt, self.context = check_actions(self.current_intent, self.attributes, self.context)

        #Resets the state after the Intent is complete
        if self.context.name=='IntentComplete':
            self.attributes = {}
            self.context = FirstGreeting()
            self.current_intent = None
        #print("current context", self.context.name)
        return prompt

In [27]:
session = Session()

print("RESTAURANT/HOTEL BOOKING PORTAL")
print("Welcome to the Hotel/Restaurant Booking Portal. What do you want to do?")


print ('[Team 53 BOT]: Hi! How may I assist you?')

inp = "I want to book a table"
print("[User]:",inp)
print ('[Team 53 BOT]:', session.reply(inp))

inp = "Somewhere in the eastern party of the city"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))

inp = "Something cheap"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))

inp = "Indian food of course"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))

[log] complement naive bayes accuracy on validation set: 1.0 perfect!
RESTAURANT/HOTEL BOOKING PORTAL
Welcome to the Hotel/Restaurant Booking Portal. What do you want to do?
[Team 53 BOT]: Hi! How may I assist you?
[User]: I want to book a table
[log] hotel booking probability: 0.28245301009330465 restaurant booking prob 0.7175469899066947
[Team 53 BOT]: Can you provide your preference on eating place[EAST, WEST, NORTH, SOUTH]
[User]: Somewhere in the eastern party of the city
[Team 53 BOT]: What is the cost you are comfortable with?
[User]: Something cheap
[Team 53 BOT]: Which cuisine would you prefer?
[User]: Indian food of course
[Team 53 BOT]: No Restaurant found for the provided parameters


In [30]:
session = Session()

print("RESTAURANT/HOTEL BOOKING PORTAL")
print("Welcome to the Hotel/Restaurant Booking Portal. What do you want to do?")


print ('[Team 53 BOT]: Hi! How may I assist you?')

inp = "I want to go on holiday in Goa"
print("[User]:",inp)
print ('[Team 53 BOT]:', session.reply(inp))

inp = "Should have a 5 stars"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))

inp = "Less than 10000 Rupees"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))



[log] complement naive bayes accuracy on validation set: 1.0 perfect!
RESTAURANT/HOTEL BOOKING PORTAL
Welcome to the Hotel/Restaurant Booking Portal. What do you want to do?
[Team 53 BOT]: Hi! How may I assist you?
[User]: I want to go on holiday in Goa
[log] hotel booking probability: 0.9984431966653748 restaurant booking prob 0.00155680333462532
[Team 53 BOT]: Please mention the hotel star rating.
[User]: Should have a 5 stars
[Team 53 BOT]: What is your preferred tariff in EUR?
[User]: Less than 10000 Rupees
[Team 53 BOT]: ['List of Hotels matching your criterio\n', '1. The Panjim Inn', '2. Vivanta by Taj - Panaji', '3. Vivanta by Taj - Holiday', '4. Vivanta By Taj- Fort Aguada', '5. Fortune Select Regina', '6. Bogmallo Beach Resort', '7. DEVAAYA AYURVEDA SPA RESORT', '8. Holiday Inn Resort', '9. The Crown Goa', '10. Hotel Neo Majestic', '11. The Kenilworth Beach Resort & Spa', '12. Hotel La Calypso Get Aways.', '13. The "O"', '14. Sun-n-Sand Hotel']


In [34]:
session = Session()

print("RESTAURANT/HOTEL BOOKING PORTAL")
print("Welcome to the Hotel/Restaurant Booking Portal. What do you want to do?")


print ('[Team 53 BOT]: Hi! How may I assist you?')

#The word 'eat' does not occur in the training data, so the classifier does not recognice the intent
#a pretrained word vector model could work here (but this gives other issues and is out of the scope of this assignment I suppose)
inp = "I want to eat"
print("[User]:",inp)
print ('[Team 53 BOT]:', session.reply(inp))

#spelling error will be corrected
inp = "I want to eat ecpensive food"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))

inp = "in the north"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))

inp = "italian cuisine"
print("[User]:",inp)
print('[Team 53 BOT]:', session.reply(inp))

[log] complement naive bayes accuracy on validation set: 1.0 perfect!
RESTAURANT/HOTEL BOOKING PORTAL
Welcome to the Hotel/Restaurant Booking Portal. What do you want to do?
[Team 53 BOT]: Hi! How may I assist you?
[User]: I want to eat
[log] classifier certainty under treshold 0.5 0.5
[Team 53 BOT]: Sorry, I didn't understand you. Do you want to book a restaurant or a hotel?
[User]: I want to eat ecpensive food
[log] misspelled: ecpensive correct: expensive
[log] hotel booking probability: 0.002417945988823799 restaurant booking prob 0.9975820540111769
[Team 53 BOT]: Can you provide your preference on eating place[EAST, WEST, NORTH, SOUTH]
[User]: in the north
[Team 53 BOT]: Which cuisine would you prefer?
[User]: italian cuisine
[Team 53 BOT]: ['List of Restaurants matching your criterion:\n', '1. The Ramgarh Lodge\n', '2. Vivanta by Taj - Whitefield\n', '3. Hotel Clarks Shiraz\n']


In [None]:
session = Session()

print("RESTAURANT/HOTEL BOOKING PORTAL")
print("Welcome to the Hotel/Restaurant Booking Portal. What do you want to do?")


print ('[Team 53 BOT]: Hi! How may I assist you?')

inp=""
while inp!="end":
	
	inp = input('[User]: ')
	print ('[Team 53 BOT]:', session.reply(inp))