# Assignment 7: Dialogue systems and question answering

In order to try the chatbot, run the whole code sequentially.

* Libraries

In [1]:
# to find patterns in text
import re

# to randomize questions
import random

#to handle data structures and math
import numpy as np
import pandas as pd

#to sleep
import time

## Logic implementation

* Implementation of the Frame

Frames are used to find and store the information required to answer questions about a topic. An example is a weather forecast, where a place and a time are required to give an answer. Each frame has a priority score that decides the order in which missing frames are asked for, an array of different formulations for asking the user for the relevant information, and a parser for extracting the relevant information from a user input.

In [2]:
class Frame():
    """
    A slot of information the chatbot needs before it can answer a question.

    Attributes:
        name: identifier of the frame; a Topic compares it with its filter name.
        information: the value extracted from the user, "" while still unknown.
        priority: ordering key; Topic asks for the empty frame with the lowest
            value first.
        questions: alternative phrasings used to ask the user for the value.
        decoder: regular expression with one capture group that extracts the
            value from a multi-word sentence.
    """

    def __init__(self):
        self.name = ""
        self.information = ""
        self.priority = int()
        self.questions = []
        self.decoder = ""

    def is_empty(self):
        """Return True while no information has been stored in the frame."""
        return self.information == ""

    def make_question(self):
        """Return one of the configured questions, picked at random."""
        return random.choice(self.questions)

    def parser(self, input_sentence):
        """
        Retrieves the information of the input sentence for this frame.

        A one-word sentence is taken as the answer itself. Longer sentences
        are searched with ``self.decoder``.

        Returns:
            The extracted value as a lower-cased string, or "" when nothing
            was found. The result is always a single string: when the decoder
            matches several times, the last match is kept.
        """
        input_sentence = input_sentence.lower()
        words = input_sentence.split()

        # A single word is the answer to a direct question; return it without
        # the surrounding whitespace so that it can be compared with data.
        if len(words) == 1:
            return words[0]

        matches = re.findall(self.decoder, input_sentence)

        # if nothing is found
        if not matches:
            return ""

        # Several matches used to be returned as a list, which broke the
        # equality filter on the database and re-parsing of the value.
        # Heuristic: filler such as "is there ..." tends to come first, so the
        # last match is kept ("is there a restaurant in haga" -> "haga").
        return matches[-1]

    def set_information(self, input_sentence):
        """Parse ``input_sentence`` and store the result in the frame."""
        self.information = self.parser(input_sentence)
        


* Implementation of scenario

The components of a scenario are a vocabulary, a predefined question to ask the user, and an array of necessary information to make an appropriate response. The vocabulary is a list of keywords which the chatbot matches with the user input in order to guess the scenario. For a weather bot, the vocabulary of one scenario might contain hot, cold, temperature while another scenario’s vocabulary contains sun, sunny.

In [3]:
class Scenario():
    """
    One kind of question inside a topic.

    Attributes:
        name: identifier of the scenario.
        question: yes/no question used to offer the scenario to the user.
        vocabulary: keywords that point to this scenario.
        needed_information: database columns used to build the answer.
    """

    def __init__(self):
        # Every instance gets its own fresh containers.
        self.name = ""
        self.question = ""
        self.vocabulary = []
        self.needed_information = []



* Implementation of Topic

A topic contains a list of scenarios, a vocabulary, a database, a filter (the required frame), and a list of frames. Topic is also where the main logic functions are implemented. The vocabulary is initialized as a list of generic keywords for a specific topic. Then, the vocabularies of all its scenarios are appended to that list, generating one main vocabulary that is used to guess the topic of the user input. For our weather bot, the complete vocabulary contains words such as weather, hot, and sun.

In [4]:
# Abstract class
class Topic():
    """
    A subject the chatbot can answer questions about.

    Attributes:
        vocabulary: keywords of the topic; build_vocabulary() appends the
            keywords of every scenario to it.
        scenarios: the scenarios that belong to the topic.
        name: name of the topic.
        frames: frames that must be filled before an answer is given.
        is_filled: flag initialised to False; not read anywhere in this class.
        temp_scenario: scenario chosen for the current question, or None.
        data_base: pandas DataFrame holding the answers.
        filter: name of the frame (and of the data_base column) used to pick
            the rows of the answer.
    """

    def __init__(self):

        self.vocabulary = []
        self.scenarios = []
        self.name = str()
        self.frames = []
        self.is_filled = False
        self.temp_scenario = None
        self.data_base = pd.DataFrame()
        self.filter = ""

    @staticmethod
    def _keyword_score(input_sentence, vocabulary):
        """
        Counts the words of the sentence that belong to the vocabulary.

        A word also counts when it matches after removing leading/trailing
        punctuation, so "sunny?" is recognised as "sunny".
        """
        score = 0
        for word in input_sentence.lower().split():
            bare_word = re.sub(r"^\W+|\W+$", "", word)
            if word in vocabulary or bare_word in vocabulary:
                score += 1
        return score

    def build_vocabulary(self):
        """
        Builds the topic vocabulary based on the scenarios vocabulary
        """
        for scenario in self.scenarios:
            self.vocabulary = self.vocabulary + scenario.vocabulary

    def are_frames_empty(self):
        """
        Returns True if at least one of the frames is still empty
        """
        return any(frame.is_empty() for frame in self.frames)

    def get_highest_priority_frame(self):
        """
        Returns the empty frame with the lowest priority value.

        Raises IndexError when no frame is empty.
        """
        empty_frames = [frame for frame in self.frames if frame.is_empty()]
        return sorted(empty_frames, key=lambda frame: frame.priority)[0]

    def ask_for_frames(self):
        """
        Asks the user for the information of the empty frame with the highest
        priority and stores the answer if something could be extracted
        """
        frame = self.get_highest_priority_frame()

        # ask question
        answer = handle_input(frame.make_question())

        # Store the parsed answer. The raw answer is parsed exactly once:
        # parsing an already extracted value a second time could lose it.
        # When nothing is found the frame simply stays empty ("").
        frame.set_information(answer)

    def complete_frames(self, input_sentence):
        """
        Aims to look for the frames of the topic and store their information
        """

        # Check if with the input sentence we can complete the frames
        for frame in self.frames:
            if frame.is_empty():
                frame.set_information(input_sentence)

        # if not, ask for them.
        while self.are_frames_empty():
            self.ask_for_frames()

    def guess_scenario(self, input_sentence):
        """
        Stores in temp_scenario the most appropriate scenario given a user
        sentence. temp_scenario is left untouched when no scenario is found.
        """

        # Tries by looking at similarities in the input of the sentence
        scores = [self._keyword_score(input_sentence, scenario.vocabulary)
                  for scenario in self.scenarios]

        # If the score of any scenario is higher than 0, the best one is taken
        # (the first one on a tie)
        if scores and max(scores) > 0:
            scenario = self.scenarios[scores.index(max(scores))]

        # If the user input is not sufficient, we ask the user for a scenario
        else:
            scenario = self.ask_scenario()

        if scenario is not None:
            self.temp_scenario = scenario

    def ask_scenario(self):
        """
        Asks iteratively for a scenario in the scenarios of the topic and
        returns the first one the user accepts with "yes", or None
        """
        for scenario in self.scenarios:
            answer = handle_input(scenario.question)
            if re.search("yes", answer.lower()):
                return scenario

        # If nothing is found. Handle later
        return None

    def respond(self):
        """
        Response function: returns the answer as a human readable string
        """

        response_data = self.get_data()

        # Without matching rows the decoded answer would be an empty string;
        # tell the user instead of answering with nothing.
        if response_data.empty:
            return "I am sorry, I could not find any information for {}.".format(
                self.retrieve_filter_value())

        return decoder(response_data)

    def retrieve_filter_value(self):
        """
        Retrieve the information stored in the filtering frame
        (None when no frame is named like the filter)
        """

        for frame in self.frames:
            if frame.name == self.filter:
                return frame.information

    def get_data(self):
        """
        filters and adapts the data to pass it to the decoder
        """

        # get necessary filters of the topic
        filter_by = self.filter
        filter_value = self.retrieve_filter_value()
        select = self.temp_scenario.needed_information

        # keep the rows that match the filter, then the needed columns
        df = self.data_base
        aux_df = df[df[filter_by] == filter_value]
        aux_df = aux_df.filter(select)

        return aux_df



* Implementation of the Chatbot

In [5]:
class Chatbot():
    """
    Console chatbot that routes a user sentence to a topic and a scenario.

    Attributes:
        topics: the topics the chatbot can answer questions about.
        temp_topic: topic guessed for the sentence being handled, or None
            when no topic matched.

    NOTE: typing STOP only ends the conversation when it answers one of the
    "What do you want to know?" prompts issued by start_chatbot; the follow-up
    questions asked by the topic (scenario and frame prompts) are not checked
    by this loop.
    """

    def __init__(self):
        self.topics = []
        self.temp_topic = None

    @staticmethod
    def _keyword_score(input_sentence, vocabulary):
        """
        Counts the words of the sentence that belong to the vocabulary.

        A word also counts when it matches after removing leading/trailing
        punctuation, so "food?" is recognised as "food".
        """
        score = 0
        for word in input_sentence.lower().split():
            bare_word = re.sub(r"^\W+|\W+$", "", word)
            if word in vocabulary or bare_word in vocabulary:
                score += 1
        return score

    def start_chatbot(self):
        """
        Runs the conversation loop until the user answers STOP
        """

        print("Welcome to the Chatbot!")
        print("I provide information about " + self.find_all_topics() + ".")
        print("In order to shut me down, input STOP.")
        print()
        time.sleep(0.5)

        answer = handle_input("What do you want to know?")
        while answer != "STOP":

            # guesses topic
            self.guess_topic(input_sentence=answer)

            if self.temp_topic is None:
                answer = handle_input("I am sorry, I did not understand you. Try again please. What do you want to know?")
                continue

            # guesses scenario
            self.temp_topic.guess_scenario(answer)

            if self.temp_topic.temp_scenario is None:
                answer = handle_input("I am sorry, I don't have more information. Try again please. What do you want to know?")
                continue

            # completes the frames of the topic
            self.temp_topic.complete_frames(answer)

            # responds accordingly
            response = self.temp_topic.respond()
            print("Chatbot: ", response)

            # Restart
            time.sleep(0.5)
            answer = handle_input("What else do you want to know?")

            # information is reset
            self.reset_knowledge()

    def find_all_topics(self):
        """Return the names of all topics as a comma separated string."""
        return ", ".join(topic.name for topic in self.topics)

    def guess_topic(self, input_sentence):
        """
        Stores in temp_topic the most appropriate topic given a user sentence,
        or None when no topic keyword is found in the sentence
        """

        # Forget the topic of a previous turn. Without this, a sentence that
        # matches no topic was routed to the topic left over from a turn that
        # ended without a scenario.
        self.temp_topic = None

        scores = [self._keyword_score(input_sentence, topic.vocabulary)
                  for topic in self.topics]

        # if any topic scores 1 point at least, take the best (first on a tie)
        if scores and max(scores) > 0:
            self.temp_topic = self.topics[scores.index(max(scores))]

    def reset_knowledge(self):
        """
        resets the knowledge of the current topic to its defaults
        """

        # nothing to reset when no topic is selected
        if self.temp_topic is None:
            return

        # frames of the topic
        for frame in self.temp_topic.frames:
            frame.information = ""

        # scenario
        self.temp_topic.temp_scenario = None

        # topic
        self.temp_topic = None






* Other functions

In [6]:
# Sentence templates used by the decoder to turn a database value into a
# human readable answer. The key is the database column name and {result}
# is replaced by the (formatted) value of that column.
answers_dict = dict(
    snow="it will {result} snow",
    rain="it will {result} rain",
    sun="it will {result} be sunny",
    temperature="it will be {result} degrees",
    restaurant="A restaurant named {result} ",
    sushi="is {result} based on sushi",
    italian=" is {result} italian",
    vegan=" is {result} vegan",
    station=" in {result}",
    bus="The next bus leaves at {result}",
    tram="The next tram leaves at {result}",
)

In [7]:
def decoder(df):
    """
    Turns the content of a dataframe into text.

    Every row becomes one line: each cell is rendered with structure_result
    (using its column name) and followed by a space, and the line is closed
    with a newline. An empty dataframe gives an empty string.
    """
    rendered_rows = []
    for position in range(len(df)):
        row = df.iloc[position]
        fragments = [
            structure_result(value=cell, name=column) + " "
            for column, cell in zip(row.index, row.values)
        ]
        rendered_rows.append("".join(fragments) + "\n")

    return "".join(rendered_rows)

In [8]:
def structure_result(value, name):
    """
    Decodes to string a particular data case.

    Args:
        value: the cell value taken from the database.
        name: the column name, used as key of answers_dict.

    Returns:
        The answers_dict template of ``name`` with {result} replaced by
        "" for a true boolean, "not" for a false boolean and str(value)
        otherwise, with runs of whitespace collapsed to a single space.

    Raises:
        KeyError: if ``name`` has no template in answers_dict.
    """

    # Only real booleans become ""/"not". Comparing with == True / == False
    # would also catch the numbers 1 and 0 (a temperature of 1 or 0 degrees).
    if isinstance(value, (bool, np.bool_)):
        res = "" if value else "not"
    else:
        res = str(value)

    result = answers_dict[name].format(result=res)

    # An empty {result} and the padding of some templates leave doubled or
    # leading spaces; normalise them.
    return " ".join(result.split())

In [9]:
def handle_input(input_question):
    """
    Single entry point for every question asked to the user.

    Prints the question, reads the answer from the console, echoes it and
    returns it unchanged. A notice is printed when the answer is STOP.
    """
    print("Chatbot: ", input_question)
    prompt = input_question + " "
    answer = input(prompt)
    print("User: ", answer)

    wants_to_stop = answer == "STOP"
    if wants_to_stop:
        print("The Chatbot has been turned off")

    return answer


## Information filling
* Weather topic

It has its own frames

In [10]:
# Weather information
# Retrieve where: the place the question is about
where = Frame()
where.name = "where"
where.priority = 1
where.questions = ["What place is it about?", "Could you provide me the name of the place?", "What is the name of the place?"]
# Raw string, so that \w is not an invalid string escape. The leading \b keeps
# "is " / "in " from matching inside longer words such as "this " or "rain ".
where.decoder = r"\b(?:is |for |about |in )+(\w+)"

# Retrieve when: the day of the week the question is about
when = Frame()
when.name = "when"
when.priority = 2
when.questions = ["What day of the week is it?", "At what day date?", "What day is it?"]
when.decoder = "(monday|tuesday|wednesday|thursday|friday|saturday|sunday)"

Its associated database to base our answers on

In [11]:
# One row per day of the week: (when, temperature, sun, snow, rain)
weather_data = pd.DataFrame(
    [
        ("monday", 1, True, False, False),
        ("tuesday", 2, False, True, False),
        ("wednesday", -1, True, False, False),
        ("thursday", 1, True, False, False),
        ("friday", 0, False, True, False),
        ("saturday", 3, True, False, False),
        ("sunday", 2, False, False, True),
    ],
    columns=["when", "temperature", "sun", "snow", "rain"],
)

Its possible scenarios

In [12]:
# Weather forecast scenarios: one per kind of weather question.
# needed_information lists the weather_data columns used in the answer.

# Will it be sunny?
sun = Scenario()
sun.name = "sun"
sun.question ="Do you wanna know if it's sunny?"
sun.vocabulary = ["sun", "sunny"]
sun.needed_information = ["sun"]

# Will it rain?
rain = Scenario()
rain.name = "rain"
rain.question ="Do you wanna know if it's going to rain?"
rain.vocabulary =["rain","raining"]
rain.needed_information = ["rain"]

# Will it snow?
snow = Scenario()
snow.name = "snow"
snow.question ="Do you wanna know if it's going to snow?"
snow.vocabulary =["snow","snowing"]
snow.needed_information = ["snow"]

# How warm will it be?
temperature = Scenario()
temperature.name = "temperature"
temperature.question ="Do you wanna know what the temperature will be?"
temperature.vocabulary = ["hot", "cold", "warm", "chilly", "temperature"]
temperature.needed_information = ["temperature"]

# Complete forecast: every column at once
forecast = Scenario()
forecast.name = "weather forecast"
forecast.question ="Do you wanna know the weather forecast?"
forecast.vocabulary = ["forecast"]
forecast.needed_information = ["temperature", "sun", "snow", "rain"]

And the topic itself

In [13]:
# Weather topic: answers are looked up in weather_data by the "when" frame.
weather = Topic()
weather.name = "weather"
weather.frames =[where, when]
weather.filter = "when"
weather.data_base = weather_data

# Generic keywords first; build_vocabulary() then appends the keywords of
# every scenario, so it must run after both assignments.
weather.vocabulary = ["weather"]
weather.scenarios = [sun, rain, snow, temperature, forecast]
weather.build_vocabulary()

* Restaurant info

Frames

In [14]:
# Retrieve where: the neighborhood the user wants to eat in
neighborhood = Frame()
neighborhood.name = "neighborhood"
neighborhood.priority = 1
neighborhood.questions = ["Where are you now?", "What neighborhood is it about?", "Could you provide me the name of the neighborhood?"]
# Raw string, so that \w is not an invalid string escape. The leading \b keeps
# "is " / "in " from matching inside longer words such as "this " or "rain ".
neighborhood.decoder = r"\b(?:is |for |about |in )+(\w+)"


Data base

In [15]:
# One row per restaurant: (neighborhood, restaurant, sushi, vegan, italian)
restoData = pd.DataFrame(
    [
        ("haga", "solrosen", False, True, False),
        ("johanneberg", "yoko", True, True, False),
        ("johanneberg", "en italienare och en grek", False, False, True),
        ("haga", "buffebordet", True, True, True),
        ("vasa", "simba", False, True, False),
    ],
    columns=["neighborhood", "restaurant", "sushi", "vegan", "italian"],
)

Scenarios

In [16]:
# Restaurant scenarios: one per kind of cuisine.
# needed_information lists the restoData columns used in the answer.
sushi = Scenario()
sushi.vocabulary = ["sushi", "sashimi", "fish", "nori"]
sushi.name = "sushi"
sushi.question = "Are you looking for a sushi restaurant?"
sushi.needed_information =["restaurant", "sushi"]

vegan = Scenario()
vegan.vocabulary =["vegan","tofu", "beans"]
vegan.name = "vegan"
vegan.question = "Are you looking for a vegan restaurant?"
vegan.needed_information = ["restaurant", "vegan"]

itaRes = Scenario()
# "italian" is part of the vocabulary like "sushi" and "vegan" are for their
# scenarios; without it "an italian restaurant" did not select this scenario.
itaRes.vocabulary = ["italian", "pasta","pizza","risotto","gnocchi"]
itaRes.name = "italian"
itaRes.question = "Are you looking for an Italian restaurant?"
itaRes.needed_information = ["restaurant", "italian"]


Topic

In [17]:
# Restaurant topic: answers are looked up in restoData by neighborhood.
restaurant = Topic()
restaurant.name = "restaurants"
restaurant.frames = [neighborhood]
restaurant.filter = "neighborhood"
restaurant.data_base = restoData

# Generic keywords first; build_vocabulary() then appends the keywords of
# every scenario, so it must run after both assignments.
restaurant.vocabulary = ["restaurant","eating","food", "eat"]
restaurant.scenarios = [sushi, vegan, itaRes]
restaurant.build_vocabulary()

* Commute info 

Frames

In [18]:
#Retrieve neighbor
station = Frame()
station.name = "station"
station.priority = 1
station.questions = ["Where are you now?", "What station are you at?", "At what bus/tram stop?"]
station.decoder = "(?:is |for |about |in )+(\w+)"

Data Base

In [19]:
# One row per station: (station, next tram departure, next bus departure)
commuteData = pd.DataFrame(
    [
        ("elisedal", "10:30", "10:45"),
        ("almedal", "11:30", "11:45"),
        ("varbergsgatan", "12:30", "12:45"),
        ("lana", "13:30", "13:45"),
        ("krokslätts torg", "14:30", "14:45"),
    ],
    columns=["station", "tram", "bus"],
)

Scenarios

In [20]:
# Commute info scenarios: one per means of transport.
# needed_information lists the commuteData columns used in the answer.

# Next tram departure
tram = Scenario()
tram.name = "tram"
tram.question = "Do you want to know when the tram leaves?"
tram.vocabulary = ["tram", "tramline"]
tram.needed_information =["tram", "station"]

# Next bus departure
bus = Scenario()
bus.name = "bus"
bus.question = "Do you want to know when the bus leaves?"
bus.vocabulary =["bus","autobus"]
bus.needed_information = ["bus", "station"]


Topic

In [21]:
# Commute topic: answers are looked up in commuteData by station.
commuteInfo = Topic()
commuteInfo.name = "arrival times for bus and tram"
commuteInfo.frames = [station]
commuteInfo.filter = "station"
commuteInfo.data_base = commuteData

# Generic keywords first; build_vocabulary() then appends the keywords of
# every scenario, so it must run after both assignments.
commuteInfo.vocabulary = ["station","line","stop"]
commuteInfo.scenarios = [tram, bus]
commuteInfo.build_vocabulary()

# Chatbot

In [22]:
# Assemble the chatbot from the three topics defined in the cells above.
# The order matters on a tie: Chatbot.guess_topic picks the first topic
# with the highest keyword score.
chatbot = Chatbot()
chatbot.topics = [weather, restaurant, commuteInfo]

In [24]:
# Start the interactive conversation; the cell waits on input() and keeps
# running until the user answers STOP to a "What do you want to know?" prompt.
chatbot.start_chatbot()

Welcome to the Chatbot!
I provide information about weather, restaurants, arrival times for bus and tram.
In order to shut me down, input STOP.

Chatbot:  What do you want to know?
User:  shit
Chatbot:  I am sorry, I did not understand you. Try again please. What do you want to know?
User:  i want to find a restaurant
Chatbot:  Are you looking for a sushi restaurant?
User:  no
Chatbot:  Are you looking for a vegan restaurant?
User:  no
Chatbot:  Are you looking for an Italian restaurant?
User:  no
Chatbot:  I am sorry, I don't have more information. Try again please. What do you want to know?
User:  STOP
The Chatbot has been turned off
