In [None]:
import numpy as np
import datetime
import pandas as pd
import abc
import re
from abc import ABC, abstractmethod
import spacy



In [None]:
# Load spaCy's small English pipeline once. The frame classes in this notebook
# read its named entities (labels GPE / LOC) to identify locations.
nlp = spacy.load("en_core_web_sm")

# Defining frames

In [None]:
from pandas._libs.hashtable import value_count

# Abstract Frame class
class Frame(metaclass=abc.ABCMeta):
  """Abstract base class for a slot-filling dialog frame.

  A frame keeps its slots in ``self.tasks``: a dict that maps a slot name to
  the value extracted so far, or to ``None`` while the slot is unfilled.
  Concrete frames supply the slot dict, the extraction logic
  (``find_everything``), the confirmation text (``greeting``) and the final
  answer (``fetch_data``).
  """

  def __init__(self):
    # Placeholder only; concrete frames assign their own slot dict.
    self.tasks = None

  def missing_tasks(self):
    """Report which slots are still unfilled.

    Returns:
      A list with the names of all slots whose value is ``None`` (in slot
      order), or ``False`` when nothing is missing. ``False`` is also
      returned when no slot dict has been set up, instead of raising.
    """
    if not self.tasks:
      return False
    missing = [task for task, value in self.tasks.items() if value is None]
    # Keep the historical falsy sentinel so `while frame.missing_tasks():`
    # and any explicit comparison against False keep working.
    return missing or False

  @abstractmethod
  def greeting(self):
    """Return the opening line of the confirmation message.

    The wording differs per intent, so every concrete frame provides its own.
    """
    pass

  def confirm_intent(self, sentence):
    """Extract slots from ``sentence`` and build the confirmation message.

    Args:
      sentence: The user's utterance; passed to ``find_everything``.

    Returns:
      The greeting followed by ``slot:value`` pairs for every filled slot,
      separated by ", ". When no slot is filled, the greeting alone with
      trailing whitespace removed.
    """
    self.find_everything(sentence)
    # The greeting is requested after extraction because its wording depends
    # on whether any slot has been filled.
    answer = self.greeting()
    filled = [
        f"{task}:{value}"
        for task, value in (self.tasks or {}).items()
        if value is not None
    ]
    if not filled:
      # Greetings carry trailing padding; strip it rather than cutting a
      # fixed number of characters, which would eat real text when the
      # padding is absent.
      return answer.rstrip()
    return answer + ", ".join(filled)

  @abstractmethod
  def find_everything(self, sentence):
    """Try to fill every still-empty slot from ``sentence``."""
    pass

  # Different fetched data depending on the intent of the question.
  # Called once all tasks in the frame are filled in.
  @abstractmethod
  def fetch_data(self):
    """Return the final answer for the completed frame."""
    pass

    

class ResturantFrame(Frame):
  """Frame for restaurant requests with Day, Time, Cuisine and Location slots."""

  DAY_REGEX = r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)"
  # 12-hour clock time such as "7 pm", "7:30 pm" or "12:05AM".
  # (?<!\d) stops the hour from starting in the middle of a longer number
  # ("13:45 pm" must not yield "3:45 pm"); the trailing \b stops "am"/"pm"
  # from matching the start of a longer word ("2 amazing").
  TIME_REGEX = r"(?<!\d)(1[0-2]|[1-9])(:[0-5]\d)? ?[ap]m\b"
  CUISINE_REGEX = r"(Italian|Mexican|Chinese|Thai|Indian|American|Japanese|French|Greek)"

  def __init__(self):
    super().__init__()
    self.tasks = {"Day": None, "Time": None, "Cuisine": None, "Location": None}

  def _fill_from_pattern(self, slot, pattern, sentence):
    """Store the first case-insensitive match of ``pattern`` in ``slot``.

    Returns:
      ``True`` when a match was found and stored, ``False`` otherwise
      (the slot is left untouched in that case).
    """
    match = re.search(pattern, sentence, re.IGNORECASE)
    if match is None:
      return False
    self.tasks[slot] = match.group(0)
    return True

  def find_day(self, sentence):
    """Fill "Day" with the first weekday name in ``sentence``; True on success."""
    return self._fill_from_pattern("Day", self.DAY_REGEX, sentence)

  def find_time(self, sentence):
    """Fill "Time" with the first am/pm time in ``sentence``; True on success."""
    return self._fill_from_pattern("Time", self.TIME_REGEX, sentence)

  def find_cuisine(self, sentence):
    """Fill "Cuisine" with the first known cuisine in ``sentence``; True on success."""
    return self._fill_from_pattern("Cuisine", self.CUISINE_REGEX, sentence)

  def find_location(self, sentence):
    """Fill "Location" with the first GPE/LOC entity spaCy finds.

    Returns:
      ``True`` when an entity was stored, ``False`` otherwise.
    """
    for ent in nlp(sentence).ents:
      if ent.label_ in ("GPE", "LOC"):
        self.tasks["Location"] = ent.text
        return True
    return False

  def find_everything(self, sentence):
    """Run the finder of every slot that is still empty.

    Slots that already hold a value are never overwritten.
    """
    finders = (
        ("Location", self.find_location),
        ("Cuisine", self.find_cuisine),
        ("Time", self.find_time),
        ("Day", self.find_day),
    )
    for slot, finder in finders:
      if self.tasks[slot] is None:
        finder(sentence)

  def greeting(self):
    """Return the confirmation opener; wording depends on whether any slot is filled."""
    # The two trailing spaces of the short form are padding that
    # Frame.confirm_intent removes.
    if all(value is None for value in self.tasks.values()):
      return "Digital assistant: I understand you want help with finding a restaurant.  "
    return "Digital assistant: I understand that you want help with finding a restaurant with the following criterias: "

  # The actual fetching of the data is not implemented, only toy data is returned
  def fetch_data(self):
    """Return the (toy) restaurant suggestion built from the filled slots."""
    day = self.tasks["Day"]
    location = self.tasks["Location"]
    cuisine = self.tasks["Cuisine"]
    time = self.tasks["Time"]

    return f"Digital assistant: You want to find a restaurant in {location} on {day} at {time} with a {cuisine} cuisine. \nDigital assistant: I have found the following restaurant for you: Resturant Bongo "

class WeatherFrame(Frame):
  """Frame for weather-forecast requests with Day, Time and Location slots."""

  DAY_REGEX = r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)"
  # 12-hour clock time such as "7 pm", "7:30 pm" or "12:05AM".
  # (?<!\d) stops the hour from starting in the middle of a longer number
  # ("13:45 pm" must not yield "3:45 pm"); the trailing \b stops "am"/"pm"
  # from matching the start of a longer word ("2 amazing").
  TIME_REGEX = r"(?<!\d)(1[0-2]|[1-9])(:[0-5]\d)? ?[ap]m\b"

  def __init__(self):
    super().__init__()
    self.tasks = {"Day": None, "Time": None, "Location": None}

  def _fill_from_pattern(self, slot, pattern, sentence):
    """Store the first case-insensitive match of ``pattern`` in ``slot``.

    Returns:
      ``True`` when a match was found and stored, ``False`` otherwise
      (the slot is left untouched in that case).
    """
    match = re.search(pattern, sentence, re.IGNORECASE)
    if match is None:
      return False
    self.tasks[slot] = match.group(0)
    return True

  def find_day(self, sentence):
    """Fill "Day" with the first weekday name in ``sentence``; True on success."""
    return self._fill_from_pattern("Day", self.DAY_REGEX, sentence)

  def find_time(self, sentence):
    """Fill "Time" with the first am/pm time in ``sentence``; True on success."""
    return self._fill_from_pattern("Time", self.TIME_REGEX, sentence)

  def find_location(self, sentence):
    """Fill "Location" with the first GPE/LOC entity spaCy finds.

    Returns:
      ``True`` when an entity was stored, ``False`` otherwise.
    """
    for ent in nlp(sentence).ents:
      if ent.label_ in ("GPE", "LOC"):
        self.tasks["Location"] = ent.text
        return True
    return False

  def find_everything(self, sentence):
    """Run the finder of every slot that is still empty.

    Slots that already hold a value are never overwritten.
    """
    finders = (
        ("Location", self.find_location),
        ("Time", self.find_time),
        ("Day", self.find_day),
    )
    for slot, finder in finders:
      if self.tasks[slot] is None:
        finder(sentence)

  def greeting(self):
    """Return the confirmation opener; wording depends on whether any slot is filled."""
    # The two trailing spaces of the short form are padding that
    # Frame.confirm_intent removes.
    if all(value is None for value in self.tasks.values()):
      return "Digital assistant: I understand that you want a weather forecast.  "
    return "Digital assistant: I understand that you want a weather forecast with the following criterias: "

  # The actual fetching of the data is not implemented, only toy data is returned
  def fetch_data(self):
    """Return the (toy) forecast text built from the filled slots."""
    day = self.tasks["Day"]
    location = self.tasks["Location"]
    time = self.tasks["Time"]

    return f"Digital assistant: The weather in {location} on {day} at {time} will be: sunny and 20 deg celcius "

class BusFrame(Frame):
  """Frame for next-bus requests with Start, Destination and Time slots.

  "Time" is pre-filled with the wall-clock time (HH:MM) at which the frame is
  created, so only Start and Destination have to be extracted from the user.
  """

  # Word directly in front of a place name that marks its role in the trip.
  START_CUES = frozenset({"from"})
  DESTINATION_CUES = frozenset({"to", "toward", "towards"})

  def __init__(self):
    super().__init__()
    created_at = datetime.datetime.now().strftime("%H:%M")
    self.tasks = {"Start": None, "Destination": None, "Time": created_at}

  @staticmethod
  def _places(sentence):
    """Return ``(cue, text)`` for every GPE entity in ``sentence``, in order.

    ``cue`` is the lower-cased token immediately before the entity, or ""
    when the entity opens the sentence.
    """
    doc = nlp(sentence)
    places = []
    for ent in doc.ents:
      if ent.label_ == "GPE":
        cue = doc[ent.start - 1].lower_ if ent.start > 0 else ""
        places.append((cue, ent.text))
    return places

  def _choose(self, places, wanted_cues, rejected_cues, other_slot):
    """Pick the place for one slot, or ``None`` when nothing fits.

    A place already stored in ``other_slot`` is never reused. A place
    introduced by one of ``wanted_cues`` wins; otherwise the first place
    that is not introduced by one of ``rejected_cues`` is taken, which
    keeps the plain first-entity behaviour for sentences without cues.
    """
    taken = self.tasks[other_slot]
    free = [(cue, text) for cue, text in places if text != taken]
    for cue, text in free:
      if cue in wanted_cues:
        return text
    for cue, text in free:
      if cue not in rejected_cues:
        return text
    return None

  def _fill_start(self, places):
    """Store and return the start chosen from ``places`` (``None`` if none)."""
    place = self._choose(places, self.START_CUES, self.DESTINATION_CUES, "Destination")
    if place is not None:
      self.tasks["Start"] = place
    return place

  def _fill_destination(self, places):
    """Store and return the destination chosen from ``places`` (``None`` if none)."""
    place = self._choose(places, self.DESTINATION_CUES, self.START_CUES, "Start")
    if place is not None:
      self.tasks["Destination"] = place
    return place

  def find_start(self, sentence):
    """Fill "Start" from ``sentence``.

    Returns:
      The stored place name, or ``None`` when no suitable GPE entity exists.
    """
    return self._fill_start(self._places(sentence))

  def find_destination(self, sentence):
    """Fill "Destination" from ``sentence``.

    Returns:
      The stored place name, or ``None`` when no suitable GPE entity exists.
    """
    return self._fill_destination(self._places(sentence))

  def find_everything(self, sentence):
    """Fill whichever of Start / Destination is still empty.

    The sentence is run through the spaCy pipeline once and the result is
    shared by both slots. Slots that already hold a value are not overwritten.
    """
    need_start = self.tasks["Start"] is None
    need_destination = self.tasks["Destination"] is None
    if not (need_start or need_destination):
      return
    places = self._places(sentence)
    if need_start:
      self._fill_start(places)
    if need_destination:
      self._fill_destination(places)

  def greeting(self):
    """Return the confirmation opener; "Time" is ignored when checking for filled slots."""
    # "Time" is always set, so the short form is completed by the
    # "Time:HH:MM" pair that Frame.confirm_intent appends.
    if all(value is None for key, value in self.tasks.items() if key != 'Time'):
      return "Digital assistant: I understand that you want help with finding the next bus at  "
    return "Digital assistant: I understand that you want to find the next bus with the following criterias: "

  # The actual fetching of the data is not implemented, only toy data is returned
  def fetch_data(self):
    """Return the (toy) next-bus answer built from Start and Destination."""
    start = self.tasks["Start"]
    destination = self.tasks["Destination"]

    return f"Digital assistant: The next bus leaving from {start} with desitination {destination} is: Bus 57 at 13:30"

# Implementing an intent identifier

Read dataset with classified intents


In [None]:
# Load the labelled sentences; the file is read as semicolon-separated.
# The cells below use its 'Sentence' and 'Intent' columns.
df = pd.read_csv("dataset2.csv", sep=';')
# Preview the first rows.
df.head()

Unnamed: 0,Sentence,Intent
0,Can you recommend a good Italian restaurant ne...,find_restaurant
1,I'm craving sushi - where's the best place to ...,find_restaurant
2,What's the best burger joint in town?,find_restaurant
3,I'm looking for a romantic restaurant with a v...,find_restaurant
4,I need to find a family-friendly restaurant wi...,find_restaurant


Training and testing the SVC model

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC

# Split the data into training and testing sets.
# The split is positional: the first TRAIN_SIZE rows train the model and the
# remaining rows are held out, so the result depends on the row order of df.
TRAIN_SIZE = 80
train_x = df['Sentence'].iloc[:TRAIN_SIZE]
train_y = df['Intent'].iloc[:TRAIN_SIZE]
test_x = df['Sentence'].iloc[TRAIN_SIZE:]
test_y = df['Intent'].iloc[TRAIN_SIZE:]

# Convert the text data into numerical features using a CountVectorizer.
# The vocabulary is learned from the training sentences only.
vectorizer = CountVectorizer()
train_x_vectors = vectorizer.fit_transform(train_x)
test_x_vectors = vectorizer.transform(test_x)

# Train a Support Vector Machine (SVM) on the training data.
# probability=True enables predict_proba; its calibration shuffles the data
# internally, so random_state is fixed to make the probabilities (and the
# confidence threshold applied to them later) reproducible across runs.
clf_svm = SVC(kernel='linear', C=1, gamma='scale', probability=True, random_state=42)
clf_svm.fit(train_x_vectors, train_y)

# Test the accuracy of the model on the testing data
accuracy = clf_svm.score(test_x_vectors, test_y)
print('SVM accuracy:', accuracy)


SVM accuracy: 0.9827586206896551


# Building the dialog system

Implementing a function that identifies the intent of a sentence and returns the corresponding Frame, or `None` when no intent is predicted with enough confidence

In [None]:
from numpy.random.mtrand import noncentral_f
def find_frame(sentence, threshold=0.5):
  """Classify ``sentence`` and return a fresh frame for the predicted intent.

  Args:
    sentence: The user's utterance.
    threshold: Minimum class probability that at least one intent must reach
      for the prediction to be trusted.

  Returns:
    A new ``BusFrame``, ``WeatherFrame`` or ``ResturantFrame`` instance, or
    ``None`` when every intent probability is below ``threshold`` or the
    predicted label is not one of the three known intents.
  """
  sentence_vector = vectorizer.transform([sentence])
  intent_scores = clf_svm.predict_proba(sentence_vector)[0]

  # Reject the request when no intent is predicted with enough confidence.
  if all(score < threshold for score in intent_scores):
    return None

  # Predict once and look the frame class up, instead of re-running the
  # classifier for every candidate label. The table is built at call time so
  # the frame classes only need to exist when the function is used.
  intent = clf_svm.predict(sentence_vector)[0]
  frame_by_intent = {
      "find_next_bus": BusFrame,
      "weather_forecast": WeatherFrame,
      "find_restaurant": ResturantFrame,
  }
  frame_class = frame_by_intent.get(intent)
  if frame_class is None:
    return None
  return frame_class()



Building the chat

In [None]:
def start_chat(sentence):
  """Run one complete dialog, starting from the user's opening ``sentence``.

  The dialog (1) identifies the intent, asking the user to rephrase until a
  frame is found, (2) asks the user to confirm the detected intent and
  starts over with a new request when it is rejected, (3) keeps asking for
  the missing slots until the frame is complete, and (4) prints the answer.

  Args:
    sentence: The user's initial request.
  """
  while True:
    # Continue asking to rephrase until a frame can be identified.
    frame = find_frame(sentence)
    while frame is None:
      sentence = input("Digital assistant: I am sorry, I don't understand your request. Please rephrase. \nUser input ")
      frame = find_frame(sentence)

    # Confirm that the identified intent is correct.
    print(frame.confirm_intent(sentence))
    answer = input("Digital assistant: Is this correct, Yes/No? \nUser input: ")
    # Accept the confirmation regardless of case or surrounding whitespace.
    if answer.strip().lower() in ("yes", "y"):
      break

    # Restart with the rephrased request (not the rejected one).
    sentence = input("Digital assistant: I am sorry that I missunderstood. Please rephrase your inital request. \nUser input:")

  # Search for tasks until all tasks are fulfilled.
  missing_tasks = frame.missing_tasks()
  while missing_tasks:
    missing_tasks_string = " and ".join(missing_tasks)
    # Ask to specify information about the missing tasks.
    prompt = f"Digital assistant: Good! Please specify informating about the following: {missing_tasks_string}. \nUser input: "
    frame.find_everything(input(prompt))
    missing_tasks = frame.missing_tasks()

  print(frame.fetch_data())

Using the chat

In [None]:
# Ask for the opening request and hand it to the dialog function.
sentence = input("Digital assistant: What can I assist with? \nUser input: ")
start_chat(sentence)

Digital assistant: What can I assist with? 
User input: I want to book a restaurant
Digital assistant: I understand you want help with finding a restaurant.
Digital assistant: Is this correct, Yes/No? 
User input: Yes
Digital assistant: Good! Please specify informating about the following: Day and Time and Cuisine and Location. 
User input: I woud like to eat Italian 
Digital assistant: Good! Please specify informating about the following: Day and Time and Location. 
User input: Monday at 7:30 pm 
Digital assistant: Good! Please specify informating about the following: Location. 
User input: In New York
Digital assistant: You want to find a restaurant in New York on Monday at 7:30 pm with a Italian cuisine. 
Digital assistant: I have found the following restaurant for you: Resturant Bongo 
