# A) Implement a simple text-based digital assistant 

## Data examples

In [1]:
# Example of training sentences

# Five example sentences the system might get around the topic weather 
"""
What will be the weather like in [location] on [day]? # weather
Can you tell me the forecast for [location] on [day]? # forecast 
What's the weather forecast for [day] in [location]? 
Will it rain in [location] on [day]? # rain, sun, snow
Could you give me an update on the temperature in [location] for [day]? # temperature
"""
# Five for restaurants 
"""
Can you recommend a good restaurant in [town] for dinner tonight? # restaurant
I'm looking for somewhere to eat in [town] with vegetarian options, any suggestions? # (wanting) to eat
What's the best dining in [town] for a romantic date night? # dining
Could you suggest a place for diner in [town] with outdoor seating? # diner, lunch
Where can I find a restaurant in [town] that serves authentic local cuisine? 
"""

# Five for traveling with the tram
"""
When is the next tram to [destination] leaving? # tram, (maybe a different choice of vehicle?)
Can you tell me the schedule for trams leaving from [destination]? # (schedule)
What time is the next tram leaving to [destination] from my current location? #leaving
How often do trams run to [destination] and when is the next one? # run, go, travel
Is there a tram departing soon to [destination]? #departing
"""

'\nWhen is the next tram to [destination] leaving? # tram, (maybe a different choice of vehicle?)\nCan you tell me the schedule for trams leaving from [destination]? # (schedule)\nWhat time is the next tram leaving to [destination] from my current location? #leaving\nHow often do trams run to [destination] and when is the next one? # run, go, travel\nIs there a tram departing soon to [destination]? #departing\n'

In [2]:
#Pseudo databases:

restaurants = {
    "malmo": {
        "pizza": ["Pizzeria Sicilia"],
        "pasta": ["La Vecchia Signora"],
        "burger": ["Malmö Burger Co."],
        "sushi": ["Sakana Sushi"],
    },
    "gothenburg": {
        "pizza": ["Da Matteo Pizzeria"],
        "pasta": ["Trattoria La Strega"],
        "burger": ["The Barn"],
        "sushi": ["Sushibar Wine"],
    },
    "stockholm": {
        "pizza": ["Giro Pizzeria"],
        "pasta": ["Ristorante Da Peppe"],
        "burger": ["Flippin' Burgers"],
        "sushi": ["Sushiyama"],
    }
}

restaurant = restaurants["malmo"]["burger"][0]
print(restaurant)

weather = {
    "malmo": {
        '1': {"temperature": '2', "weather": "snowy"},
        '2': {"temperature": '3', "weather": "rainy"},
        '3': {"temperature": '4', "weather": "cloudy"},
        '4': {"temperature": '5', "weather": "sunny"},
        '5': {"temperature": '8', "weather": "sunny"},
        '6': {"temperature": '12', "weather": "sunny"},
        '7': {"temperature": '16', "weather": "sunny"},
        '8': {"temperature": '15', "weather": "cloudy"},
        '9': {"temperature": '12', "weather": "rainy"},
        '10': {"temperature": '8', "weather": "rainy"},
        '11': {"temperature": '5', "weather": "cloudy"},
        '12': {"temperature": '3', "weather": "snowy"},
    },
    "gothenburg": {
        '1': {"temperature": '2', "weather": "snowy"},
        '2': {"temperature": '3', "weather": "rainy"},
        '3': {"temperature": '4', "weather": "cloudy"},
        '4': {"temperature": '5', "weather": "sunny"},
        '5': {"temperature": '8', "weather": "sunny"},
        '6': {"temperature": '12', "weather": "sunny"},
        '7': {"temperature": '16', "weather": "sunny"},
        '8': {"temperature": '15', "weather": "cloudy"},
        '9': {"temperature": '12', "weather": "rainy"},
        '10': {"temperature": '8', "weather": "rainy"},
        '11': {"temperature": '5', "weather": "cloudy"},
        '12': {"temperature": '3', "weather": "snowy"},
    },
    "stockholm": {
        '1': {"temperature": '2', "weather": "snowy"},
        '2': {"temperature": '3', "weather": "rainy"},
        '3': {"temperature": '4', "weather": "cloudy"},
        '4': {"temperature": '5', "weather": "sunny"},
        '5': {"temperature": '8', "weather": "sunny"},
        '6': {"temperature": '12', "weather": "sunny"},
        '7': {"temperature": '16', "weather": "sunny"},
        '8': {"temperature": '15', "weather": "cloudy"},
        '9': {"temperature": '12', "weather": "rainy"},
        '10': {"temperature": '8', "weather": "rainy"},
        '11': {"temperature": '5', "weather": "cloudy"},
        '12': {"temperature": '3', "weather": "snowy"},
    }
}

weather_example = weather["gothenburg"]['12']
print(weather_example)



Malmö Burger Co.
{'temperature': '3', 'weather': 'snowy'}


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import accuracy_score

import re
import pandas as pd
import numpy as np

# Custom tokenizer for time formats
def time_tokenizer(text):
    # extract all substrings matching "\d+:\d+" pattern
    pattern = r'\d+:\d+'
    tokens = re.findall(pattern, text)
    return tokens

# Custom tokenizer for city names
def city_tokenizer(text):
    # Use regular expressions to match common patterns in city names
    pattern = r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b'
    cities = re.findall(pattern, text)
    return cities


def train_model(data, eng_stopwords= False, timeformats=False, cityformat=False):
    if timeformats:
        vectorizer = CountVectorizer(tokenizer=time_tokenizer) 
    elif cityformat: 
        vectorizer = CountVectorizer(lowercase=False, tokenizer=city_tokenizer)
    else: 
        if eng_stopwords:
            vectorizer = TfidfVectorizer(strip_accents='ascii', stop_words='english', lowercase=True) #modified
        else:
            vectorizer = TfidfVectorizer(strip_accents='ascii',lowercase=True)
    X = vectorizer.fit_transform(data['text'])
    y = data['label']
    clf = LogisticRegression(solver = 'liblinear')
    print(np.mean(cross_val_score(clf, X,y)))
    clf.fit(X,y)
    return clf, vectorizer
"""
def train_forest(data, eng_stopwords= False, countvectorizer=False):
    if countvectorizer:
        vectorizer = CountVectorizer(strip_accents='ascii', lowercase = True, stop_words = 'english') #  , max_features = 100 & maybe ngram_rangetuple (min_n, 3))
    else: 
        if eng_stopwords:
            vectorizer = TfidfVectorizer(strip_accents='ascii', stop_words='english', lowercase=True) #modified
        else:
            vectorizer = TfidfVectorizer(strip_accents='ascii',lowercase=True)
    X = vectorizer.fit_transform(data['text'])
    y = data['label']
    clf = RandomForestClassifier()
    print(np.mean(cross_val_score(clf, X,y)))
    clf.fit(X,y)
    importances = clf.feature_importances_
    indices = importances.argsort()[::-1][:15]  # get the indices of the top 15 features
    feature_names = [f"feature {i}" for i in range(X.shape[1])]  # create a list of feature names
    print("Feature ranking:")
    for f in range(15):
        print("%d. %s (%f)" % (f + 1, feature_names[indices[f]], importances[indices[f]]))
    return clf, vectorizer
"""
def get_prediction(question,clf,vectorizer):
    question_vectorized = vectorizer.transform([question])
    y_pred = clf.predict(question_vectorized)
    return y_pred


### Data loading & Training 

In [4]:
time_data = pd.read_csv('generated_time2.csv', sep=';',error_bad_lines=False)
location_data = pd.read_csv('location2-20230305-172424.csv', sep=';',error_bad_lines=False)
food_data = pd.read_csv('food2.csv', sep=',',error_bad_lines=False)
topic_data = pd.read_csv('topic2-20230306-223423.csv', sep=';',error_bad_lines=False)
transport_data = pd.read_csv('transport.csv', sep=',',error_bad_lines=False)
answer_data = pd.read_csv('yn2.csv', sep=',',error_bad_lines=False)

In [None]:
DeepnoteChart(food_data, """{"layer":[{"layer":[{"mark":{"clip":true,"type":"bar","color":"#4c78a8","tooltip":true},"encoding":{"x":{"sort":null,"type":"nominal","field":"label","scale":{"type":"linear"}},"y":{"sort":null,"type":"quantitative","field":"label","scale":{"type":"linear"},"format":{"type":"default","decimals":null},"aggregate":"count","formatType":"numberFormatFromNumberType"}}}]}],"title":"","config":{"legend":{}},"$schema":"https://vega.github.io/schema/vega-lite/v5.json","encoding":{}}""")

<__main__.DeepnoteChart at 0x7f0519ac42e0>

In [5]:
# training our models
#time_model, time_vec = train_forest(time_data, countvectorizer=False) #testing if results are better with a random forest
time_model, time_vec = train_model(time_data, timeformats=True) 
location_model, location_vec = train_model(location_data, cityformat=True) # Accruacy of =1, but only takes capital names of cities
food_model, food_vec = train_model(food_data, eng_stopwords=True) 
topic_model, topic_vec = train_model(topic_data, eng_stopwords=True) # added travel and transport, (transportation)
answer_model, answer_vec = train_model(answer_data) 
transport_model, transport_vec = train_model(transport_data, eng_stopwords=True)


0.9743119266055047
1.0
0.9800000000000001
0.9396011396011398
1.0
0.9846153846153847


## Agents

In [9]:

def main_agent(start = False, topic = None, prior_question = None):
    if start:
        print("[Chatbot] Hi, how can I help you today?")
        question = input()
        print("[You] "+question)
        topic = get_prediction(question,topic_model,topic_vec)
        main_agent(topic = topic, prior_question=question)  
        return
    if topic == "unclear":
        retry_agent()
    else:
        print("[Chatbot] Is it correct that you have a question about: "+topic + "?")
        follow_up = input() # control before moving into a domain!
        print("[You] " + follow_up)
        response = get_prediction(follow_up,answer_model,answer_vec)
        if response == "no":  
            retry_agent(notconfirmed=True)
            return
        else:
            if topic == "food":
                food_agent(prior_question = prior_question)
                return
            elif topic == "weather":
                weather_agent(prior_question = prior_question)
                return
            else:
                travel_agent(prior_question = prior_question)
                return

def retry_agent(notconfirmed = False):
    
    if notconfirmed: 
        print("[Chatbot] I see, let's try again. What do you need help with?")
    else: 
        retry_qs=["I'm sorry. Can you rephrase the question/answer?", "I don't understand you. Can you say it in other words?", "Unfortunately I didn't undestand that. Can you say is in a different way?"]
        random_index = np.random.randint(0,len(retry_qs))
        print("[Chatbot]" + retry_qs[random_index])
    question = input()
    print("[You] "+question)
    topic = get_prediction(question,topic_model,topic_vec)
    main_agent(topic = topic, prior_question=question)

def food_agent(prior_question = None, location = None, time = None, food_type = None):   
    if prior_question is not None:
        print("[Chatbot] Okay! Let's help you find somewhere to eat")
        location = get_prediction(prior_question,location_model,location_vec) #fix
        time = get_prediction(prior_question, time_model, time_vec) #fix
        food_type = get_prediction(prior_question, food_model, food_vec)
        food_agent(location = location, time = time, food_type = food_type)
        return   

    if location == "unclear" or None:
        city_qs=["In what city do you want to eat?", "Where are you looking to eat?", "Now which city are you asking for?", "Mention the city you want to eat in"]
        random_index = np.random.randint(0,len(city_qs))
        print(city_qs[random_index])
        location_question = input()
        print("[You] "+location_question)
        location = get_prediction(location_question,location_model,location_vec)
        food_agent(location = location, time = time, food_type = food_type)
        return
    
    elif time == "unclear" or None:
        time_qs=["And what time do you plan to eat? HH:MM", "For which point in time are you asking? HH:MM", "Please specify when you are going to eat, HH:MM", "At what time are we talking? HH:MM"]
        random_index = np.random.randint(0,len(time_qs))
        print(time_qs[random_index])
        time_question = input()
        print("[You] " + time_question)
        time = get_prediction(time_question,time_model, time_vec)
        food_agent(location = location, time = time, food_type = food_type)
        return
    
    elif food_type == "unclear" or None:
        food_type_qs=["What do you want to eat?", "What type of food are you keen on?", "Interested in any particular to eat?"]
        random_index = np.random.randint(0,len(food_type_qs))
        print(food_type_qs[random_index])
        food_question = input()
        print("[You] " + food_question)
        food_type = get_prediction(food_question,food_model,food_vec)
        food_agent(location = location, time = time, food_type = food_type)
        return
    else:
        summary = "Summary: The restaurant: "+ str(restaurants[location[0]][food_type[0]][0])+" in "+location+" serving " + food_type + " is booked at " + time+ ", do you need help with something else?"
        print(summary)
        final = input()
        print("[You]" +final)
        pred = get_prediction(final,answer_model,answer_vec)
        if pred == "no": 
            print("[Chatbot] Okay, goodbye and good luck!")
            return
        else: main_agent(start = True)

def weather_agent(prior_question = None, location = None, time = None):
    if prior_question is not None:
        print("[Chatbot] Good! Let's get you a forecast.")
        location = get_prediction(prior_question,location_model,location_vec)
        time = get_prediction(prior_question, time_model, time_vec) 
        weather_agent(location = location, time = time)
        return
    
    if location == "unclear" or None:
        city_qs=["In what city are you gonna be?", "For which city are you askin?", "Alright, which city are forcasting here?"]
        random_index = np.random.randint(0,len(city_qs))
        print(city_qs[random_index])
        location_question = input()
        print("[You] "+location_question)
        location = get_prediction(location_question,location_model,location_vec)
        weather_agent(location = location, time = time)
        return
    
    elif time == "unclear" or None:
        time_qs=["For which time do you the forcast? HH:MM", "For which point in time are you asking? HH:MM", "Please specify for what time you want to check HH:MM", "At what time are we talking? HH:MM"]
        random_index = np.random.randint(0,len(time_qs))
        print(time_qs[random_index])
        time_question = input()
        print("[You] "+ time_question)
        time = get_prediction(time_question,time_model, time_vec)
        weather_agent(location = location, time = time)
        return

    else:
        curr_weather = str(weather[location[0]][time[0]]['weather'])
        temp = str(weather[location[0]][time[0]]['temperature'])
        summary = "Summary: The weather in "+location+" at " + time+" is: "+curr_weather+ ",and the temp is: "+temp+", do you need help with something else?"
        print(summary)
        final = input()
        print("[You] " + final)
        pred = get_prediction(final,answer_model,answer_vec)
        if pred == "no": 
            print("[Chatbot] Okay, goodbye and good luck!")
            return
        else: main_agent(start = True)

def travel_agent(prior_question = None, location = None, time = None, transport = None, departure = None, arrival = None):
    if prior_question is not None:
        print("[Chatbot] Great! Let's help you plan your transportation.")
        location = get_prediction(prior_question,location_model,location_vec) 
        time = get_prediction(prior_question, time_model, time_vec) 
        transport = get_prediction(prior_question, transport_model, transport_vec)
        travel_agent(location = location, time = time, transport = transport)
        return
    
    if location is not None and location != "unclear":
        print("[Chatbot] Is it correct that you want to travel from: "+location)
        departure_question = input()
        print("[You] "+departure_question)
        answer = get_prediction(departure_question,answer_model,answer_vec)
        if answer == "yes":
            departure = location
            print("[Chatbot] And where do you want to travel to?")
            arrival_question = input()
            print("[You] "+ arrival_question)
            arrival = get_prediction(arrival_question, location_model, location_vec) 
            travel_agent(time = time, transport = transport, departure=departure, arrival = arrival)
            return
        else:
            arrival = location
            print("[Chatbot] Then where do you want to travel from?")
            departure_question = input()
            print("[You] "+ departure_question)
            departure = get_prediction(departure_question,location_model,location_vec)
            travel_agent(time = time, transport = transport, departure=departure, arrival = arrival)
            return
    
    elif location == "unclear":
        city_qs=["From which city are you departing?", "Alright, which city are going from?"]
        random_index = np.random.randint(0,len(city_qs))
        print(city_qs[random_index])
        location_question = input()
        print("[You] "+ location_question)
        location = get_prediction(location_question,location_model,location_vec)
        travel_agent(location = location, time = time, transport = transport, departure=departure, arrival = arrival)
        return
    
    elif time == "unclear" or None:
        time_qs=["At what time do you want to travel? HH:MM", "For which point in time are you asking? HH:MM", "At what time are we talking? HH:MM"]
        random_index = np.random.randint(0,len(time_qs))
        print(time_qs[random_index])
        time_question = input()
        print("[You] "+time_question)
        time = get_prediction(time_question,time_model, time_vec)
        travel_agent(location = location, time = time, transport = transport, departure=departure, arrival = arrival)
        return
    
    elif transport == "unclear" or None:
        transport_qs=["How do you want to travel?", "Any preference regarding the type of transportation?", "Please specify how you want to travel?"]
        random_index = np.random.randint(0,len(transport_qs))
        print(transport_qs[random_index])
        transport_question = input()
        print("[You] "+transport_question)
        transport = get_prediction(transport_question,transport_model, transport_vec)
        travel_agent(location = location, time = time, transport = transport, departure=departure, arrival = arrival)
        return

    else:
        print("Okay! The next " + transport + " to "+arrival+" from " + departure + " leaves  at " + time + ", do you need help with something else?")
        final = input()
        print("[You] " + final)
        pred = get_prediction(final,answer_model,answer_vec)
        if pred == "no": 
            print("[Chatbot] Okay, goodbye and good luck!")
            return
        else: main_agent(start = True)
        return


## Running the code 

In [10]:
main_agent(start = True)

[Chatbot] Hi, how can I help you today?
[You] hi i want to eat sushi in Gothenburg at 10:45
['[Chatbot] Is it correct that you have a question about: food?']
[You] yes
[Chatbot] Okay! Let's help you find somewhere to eat
['Summary: The restaurant: Sushibar Wine in gothenburg serving sushi is booked at 10, do you need help with something else?']
[You]no i dont 
[Chatbot] Okay, goodbye and good luck!


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=7c80b213-9be0-4652-94d2-672258ef4413' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>