## Step 3 - Three slots (hotel-name, restaurant-food, restaurant-name)
In this notebook, we use Logistic Regression binary classifier to determine if hotel-name, restaurant-food, restaurant-name exists.  
Then, we implement QA model to identify the information carried.

In [1]:
# from sklearn.feature_extraction.text import TfidfVectorizer

from transformers import AutoTokenizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline


In [2]:
import pandas as pd

In [3]:
slot_names = ['hotel-name', 
              'restaurant-food', 
              'restaurant-name',
             ]

In [4]:
train_file = "./data/train.csv"
train_df = pd.read_csv(train_file)

train_df.head()

Unnamed: 0,text,answer,intent,slots
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye..."
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'}
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'}
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri..."
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric..."


In [5]:
train_df["text_lower"] = train_df["text"].str.lower()

In [6]:
# convert slots to json objects
train_df["slots"] = train_df["slots"].apply(lambda s: eval(s))
train_df.head()

Unnamed: 0,text,answer,intent,slots,text_lower
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye...","guten tag, i am staying overnight in cambridge..."
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'},hi there! can you give me some info on cityroomz?
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'},i am looking for a hotel named alyesbray lodge...
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",i am looking for a restaurant. i would like so...
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...


In [7]:
# filter to hotel only
# train_df = train_df.query("intent==@category")
print(len(train_df))
# train_df.reset_index(inplace=True)
# train_df.drop(columns=["index"], inplace=True)
train_df.head()

3760


Unnamed: 0,text,answer,intent,slots,text_lower
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye...","guten tag, i am staying overnight in cambridge..."
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'},hi there! can you give me some info on cityroomz?
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'},i am looking for a hotel named alyesbray lodge...
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",i am looking for a restaurant. i would like so...
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...


In [8]:
# create sentence transformer embeddings
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/LaBSE")

x_train = train_df["text_lower"]
x_train = list(train_df["text_lower"])

x_train_labse = model.encode(x_train)

print(x_train_labse.shape)

(3760, 768)


In [9]:
# train using logistic regression

In [10]:

def transform_to_binary(slots, slot):
    if slot in list(slots.keys()):
        res = "_"
    else:
        res = ""
    return res

In [11]:
# kw = 'hotel-internet'
# create a master classifier, which contains classifier for each slot
master_clf = dict()

for slot in slot_names:
    print("Training for slot =", slot)
    clf = LogisticRegression(random_state=42)
    y_train = train_df["slots"].apply(lambda slots: transform_to_binary(slots, slot))
    # print(y_train.shape)
    print()
    clf.fit(x_train_labse, y_train)
    print(f"Train score = {clf.score(x_train_labse, y_train) * 100:.2f} %")
    master_clf[slot] = clf
    print("------------")

print("Completed")

Training for slot = hotel-name

Train score = 99.10 %
------------
Training for slot = restaurant-food

Train score = 98.64 %
------------
Training for slot = restaurant-name

Train score = 98.67 %
------------
Completed


In [12]:
# create predicted dictionary for each item
train_df["pred_slots"] = train_df["text"].apply(lambda x: dict())
print(train_df.iloc[0]["pred_slots"])
train_df.head()

{}


Unnamed: 0,text,answer,intent,slots,text_lower,pred_slots
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye...","guten tag, i am staying overnight in cambridge...",{}
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'},hi there! can you give me some info on cityroomz?,{}
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'},i am looking for a hotel named alyesbray lodge...,{}
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",i am looking for a restaurant. i would like so...,{}
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...,{}


In [13]:
# double check train scores
for slot in slot_names:
    y_pred = master_clf[slot].predict(x_train_labse)
    # print("Train score =", master_clf[slot].score(x_train_tfidf, y_train))
    y_train = train_df["slots"].apply(lambda slots: transform_to_binary(slots, slot))
    print("Accuracy score =", accuracy_score(y_train, y_pred) * 100) # should be the same

    # go through non-empty result and add to pred_slots
    for i, item in enumerate(y_pred):
        # print(i)
        if item is not None and item != "":
            item_slot = train_df.iloc[i]["pred_slots"]
            # print(item_slot)
            item_slot.update({slot:item})
            train_df.at[i, "pred_slots"] = item_slot

Accuracy score = 99.09574468085106
Accuracy score = 98.6436170212766
Accuracy score = 98.67021276595744


In [14]:
train_df.head()

Unnamed: 0,text,answer,intent,slots,text_lower,pred_slots
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye...","guten tag, i am staying overnight in cambridge...",{}
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'},hi there! can you give me some info on cityroomz?,{}
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'},i am looking for a hotel named alyesbray lodge...,{'hotel-name': '_'}
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",i am looking for a restaurant. i would like so...,{'restaurant-food': '_'}
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...,{}


In [15]:
# model_name = "consciousAI/question-answering-roberta-base-s-v2"
model_name = "deepset/roberta-base-squad2"

def get_prediction(question, context, model=model_name):
    
    nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
    
    QA_input = {
        'question': question,
        'context': context
    }
    answer = nlp(QA_input)

    final_answer = answer["answer"].lower()
    
    return final_answer

In [16]:
# Slot:Question dictionary:
SQ_dict = {
    "hotel-name" : "What is the hotel name?",
    "restaurant-food" : "What kind of food does the restaurant serve?",
    "restaurant-name" : "What is the name of the restaurant?",
}

In [17]:
def update_pred_slots(row):
    pred_slots = row["pred_slots"]
    for slot, query in SQ_dict.items():
        if slot in pred_slots:
            pred_slots[slot] = get_prediction(query, row["text"])
    return pred_slots

train_df["pred_slots"] = train_df.apply(update_pred_slots, axis=1)

In [18]:
train_df.head()

Unnamed: 0,text,answer,intent,slots,text_lower,pred_slots
0,"Guten Tag, I am staying overnight in Cambridge...","['find_hotel', 'hotel-area=centre', 'hotel-int...",find_hotel,"{'hotel-area': 'centre', 'hotel-internet': 'ye...","guten tag, i am staying overnight in cambridge...",{}
1,Hi there! Can you give me some info on Cityroomz?,"['find_hotel', 'hotel-name=cityroomz']",find_hotel,{'hotel-name': 'cityroomz'},hi there! can you give me some info on cityroomz?,{}
2,I am looking for a hotel named alyesbray lodge...,"['find_hotel', 'hotel-name=alyesbray lodge gue...",find_hotel,{'hotel-name': 'alyesbray lodge guest house'},i am looking for a hotel named alyesbray lodge...,{'hotel-name': 'alyesbray lodge guest house'}
3,I am looking for a restaurant. I would like so...,"['find_restaurant', 'restaurant-food=chinese',...",find_restaurant,"{'restaurant-food': 'chinese', 'restaurant-pri...",i am looking for a restaurant. i would like so...,{'restaurant-food': 'chinese'}
4,I'm looking for an expensive restaurant in the...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...,{}


In [19]:
# get accuracy on train set
def get_accuracy(gold_slots, pred_slots, slot_lists=slot_names):
    """
        return accuracy of predicted slots vs gold slots in dictionary form
    """
    correct_count = 0
    for gold_slot, pred_slot in zip(gold_slots, pred_slots):
        gold = {k:v 
                for k, v in gold_slot.items()
                if k in slot_names
               }
        # print(gold)
        sys = {k:v 
                for k, v in pred_slot.items()
                if k in slot_names
               }
        # print(sys)
        # if gold_slot == pred_slot:
        if gold == sys:
            correct_count += 1
    return correct_count / len(gold_slots)

get_accuracy(train_df["slots"], train_df["pred_slots"])

0.8856382978723404

In [20]:
# test on dev set
dev_filename = "./data/dev.csv"
dev_df = pd.read_csv(dev_filename)
print(dev_df.columns)
dev_df.head()

Index(['text', 'answer_raw', 'answer', 'intent', 'slots'], dtype='object')


Unnamed: 0,text,answer_raw,answer,intent,slots
0,I'm looking for a local place to dine in the c...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-food..."
1,My husband and I are celebrating our anniversa...,find_hotel,['find_hotel'],find_hotel,{}
2,I'm looking for an expensive restaurant in the...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric..."
3,Are there any accommodations in the east part ...,find_hotel|hotel-area=east|hotel-parking=yes,"['find_hotel', 'hotel-area=east', 'hotel-parki...",find_hotel,"{'hotel-area': 'east', 'hotel-parking': 'yes'}"
4,"I'm looking for a nice place to stay, somewher...",find_hotel|hotel-internet=yes|hotel-pricerange...,"['find_hotel', 'hotel-internet=yes', 'hotel-pr...",find_hotel,"{'hotel-internet': 'yes', 'hotel-pricerange': ..."


In [21]:
# filter to hotel only
# category = 'find_hotel'
# dev_df = dev_df.query("intent==@category")
print(len(dev_df))
# dev_df.reset_index(inplace=True)
# dev_df.drop(columns=["index"], inplace=True)
dev_df.head()

413


Unnamed: 0,text,answer_raw,answer,intent,slots
0,I'm looking for a local place to dine in the c...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-food..."
1,My husband and I are celebrating our anniversa...,find_hotel,['find_hotel'],find_hotel,{}
2,I'm looking for an expensive restaurant in the...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric..."
3,Are there any accommodations in the east part ...,find_hotel|hotel-area=east|hotel-parking=yes,"['find_hotel', 'hotel-area=east', 'hotel-parki...",find_hotel,"{'hotel-area': 'east', 'hotel-parking': 'yes'}"
4,"I'm looking for a nice place to stay, somewher...",find_hotel|hotel-internet=yes|hotel-pricerange...,"['find_hotel', 'hotel-internet=yes', 'hotel-pr...",find_hotel,"{'hotel-internet': 'yes', 'hotel-pricerange': ..."


In [22]:
# apply some pre-processing
dev_df["text_lower"] = dev_df["text"].str.lower()
# convert slots to json objects
dev_df["slots"] = dev_df["slots"].apply(lambda s: eval(s))
dev_df.head()

Unnamed: 0,text,answer_raw,answer,intent,slots,text_lower
0,I'm looking for a local place to dine in the c...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-food...",i'm looking for a local place to dine in the c...
1,My husband and I are celebrating our anniversa...,find_hotel,['find_hotel'],find_hotel,{},my husband and i are celebrating our anniversa...
2,I'm looking for an expensive restaurant in the...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...
3,Are there any accommodations in the east part ...,find_hotel|hotel-area=east|hotel-parking=yes,"['find_hotel', 'hotel-area=east', 'hotel-parki...",find_hotel,"{'hotel-area': 'east', 'hotel-parking': 'yes'}",are there any accommodations in the east part ...
4,"I'm looking for a nice place to stay, somewher...",find_hotel|hotel-internet=yes|hotel-pricerange...,"['find_hotel', 'hotel-internet=yes', 'hotel-pr...",find_hotel,"{'hotel-internet': 'yes', 'hotel-pricerange': ...","i'm looking for a nice place to stay, somewher..."


In [24]:
x_dev = dev_df["text"]
# x_dev_tfidf = vectorizer.transform(x_dev)
x_dev = dev_df["text_lower"]
x_dev = list(x_dev)
# x_train_tfidf = vectorizer.fit_transform(x_train)


x_dev_labse = model.encode(x_dev)

print(x_dev_labse.shape)

(413, 768)


In [25]:
# create predicted dictionary for each item
dev_df["pred_slots"] = dev_df["text"].apply(lambda x: dict())
dev_df.head()

Unnamed: 0,text,answer_raw,answer,intent,slots,text_lower,pred_slots
0,I'm looking for a local place to dine in the c...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-food...",i'm looking for a local place to dine in the c...,{}
1,My husband and I are celebrating our anniversa...,find_hotel,['find_hotel'],find_hotel,{},my husband and i are celebrating our anniversa...,{}
2,I'm looking for an expensive restaurant in the...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...,{}
3,Are there any accommodations in the east part ...,find_hotel|hotel-area=east|hotel-parking=yes,"['find_hotel', 'hotel-area=east', 'hotel-parki...",find_hotel,"{'hotel-area': 'east', 'hotel-parking': 'yes'}",are there any accommodations in the east part ...,{}
4,"I'm looking for a nice place to stay, somewher...",find_hotel|hotel-internet=yes|hotel-pricerange...,"['find_hotel', 'hotel-internet=yes', 'hotel-pr...",find_hotel,"{'hotel-internet': 'yes', 'hotel-pricerange': ...","i'm looking for a nice place to stay, somewher...",{}


In [26]:
for slot in slot_names:
    # y_dev = dev_df["target"]
    y_dev = dev_df["slots"].apply(lambda slots: transform_to_binary(slots, slot))
    
    dev_pred = master_clf[slot].predict(x_dev_labse)
    # dev_score = clf.score(x_dev_tfidf, y_dev)
    print("Slot prediction =", slot)
    print(f"Dev accuracy = {accuracy_score(y_dev, dev_pred) * 100:.2f} %")

    # go through non-empty result and add to pred_slots
    for i, item in enumerate(dev_pred):
    # print(i)
        if item is not None and item != "":
            item_slot = dev_df.iloc[i]["pred_slots"]
            # print(item_slot)
            item_slot.update({slot:item})
            dev_df.at[i, "pred_slots"] = item_slot
            
    # print("Dev accuracy =", dev_score * 100)

Slot prediction = hotel-name
Dev accuracy = 98.79 %
Slot prediction = restaurant-food
Dev accuracy = 98.06 %
Slot prediction = restaurant-name
Dev accuracy = 96.61 %


In [27]:
dev_df.head()

Unnamed: 0,text,answer_raw,answer,intent,slots,text_lower,pred_slots
0,I'm looking for a local place to dine in the c...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-food...",i'm looking for a local place to dine in the c...,{'restaurant-food': '_'}
1,My husband and I are celebrating our anniversa...,find_hotel,['find_hotel'],find_hotel,{},my husband and i are celebrating our anniversa...,{}
2,I'm looking for an expensive restaurant in the...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...,{}
3,Are there any accommodations in the east part ...,find_hotel|hotel-area=east|hotel-parking=yes,"['find_hotel', 'hotel-area=east', 'hotel-parki...",find_hotel,"{'hotel-area': 'east', 'hotel-parking': 'yes'}",are there any accommodations in the east part ...,{}
4,"I'm looking for a nice place to stay, somewher...",find_hotel|hotel-internet=yes|hotel-pricerange...,"['find_hotel', 'hotel-internet=yes', 'hotel-pr...",find_hotel,"{'hotel-internet': 'yes', 'hotel-pricerange': ...","i'm looking for a nice place to stay, somewher...",{}


In [28]:
def update_pred_slots(row):
    pred_slots = row["pred_slots"]
    for slot, query in SQ_dict.items():
        if slot in pred_slots:
            pred_slots[slot] = get_prediction(query, row["text"])
    return pred_slots

dev_df["pred_slots"] = dev_df.apply(update_pred_slots, axis=1)

In [29]:
dev_df

Unnamed: 0,text,answer_raw,answer,intent,slots,text_lower,pred_slots
0,I'm looking for a local place to dine in the c...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-food...",i'm looking for a local place to dine in the c...,{'restaurant-food': 'chinese'}
1,My husband and I are celebrating our anniversa...,find_hotel,['find_hotel'],find_hotel,{},my husband and i are celebrating our anniversa...,{}
2,I'm looking for an expensive restaurant in the...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for an expensive restaurant in the...,{}
3,Are there any accommodations in the east part ...,find_hotel|hotel-area=east|hotel-parking=yes,"['find_hotel', 'hotel-area=east', 'hotel-parki...",find_hotel,"{'hotel-area': 'east', 'hotel-parking': 'yes'}",are there any accommodations in the east part ...,{}
4,"I'm looking for a nice place to stay, somewher...",find_hotel|hotel-internet=yes|hotel-pricerange...,"['find_hotel', 'hotel-internet=yes', 'hotel-pr...",find_hotel,"{'hotel-internet': 'yes', 'hotel-pricerange': ...","i'm looking for a nice place to stay, somewher...",{}
...,...,...,...,...,...,...,...
408,I'm looking for info about 4-star accommodatio...,find_hotel|hotel-internet=yes|hotel-stars=4,"['find_hotel', 'hotel-internet=yes', 'hotel-st...",find_hotel,"{'hotel-internet': 'yes', 'hotel-stars': '4'}",i'm looking for info about 4-star accommodatio...,{}
409,I'm looking for a place to eat that is cheap a...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric...",i'm looking for a place to eat that is cheap a...,{}
410,"Hi, I'm looking for an expensive restaurant in...",find_restaurant|restaurant-area=north|restaura...,"['find_restaurant', 'restaurant-area=north', '...",find_restaurant,"{'restaurant-area': 'north', 'restaurant-price...","hi, i'm looking for an expensive restaurant in...",{}
411,Can you help me find a restaurant? I want some...,find_restaurant|restaurant-pricerange=expensive,"['find_restaurant', 'restaurant-pricerange=exp...",find_restaurant,{'restaurant-pricerange': 'expensive'},can you help me find a restaurant? i want some...,{}


In [30]:
# calculate overall accuracy 
score = get_accuracy(dev_df["slots"], dev_df["pred_slots"])
print(f"Overall accuracy = {score:.2f} %")

Overall accuracy = 0.84 %


In [56]:
# output
dev_df.to_csv("./data/dev_step3.csv")

In [31]:
#test set
test_filename = "./data/test.csv"
test_df = pd.read_csv(test_filename)

In [33]:

x_test = test_df["text_lower"]
x_test = list(x_test)

x_test_labse = model.encode(x_test)

print(x_test_labse.shape)

(400, 768)


In [34]:
# create predicted dictionary for each item
test_df["pred_slots"] = test_df["text"].apply(lambda x: dict())


In [35]:
for slot in slot_names:
    
    test_pred = master_clf[slot].predict(x_test_labse)

    # go through non-empty result and add to pred_slots
    for i, item in enumerate(test_pred):
        if item is not None and item != "":
            item_slot = test_df.iloc[i]["pred_slots"]
            # print(item_slot)
            item_slot.update({slot:item})
            test_df.at[i, "pred_slots"] = item_slot

In [36]:
test_df["pred_slots"] = test_df.apply(update_pred_slots, axis=1)

In [37]:
# output
test_df.to_csv("./data/test_step3.csv")