# Step 4 Consolidate all outputs from different models
This notebook consolidates the outputs of the `intent classification model`, the `8/11 slots classification model`, and the `3/11 slots classification model` into a single predicted answer per utterance.

In [1]:
import ast

import pandas as pd
from sklearn.metrics import accuracy_score

In [2]:
# Dev-set inputs: the processed original split, then the outputs of steps 1-3.
dev_files = [
    "./data/dev.csv",
    "./data/dev_step1.csv",
    "./data/dev_step2.csv",
    "./data/dev_step3.csv",
]

# processed original dev set (read without an index column)
dev0_df = pd.read_csv(dev_files[0])
# step 1: contains intent prediction
dev1_df = pd.read_csv(dev_files[1], index_col=0)
# step 2: contains 8/11 slots prediction
dev2_df = pd.read_csv(dev_files[2], index_col=0)
# step 3: contains 3/11 slots prediction
dev3_df = pd.read_csv(dev_files[3], index_col=0)

# One shape per line, in file order, as a quick row-count sanity check.
print(*(frame.shape for frame in (dev0_df, dev1_df, dev2_df, dev3_df)), sep="\n")

(413, 5)
(413, 4)
(413, 7)
(413, 7)


In [5]:
# Test-set inputs: the processed original split, then the outputs of steps 1-3.
test_files = [
    "./data/test.csv",
    "./data/test_step1.csv",
    "./data/test_step2.csv",
    "./data/test_step3.csv",
]

# processed original test set (read without an index column)
test0_df = pd.read_csv(test_files[0])
# step 1: contains intent prediction
test1_df = pd.read_csv(test_files[1], index_col=0)
# step 2: contains 8/11 slots prediction
test2_df = pd.read_csv(test_files[2], index_col=0)
# step 3: contains 3/11 slots prediction
test3_df = pd.read_csv(test_files[3], index_col=0)

# One shape per line, in file order, as a quick row-count sanity check.
print(*(frame.shape for frame in (test0_df, test1_df, test2_df, test3_df)), sep="\n")

(400, 1)
(400, 3)
(400, 3)
(400, 3)


In [6]:
dev0_df  # preview the processed dev set, including its gold answer/intent/slots columns

Unnamed: 0,text,answer_raw,answer,intent,slots
0,I'm looking for a local place to dine in the c...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-food..."
1,My husband and I are celebrating our anniversa...,find_hotel,['find_hotel'],find_hotel,{}
2,I'm looking for an expensive restaurant in the...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric..."
3,Are there any accommodations in the east part ...,find_hotel|hotel-area=east|hotel-parking=yes,"['find_hotel', 'hotel-area=east', 'hotel-parki...",find_hotel,"{'hotel-area': 'east', 'hotel-parking': 'yes'}"
4,"I'm looking for a nice place to stay, somewher...",find_hotel|hotel-internet=yes|hotel-pricerange...,"['find_hotel', 'hotel-internet=yes', 'hotel-pr...",find_hotel,"{'hotel-internet': 'yes', 'hotel-pricerange': ..."
...,...,...,...,...,...
408,I'm looking for info about 4-star accommodatio...,find_hotel|hotel-internet=yes|hotel-stars=4,"['find_hotel', 'hotel-internet=yes', 'hotel-st...",find_hotel,"{'hotel-internet': 'yes', 'hotel-stars': '4'}"
409,I'm looking for a place to eat that is cheap a...,find_restaurant|restaurant-area=centre|restaur...,"['find_restaurant', 'restaurant-area=centre', ...",find_restaurant,"{'restaurant-area': 'centre', 'restaurant-pric..."
410,"Hi, I'm looking for an expensive restaurant in...",find_restaurant|restaurant-area=north|restaura...,"['find_restaurant', 'restaurant-area=north', '...",find_restaurant,"{'restaurant-area': 'north', 'restaurant-price..."
411,Can you help me find a restaurant? I want some...,find_restaurant|restaurant-pricerange=expensive,"['find_restaurant', 'restaurant-pricerange=exp...",find_restaurant,{'restaurant-pricerange': 'expensive'}


In [20]:
# preprocessing before merge:
def preprocess_n_consolidation(df1, df2, df3):
    """
    Consolidate intent and slot predictions into one frame of final answers.

    Pass in either (dev1_df, dev2_df, dev3_df)
    or (test1_df, test2_df, test3_df) for consolidation.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Intent predictions; must contain a ``predicted`` column.
    df2, df3 : pandas.DataFrame
        Slot predictions; each must contain a ``pred_slots`` column whose
        values are dicts or the string repr of a dict literal. Rows are
        aligned with ``df1`` on the index, which must be unique.

    Returns
    -------
    pandas.DataFrame
        A new frame (inputs are not modified) holding the columns of ``df1``
        with ``predicted`` renamed to ``pred_intent``, plus:

        * ``pred_slots_2`` / ``pred_slots_3`` - parsed slot dicts of df2 / df3
        * ``pred_slots`` - both dicts merged (df3 wins on a shared key),
          restricted to and sorted by the canonical slot order below
        * ``pred_answers`` - ``[intent, "slot=value", ...]``
        * ``pred_answer_raw`` - ``pred_answers`` joined with ``|``
    """
    # Canonical slot order of the answer string; slots not listed are dropped.
    order = [
        "hotel-area","hotel-internet","hotel-name","hotel-parking","hotel-pricerange",'hotel-stars',"hotel-type",
        "restaurant-area","restaurant-food","restaurant-name","restaurant-pricerange",
    ]

    def _parse_slots(raw):
        """Turn one CSV cell into a slot dict without executing arbitrary code."""
        if isinstance(raw, dict):
            return raw
        # The left merge leaves NaN where a row has no slot prediction.
        if pd.isna(raw):
            return {}
        # literal_eval only accepts Python literals, unlike eval().
        return ast.literal_eval(raw)

    slots_2 = df2.rename(columns={"pred_slots": "pred_slots_2"})[["pred_slots_2"]]
    slots_3 = df3.rename(columns={"pred_slots": "pred_slots_3"})[["pred_slots_3"]]

    df = df1.rename(columns={"predicted": "pred_intent"})
    df = df.merge(slots_2, left_index=True, right_index=True, how="left", validate="1:1")
    df = df.merge(slots_3, left_index=True, right_index=True, how="left", validate="1:1")

    df["pred_slots_2"] = [_parse_slots(raw) for raw in df["pred_slots_2"]]
    df["pred_slots_3"] = [_parse_slots(raw) for raw in df["pred_slots_3"]]

    # Step-3 predictions override step-2 predictions on a shared key.
    merged = [
        {**first, **second}
        for first, second in zip(df["pred_slots_2"], df["pred_slots_3"])
    ]
    df["pred_slots"] = [
        {key: slots[key] for key in order if key in slots} for slots in merged
    ]

    # f-string formatting also copes with non-string values (e.g. an int star rating).
    slot_pairs = [
        [f"{key}={value}" for key, value in slots.items()]
        for slots in df["pred_slots"]
    ]

    # The intent leads the answer list; build new lists instead of mutating in place.
    df["pred_answers"] = [
        [intent, *pairs] for intent, pairs in zip(df["pred_intent"], slot_pairs)
    ]
    df["pred_answer_raw"] = ["|".join(answer) for answer in df["pred_answers"]]

    return df

In [21]:
# Consolidate the dev-set intent and slot predictions into one frame.
dev4_df = preprocess_n_consolidation(dev1_df, dev2_df, dev3_df)

In [22]:
# Consolidate the test-set intent and slot predictions into one frame.
test4_df = preprocess_n_consolidation(test1_df, test2_df, test3_df)

In [24]:
# Slots that take part in scoring.
slot_names = ['hotel-name', 'hotel-stars', 
              'hotel-area', 'hotel-internet', 
              'hotel-pricerange', 'hotel-parking', 
              'hotel-type',
              'restaurant-food', 
              'restaurant-name', 
              'restaurant-pricerange', 
              'restaurant-area']

def get_accuracy(gold_slots, pred_slots, slot_lists=slot_names):
    """
    Return the fraction of examples whose predicted slots exactly match gold.

    Parameters
    ----------
    gold_slots : sized iterable of dict
        Gold slot dictionaries, one per example.
    pred_slots : iterable of dict
        Predicted slot dictionaries, paired with ``gold_slots`` by position.
    slot_lists : iterable of str, optional
        Slot names to compare; any other key is ignored on both sides.
        Defaults to ``slot_names``.

    Returns
    -------
    float
        ``correct / len(gold_slots)``, or ``0.0`` when ``gold_slots`` is empty.
    """
    total = len(gold_slots)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    if slot_lists is None:
        slot_lists = slot_names
    # Honour the slot_lists argument rather than the module-level list.
    tracked = frozenset(slot_lists)

    correct_count = 0
    for gold_slot, pred_slot in zip(gold_slots, pred_slots):
        gold = {k: v for k, v in gold_slot.items() if k in tracked}
        pred = {k: v for k, v in pred_slot.items() if k in tracked}
        if gold == pred:
            correct_count += 1
    return correct_count / total



In [25]:
# dev: share of examples whose consolidated slots exactly match the gold slots.
# The gold slots are stored as dict reprs in the CSV; parse them with
# ast.literal_eval, which accepts only literals, rather than eval.
get_accuracy(dev0_df["slots"].apply(ast.literal_eval), dev4_df["pred_slots"])

0.7336561743341404

In [26]:
# dev: exact-match accuracy of the full answer string (intent plus slots), as a percentage
print("Accuracy score =", accuracy_score(dev0_df["answer_raw"], dev4_df["pred_answer_raw"]) * 100)

Accuracy score = 73.36561743341404


In [27]:
# Output (dev): the full consolidated frame as CSV, then a tab-separated
# file holding only the text and the predicted answer string.
dev4_df.to_csv("./data/dev_step4.csv")
dev4_df[["text","pred_answer_raw"]].to_csv('./data/dev_pred.txt', sep='\t', index=False)

In [30]:
# Output (test): the full consolidated frame as CSV, then a tab-separated
# file holding only the text and the predicted answer string.
test4_df.to_csv("./data/test_step4.csv")
test4_df[["text","pred_answer_raw"]].to_csv('./data/test_pred.txt', sep='\t', index=False)



In [35]:
test4_df  # display the consolidated test predictions

Unnamed: 0,text,text_lower,pred_intent,pred_slots_2,pred_slots_3,pred_slots,pred_answers,pred_answer_raw
0,"Hello, I am looking for a restaurant in Cambri...","hello, i am looking for a restaurant in cambri...",find_restaurant,{},{'restaurant-name': 'golden wok'},{'restaurant-name': 'golden wok'},"[find_restaurant, restaurant-name=golden wok]",find_restaurant|restaurant-name=golden wok
1,"Hi, I'm looking for a hotel to stay in that in...","hi, i'm looking for a hotel to stay in that in...",find_hotel,{'hotel-internet': 'yes'},{},{'hotel-internet': 'yes'},"[find_hotel, hotel-internet=yes]",find_hotel|hotel-internet=yes
2,I am looking for a place to stay in the north ...,i am looking for a place to stay in the north ...,find_hotel,"{'hotel-stars': '4', 'hotel-area': 'north'}",{},"{'hotel-area': 'north', 'hotel-stars': '4'}","[find_hotel, hotel-area=north, hotel-stars=4]",find_hotel|hotel-area=north|hotel-stars=4
3,"I need a place to dine, and I'd like to know w...","i need a place to dine, and i'd like to know w...",find_restaurant,{},{'restaurant-food': 'asian oriental'},{'restaurant-food': 'asian oriental'},"[find_restaurant, restaurant-food=asian oriental]",find_restaurant|restaurant-food=asian oriental
4,I need a five starts hotel close to a mall and...,i need a five starts hotel close to a mall and...,find_hotel,{'hotel-internet': 'yes'},{},{'hotel-internet': 'yes'},"[find_hotel, hotel-internet=yes]",find_hotel|hotel-internet=yes
...,...,...,...,...,...,...,...,...
395,I am looking for a place to stay. The hotel sh...,i am looking for a place to stay. the hotel sh...,find_hotel,{'hotel-area': 'north'},{},{'hotel-area': 'north'},"[find_hotel, hotel-area=north]",find_hotel|hotel-area=north
396,I am looking to book a hotel in the Cambridge ...,i am looking to book a hotel in the cambridge ...,find_hotel,{},{},{},[find_hotel],find_hotel
397,I would like to go to an Indian restaurant in ...,i would like to go to an indian restaurant in ...,find_restaurant,{'restaurant-area': 'north'},{'restaurant-food': 'indian'},"{'restaurant-area': 'north', 'restaurant-food'...","[find_restaurant, restaurant-area=north, resta...",find_restaurant|restaurant-area=north|restaura...
398,I'm looking for a place to eat in the centre t...,i'm looking for a place to eat in the centre t...,find_restaurant,{'restaurant-area': 'centre'},{'restaurant-food': 'chinese'},"{'restaurant-area': 'centre', 'restaurant-food...","[find_restaurant, restaurant-area=centre, rest...",find_restaurant|restaurant-area=centre|restaur...


In [39]:
# Build the submission frame: the row index becomes "ID" and the predicted
# answer string becomes "Expected".
kaggle_df = (
    test4_df
    .copy()
    .reset_index()
    .rename(columns={"index": "ID", "pred_answer_raw": "Expected"})
)

# Only the two submission columns are written out.
kaggle_df[["ID", "Expected"]].to_csv('./data/WOZ_test_ans.csv', index=False)

In [51]:
# Show the test rows whose predicted answer includes a restaurant-name slot.
kaggle_df[kaggle_df["Expected"].str.contains("restaurant-name")]

Unnamed: 0,ID,text,text_lower,pred_intent,pred_slots_2,pred_slots_3,pred_slots,pred_answers,Expected
0,0,"Hello, I am looking for a restaurant in Cambri...","hello, i am looking for a restaurant in cambri...",find_restaurant,{},{'restaurant-name': 'golden wok'},{'restaurant-name': 'golden wok'},"[find_restaurant, restaurant-name=golden wok]",find_restaurant|restaurant-name=golden wok
15,15,i am looking for the chiquito restaurant bar\n,i am looking for the chiquito restaurant bar\n,find_restaurant,{},{'restaurant-name': 'chiquito restaurant bar'},{'restaurant-name': 'chiquito restaurant bar'},"[find_restaurant, restaurant-name=chiquito res...",find_restaurant|restaurant-name=chiquito resta...
16,16,I need information about a certain restaurant ...,i need information about a certain restaurant ...,find_restaurant,{},{'restaurant-name': 'charlie chan'},{'restaurant-name': 'charlie chan'},"[find_restaurant, restaurant-name=charlie chan]",find_restaurant|restaurant-name=charlie chan
18,18,I'm looking for this restaurant called pizza h...,i'm looking for this restaurant called pizza h...,find_restaurant,{},{'restaurant-name': 'pizza hut fen ditton'},{'restaurant-name': 'pizza hut fen ditton'},"[find_restaurant, restaurant-name=pizza hut fe...",find_restaurant|restaurant-name=pizza hut fen ...
27,27,I am looking for a restaurant named Meghna.\n,i am looking for a restaurant named meghna.\n,find_restaurant,{},{'restaurant-name': 'meghna'},{'restaurant-name': 'meghna'},"[find_restaurant, restaurant-name=meghna]",find_restaurant|restaurant-name=meghna
38,38,"Hello, I would like some information about a r...","hello, i would like some information about a r...",find_restaurant,{},{'restaurant-name': 'bedouin'},{'restaurant-name': 'bedouin'},"[find_restaurant, restaurant-name=bedouin]",find_restaurant|restaurant-name=bedouin
70,70,I'm looking for a restaurant called nandos. Ar...,i'm looking for a restaurant called nandos. ar...,find_restaurant,{},{'restaurant-name': 'nandos'},{'restaurant-name': 'nandos'},"[find_restaurant, restaurant-name=nandos]",find_restaurant|restaurant-name=nandos
82,82,Find me a restaurant called cocum\n,find me a restaurant called cocum\n,find_restaurant,{},{'restaurant-name': 'cocum'},{'restaurant-name': 'cocum'},"[find_restaurant, restaurant-name=cocum]",find_restaurant|restaurant-name=cocum
94,94,"Hi, I'm trying to find out more about a restau...","hi, i'm trying to find out more about a restau...",find_restaurant,{},{'restaurant-name': 'cocum'},{'restaurant-name': 'cocum'},"[find_restaurant, restaurant-name=cocum]",find_restaurant|restaurant-name=cocum
95,95,i need info about the slug and lettuce restaur...,i need info about the slug and lettuce restaur...,find_restaurant,{},{'restaurant-name': 'slug and lettuce restaura...,{'restaurant-name': 'slug and lettuce restaura...,"[find_restaurant, restaurant-name=slug and let...",find_restaurant|restaurant-name=slug and lettu...
