# Experiments on Cross Lingual Transfer for Intent Detection
### Zero Shot Experiments


The first step is to prepare the data from Schuster et al. For now we are only examining English and Spanish datasets, since preprocessing Thai requires extra steps and is slightly more complex(tokenization). Firstly, we parse the tsv data into dataframes

In [2]:
from util import *
import pickle
import sklearn
import torch
import numpy as np
#from model import *

In [3]:
mapping = {}
with open('label_map.json','r') as f:
    mapping = json.load(f)
    mapping = {int(k):v for k,v in mapping.items()}
    
    
# preprocess training and test files to pandas df

# eng train
en_df, en_mapping = df_format(("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/en/train-en.tsv"),mapping)
en_df.to_pickle("training_files/en_train.p")

# eng eval
en_df_eval, en_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/en/eval-en.tsv",mapping)
en_df_eval.to_pickle("training_files/en_eval.p")

# eng test
en_df_test, en_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/en/test-en.tsv",mapping)
en_df_test.to_pickle("training_files/en_test.p")

# es train
es_df, es_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/es/train-es.tsv",mapping)
es_df.to_pickle("training_files/es_train.p")

# es eval
es_df_eval, es_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/es/eval-es.tsv",mapping)
es_df_eval.to_pickle("training_files/es_eval.p")

# es test
es_df_test, es_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/es/test-es.tsv",mapping)
es_df_test.to_pickle("training_files/es_test.p")


# th train
th_df, th_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/th/train-th_TH.tsv",mapping)
th_df.to_pickle("training_files/th_train.p")

# th eval
th_df_eval, th_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/th/eval-th_TH.tsv",mapping)
th_df_eval.to_pickle("training_files/th_eval.p")

# th test
th_df_test, th_mapping = df_format("/home/santi/BA/multilingual_task_oriented_dialog_slotfilling/th/test-th_TH.tsv",mapping)
th_df_test.to_pickle("training_files/th_test.p")

mapping_list = list(mapping.values())

opening /home/santi/BA/multilingual_task_oriented_dialog_slotfilling/en/train-en.tsv


FileNotFoundError: [Errno 2] No such file or directory: 'training_files/en_train.p'

In [None]:
en_train = en_df.drop_duplicates("text")
en_eval = en_df_eval.drop_duplicates("text")
en_test = en_df_test.drop_duplicates("text")

es_train = es_df.drop_duplicates("text")
es_eval = es_df_eval.drop_duplicates("text")
es_test = es_df_test.drop_duplicates("text")

th_train = th_df.drop_duplicates("text")
th_eval = th_df_eval.drop_duplicates("text")
th_test = th_df_test.drop_duplicates("text")

In [None]:
th_test

In [None]:
en_full_train = pd.concat([en_train,en_eval])
es_full_train = pd.concat([es_train,es_eval])
th_full_train = pd.concat([th_train, th_eval])

In [None]:
path2model = "/home/santi/BA/final_models/"
path2model_en = "/home/santi/BA/final_models/en/"
path2model_es = "/home/santi/BA/final_models/es/"
path2model_th = "/home/santi/BA/final_models/th/"
path2model_x = "/home/santi/BA/final_models/x/"

In [None]:
experiment_results = {}

In [None]:
def analyze_wrong(wrong_predictions,model):
    wrongs = [(inp.text_a,inp.label) for inp in wrong_predictions]
    wrong_preds, vecs = model.predict([t for t,l in wrongs])

    dom_corr = 0
    weak_dom = 0
    rem_alarms = ["reminder","alarm"]
    results = []

    for (text, lab_true), lab_pred in zip(wrongs,wrong_preds):

        lab_pred = mapping[lab_pred]
        lab_true = mapping[lab_true]
        dom_pred = lab_pred.split("/")[0]
        dom_true = lab_true.split("/")[0]

        if dom_pred == dom_true:
            dom_corr += 1

        if (dom_pred in rem_alarms) and (dom_true in rem_alarms):
            weak_dom += 1    

        results.append((text,lab_pred, lab_true))

        #print(text,"\t" ,lab_pred,"\t", lab_true,"\t", dom_pred,"\t", dom_true)

    return results, dom_corr/len(wrongs)

In [None]:
macro = lambda x,y:  sklearn.metrics.f1_score(x,y, average= 'macro')
micro = lambda x,y:  sklearn.metrics.f1_score(x,y, average= 'micro')
report = lambda x,y:  sklearn.metrics.classification_report(x,y,digits = 5,labels = list(range(0,12)), target_names = mapping_list)
report_dict = lambda x,y:  sklearn.metrics.classification_report(x,y,digits = 5,output_dict = True,labels = list(range(0,12)),target_names = mapping_list)
accuracy = lambda x,y:  sklearn.metrics.accuracy_score(x,y)


In [None]:
def custom_eval(df, model, ex_name = "experiment 1", verbose = True):
    results, predictions_vs, wrongs = model.eval_model(df, macro=macro, micro=micro,accuracy=accuracy, report=report, report_dict = report_dict)
    results["name"] = ex_name
    
    false_preds,dom_acc = analyze_wrong(wrongs,model)
    results["wrong_predictions"] = false_preds
    results["domain_of_wrongs"] = dom_acc
    results["domain_accuracy"] = results["accuracy"] + (1-results["accuracy"])*dom_acc
    
    if verbose:
        print("results for experiment: ",ex_name)

        print(results["report"])
        print("domain accuracy: ",results["domain_accuracy"])
        
    return results

We load up a pretrained XLM model with a Max Ent layer for classification. Arguments are left pretty vanilla except fp16 which is not relevant for the results. 

In [None]:
args={"fp16": False,'learning_rate':1e-5, 'num_train_epochs': 5, 'reprocess_input_data': True, 'overwrite_output_dir': True}

In [None]:
model = build_xlmr(args, 12)

In [None]:
# train english model
# full train = train + eval

model.train_model(en_full_train, output_dir = path2model_en)

In [None]:
# test eng
results = custom_eval(en_test, model, "train_en_test_en")
experiment_results[results["name"]] = results

In [None]:
# test es
results = custom_eval(es_test, model, "train_en_test_es")
experiment_results[results["name"]] = results

In [None]:
for text, predicted, real in experiment_results["train_en_test_es"]["wrong_predictions"]:
    print(text, "\t", predicted, "\t", real)

In [None]:
# test th
results = custom_eval(th_test, model, "train_en_test_th")
experiment_results[results["name"]] = results

In [None]:
for text, predicted, real in experiment_results["train_en_test_th"]["wrong_predictions"]:
    print(text, "\t", predicted, "\t", real)

In [None]:
model.train_model(es_full_train, output_dir = path2model_x)

In [None]:
results = custom_eval(en_test, model, "train_en_es_test_en")
experiment_results[results["name"]] = results

In [None]:
results = custom_eval(es_test, model, "train_en_es_test_es")
experiment_results[results["name"]] = results

In [None]:
results = custom_eval(th_test, model, "train_en_es_test_th")
experiment_results[results["name"]] = results

In [None]:
model.train_model(th_full_train, output_dir = path2model_x)

In [None]:
results = custom_eval(th_test, model, "train_en_es_th_test_th")
experiment_results[results["name"]] = results

In [None]:
for text, predicted, real in experiment_results["train_en_es_th_test_th"]["wrong_predictions"]:
    print(text, "\t", predicted, "\t", real)

In [None]:
results = custom_eval(es_test, model, "train_en_es_th_test_es")
experiment_results[results["name"]] = results

In [None]:
results = custom_eval(en_test, model, "train_en_es_th_test_en")
experiment_results[results["name"]] = results

In [None]:
##### SANITY CHECK #####
def unique_sents(test_df, train_df):
    print("unique utterances in test data out of :", len(test_df))
    unique_sents = []
    train_set = set(train_df["text"])
    for sent in test_df["text"]:
        if sent not in train_set:
            unique_sents.append(sent)
    print(len(unique_sents)/len(test_df)*100,"% of the sentences are unique")

In [None]:
cuanto tiempo queda en mi alarma actual ? 	 alarm/show_alarms 	 alarm/time_left_on_alarm 	 alarm 	 alarmunique_sents(en_test, en_eval)
unique_sents(en_test,en_full_train)

In [None]:
unique_sents(es_test, es_eval)
unique_sents(es_test,es_full_train)

In [None]:
unique_sents(th_test, th_eval)
unique_sents(th_test,th_full_train)

In [None]:
#I AM SANE

In [None]:
predict_sent = lambda sent: mapping[model.predict([sent])[0][0]]

In [None]:
# Some random tests

In [None]:
predict_sent("what's the weather in Potsdam")

In [None]:
predict_sent("don't wake me up tomorrow")

In [None]:

predict_sent("ตั้ง เวลา พรุ่ง บ่าย พรุ่งนี้")

In [None]:
mapping_list

In [None]:
predict_sent("que temperatura hay aqui")

In [None]:
predict_sent("no necesito que levantarme el sabado" )

In [None]:
predict_sent("sabado no necesito que levantarme" )

In [None]:
predict_sent("ไม่ ต้อง ปลุก ฉัน วัน เสาร์ นะ" )

In [None]:
predict_sent("วัน เสาร์ ไม่ ต้อง ปลุก ฉัน นะ")

In [None]:
predict_sent("you don't have to wake me up on saturday")

In [None]:
predict_sent("saturday you don't have to wake me up")

In [None]:
en_full_train[en_full_train["text"].str.contains("^on (saturday|sunday|monday|tuesday)",case=False, regex=True)]

In [None]:
mapping[10]

In [None]:
predict_sent("I don't have to wake up early on saturday")

In [None]:
# this is a weird sentence 
predict_sent("saturday you don't have to wake me up")

In [None]:
predict_sent("am Samstag musst du mich nicht aufwecken")

In [None]:
predict_sent("ich nicht muss aufstehen am Samstag")

In [None]:
"el sabado no necesito el despertador" 
# doesn't work
# implicit 

In [None]:
"cuanto falta hasta el alarma"
"cuanto tiempo queda hasta que me levanto"
"que temperatura hay aqui"