In [2]:
from simpletransformers.ner import NERModel
import pandas as pd
import logging
import sklearn
import sklearn_crfsuite
import torch

In [3]:
from sklearn_crfsuite import metrics

In [4]:
# Seed PyTorch's global random number generator; the value is named so it
# can be found and changed in one place.
SEED = 1366
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f371d9ff250>

In [5]:
# Locations of the CoNLL-formatted train/test splits, one pair per language.

# English
path_2_en_train = "data/formatted/en_format_train.conll"
path_2_en_test = "data/formatted/en_format_test.conll"

# Spanish
path_2_es_train = "data/formatted/es_format_train.conll"
path_2_es_test = "data/formatted/es_format_test.conll"

# Thai
path_2_th_train = "data/formatted/th_format_train.conll"
path_2_th_test = "data/formatted/th_format_test.conll"

# Directory name used as an output location for checkpoints.
path_2_checkpoints = "prelim_models"

In [6]:
# Slot tag inventory handed to NERModel via its `labels` argument.
# The order below is deliberately left exactly as it was: the list is passed
# to the model verbatim (presumably position determines the class index, so
# reordering could invalidate saved models - confirm before changing).
labels = [
    "B-alarm/alarm_modifier",
    "I-reminder/reference",
    "B-reminder/reminder_modifier",
    "I-reminder/todo",
    "NoLabel",
    "B-timer/attributes",
    "B-datetime",
    "B-reminder/todo",
    "B-reminder/recurring_period",
    "B-timer/noun",
    "I-weather/noun",
    "B-negation",
    "B-reminder/noun",
    "I-weather/attribute",
    "I-alarm/alarm_modifier",
    "B-weather/noun",
    "I-datetime",
    "B-weather/attribute",
    "I-reminder/recurring_period",
    "I-location",
    "B-demonstrative_reference",
    "B-location",
    "I-reminder/reminder_modifier",
    "B-reminder/reference",
    "B-weather/temperatureUnit",
    "I-reminder/noun",
    "B-news/type",
    "I-demonstrative_reference",
    "I-negation",
    "B-alarm/recurring_period",
    "I-alarm/recurring_period",
]

In [12]:
# Shared NERModel configuration. Later cells add an "output_dir" entry to this
# same dict before building each model.
args = {
    "fp16": True,
    "reprocess_input_data": True,
    "evaluate_during_training": False,
    "evaluate_during_training_verbose": False,
    "overwrite_output_dir": True,
    "num_train_epochs": 3,  # set to 1 for test.
    "save_steps": -1,
    "save_model_every_epoch": False,
}

In [13]:
# Metric callables handed to `model.eval_model(...)` as extra keyword metrics.
# Each takes two sequence-of-sequences arguments `x` and `y` and forwards them
# unchanged to the matching sklearn_crfsuite.metrics helper.
# NOTE(review): presumably `x` is the gold tags and `y` the predictions, as
# supplied by simpletransformers - confirm against the library.


def macro(x, y):
    """Return the macro-averaged F1 score from `metrics.flat_f1_score`."""
    return metrics.flat_f1_score(x, y, average='macro')


def micro(x, y):
    """Return the micro-averaged F1 score from `metrics.flat_f1_score`."""
    return metrics.flat_f1_score(x, y, average='micro')


def report(x, y):
    """Return the text classification report, printed with 5 digits."""
    return metrics.flat_classification_report(x, y, digits=5)


def report_dict(x, y):
    """Return the classification report as a dict, restricted to `labels`.

    The class filter is the tag names themselves. The text reports produced
    by this notebook list classes by tag string (e.g. 'B-datetime'), so the
    previous integer filter `range(len(labels))` could not match any class.
    """
    return metrics.flat_classification_report(
        x,
        y,
        digits=5,
        output_dict=True,
        labels=labels,
        target_names=labels,
    )


def accuracy(x, y):
    """Return the token-level accuracy from `metrics.flat_accuracy_score`."""
    return metrics.flat_accuracy_score(x, y)


def seq_accuracy(x, y):
    """Return the result of `metrics.sequence_accuracy_score`."""
    return metrics.sequence_accuracy_score(x, y)

In [10]:
def test_model(model,test_file, return_dict = False):
    result, _, predictions = model.eval_model\
                                        (test_file,
                                         macro=macro,
                                         micro=micro,
                                         accuracy=accuracy,
                                         report=report,
                                         seq_accuracy = seq_accuracy)
    print("tested on: ",test_file)
    print(result["report"])
    print("sequence accuracy", result["seq_accuracy"])
    if return_dict:
        return result
    
    
def load_test(model_path, test_file, model_type = 'xlmroberta'):
    """Build an NERModel from `model_path` and evaluate it on `test_file`.

    Args:
        model_path: model name or directory handed to `NERModel`.
        test_file: evaluation data location, passed to `test_model`.
        model_type: architecture identifier handed to `NERModel`. It was
            previously accepted but ignored in favour of a hard-coded
            'xlmroberta'; the default keeps that earlier behaviour.
    """
    model = NERModel(model_type, model_path)
    test_model(model, test_file)

In [15]:

# Fine-tune on English only.
args["output_dir"] = "models/slot_en_train"
model = NERModel("xlmroberta", "xlm-roberta-base", labels=labels, args=args)
model.train_model(path_2_en_train)

# Evaluate in the same order as before: the English baseline first, then the
# zero-shot cross-lingual test sets (Spanish, Thai).
for test_path in (path_2_en_test, path_2_es_test, path_2_th_test):
    test_model(model, test_path)


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-st

HBox(children=(FloatProgress(value=0.0, max=1692.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=212.0, style=ProgressStyle(des…





HBox(children=(FloatProgress(value=0.0, max=8621.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=1078.0, style=ProgressStyle(desc…


tested on:  data/formatted/en_format_test.conll
                              precision    recall  f1-score   support

      B-alarm/alarm_modifier    0.00000   0.00000   0.00000         3
                  B-datetime    0.41813   0.33712   0.37328      6158
                  B-location    0.82392   0.67050   0.73933      1305
                  B-negation    0.00000   0.00000   0.00000         3
                 B-news/type    0.00000   0.00000   0.00000         1
             B-reminder/noun    0.00000   0.00000   0.00000       980
 B-reminder/recurring_period    0.00000   0.00000   0.00000        80
        B-reminder/reference    0.00000   0.00000   0.00000        85
B-reminder/reminder_modifier    0.00000   0.00000   0.00000         2
             B-reminder/todo    0.77778   0.02384   0.04627      1468
         B-weather/attribute    0.66997   0.34792   0.45800      2719
              B-weather/noun    0.35294   0.03021   0.05565      1589
   B-weather/temperatureUnit    0.00000 

HBox(children=(FloatProgress(value=0.0, max=3043.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=381.0, style=ProgressStyle(descr…


tested on:  data/formatted/es_format_test.conll
                             precision    recall  f1-score   support

     B-alarm/alarm_modifier    0.00000   0.00000   0.00000         4
                 B-datetime    0.53453   0.54801   0.54119      2062
                 B-location    0.82328   0.71805   0.76707       266
                 B-negation    0.00000   0.00000   0.00000         2
            B-reminder/noun    0.00000   0.00000   0.00000       493
B-reminder/recurring_period    0.00000   0.00000   0.00000        26
       B-reminder/reference    0.00000   0.00000   0.00000        14
            B-reminder/todo    0.68085   0.04372   0.08216       732
        B-weather/attribute    0.57812   0.31049   0.40400       715
             B-weather/noun    0.83721   0.18557   0.30380       388
     I-alarm/alarm_modifier    0.00000   0.00000   0.00000         4
                 I-datetime    0.89297   0.80917   0.84901      4774
                 I-location    0.95946   0.50000   0.

HBox(children=(FloatProgress(value=0.0, max=1692.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=212.0, style=ProgressStyle(descr…


tested on:  data/formatted/th_format_test.conll
                             precision    recall  f1-score   support

                 B-datetime    0.92183   0.92935   0.92558      1104
                 B-location    0.97468   0.95652   0.96552       161
            B-reminder/noun    0.81070   0.86404   0.83652       228
B-reminder/recurring_period    0.00000   0.00000   0.00000        10
       B-reminder/reference    0.00000   0.00000   0.00000         7
            B-reminder/todo    0.89498   0.64474   0.74952       304
        B-weather/attribute    0.94589   0.96468   0.95519       453
             B-weather/noun    0.91667   0.95872   0.93722       218
                 I-datetime    0.95249   0.98641   0.96915      2134
                 I-location    0.88265   0.92021   0.90104       188
            I-reminder/noun    0.86693   0.98444   0.92196       450
I-reminder/recurring_period    0.00000   0.00000   0.00000        25
       I-reminder/reference    0.00000   0.00000   0.

HBox(children=(FloatProgress(value=0.0, max=1692.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=212.0, style=ProgressStyle(descr…


tested on:  data/formatted/th_format_test.conll
                             precision    recall  f1-score   support

                 B-datetime    0.92183   0.92935   0.92558      1104
                 B-location    0.97468   0.95652   0.96552       161
            B-reminder/noun    0.81070   0.86404   0.83652       228
B-reminder/recurring_period    0.00000   0.00000   0.00000        10
       B-reminder/reference    0.00000   0.00000   0.00000         7
            B-reminder/todo    0.89498   0.64474   0.74952       304
        B-weather/attribute    0.94589   0.96468   0.95519       453
             B-weather/noun    0.91667   0.95872   0.93722       218
                 I-datetime    0.95249   0.98641   0.96915      2134
                 I-location    0.88265   0.92021   0.90104       188
            I-reminder/noun    0.86693   0.98444   0.92196       450
I-reminder/recurring_period    0.00000   0.00000   0.00000        25
       I-reminder/reference    0.00000   0.00000   0.

In [None]:
# Fine-tune on Spanish only.
args["output_dir"] = "models/slot_es_train"
model = NERModel("xlmroberta", "xlm-roberta-base", labels=labels, args=args)
model.train_model(path_2_es_train)

# Evaluate on every language's test set (English, Spanish, Thai - in that order).
for test_path in (path_2_en_test, path_2_es_test, path_2_th_test):
    test_model(model, test_path)

In [None]:
# Fine-tune on Thai only.
args["output_dir"] = "models/slot_th_train"
model = NERModel("xlmroberta", "xlm-roberta-base", labels=labels, args=args)
model.train_model(path_2_th_train)

# Evaluate on every language's test set (English, Spanish, Thai - in that order).
for test_path in (path_2_en_test, path_2_es_test, path_2_th_test):
    test_model(model, test_path)

In [None]:
# Sequential fine-tuning: English first, then Thai, on the same model.
# `output_dir` is defined here because the training calls below reference it;
# it was previously never assigned, so this cell raised NameError on a fresh
# kernel.
output_dir = "models/slot_en_th_train"
args["output_dir"] = output_dir

model = NERModel('xlmroberta','xlm-roberta-base', labels = labels,  args=args)

#sequentially train
model.train_model(path_2_en_train, output_dir=output_dir)
model.train_model(path_2_th_train, output_dir=output_dir)


test_model(model, path_2_en_test)
test_model(model, path_2_es_test)
test_model(model, path_2_th_test)

In [None]:
# Sequential fine-tuning: English first, then Spanish, on the same model.
# `output_dir` is defined here because the training calls below reference it;
# it was previously never assigned, so this cell raised NameError on a fresh
# kernel.
output_dir = "models/slot_en_es_train"
args["output_dir"] = output_dir
model = NERModel('xlmroberta','xlm-roberta-base', labels = labels,  args=args)

model.train_model(path_2_en_train, output_dir=output_dir)
model.train_model(path_2_es_train, output_dir=output_dir)


test_model(model, path_2_en_test)
test_model(model, path_2_es_test)
test_model(model, path_2_th_test)

In [None]:
# Train on the Thai data using whichever `model` is currently bound in the
# kernel (this cell does not create one), writing to the checkpoints directory.
model.train_model(
    path_2_th_train,
    output_dir=path_2_checkpoints,
)
