### FactRuEval example (Cased model), MultiHeadAttention

In [1]:
# Load (or reload) the autoreload extension; mode 2 re-imports modules before
# each cell execution so edits to project code are picked up without a restart.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import warnings

# Suppress every Python warning for the rest of the session (no category filter is given).
warnings.filterwarnings("ignore")

In [2]:
import os


# Dataset directory and the train/validation CSV files inside it.
data_path = "/home/lis/ner/ulmfit/data/factrueval/"
train_path = os.path.join(data_path, "train_with_pos.csv")
valid_path = os.path.join(data_path, "valid_with_pos.csv")

# Directory containing `pytorch_model.bin`.
# The previous literal began with a stray space (" /datadrive/..."), which made
# it a relative path that does not exist; the space is removed and the
# checkpoint path is now derived from this single definition.
model_dir = "/datadrive/models/multi_cased_L-12_H-768_A-12/"
init_checkpoint_pt = os.path.join(model_dir, "pytorch_model.bin")

# The BERT config and vocabulary are read from a separate directory tree.
bert_config_file = os.path.join("/datadrive/bert/multi_cased_L-12_H-768_A-12/", "bert_config.json")
vocab_file = os.path.join("/datadrive/bert/multi_cased_L-12_H-768_A-12/", "vocab.txt")

In [3]:
import torch
# Make CUDA device index 1 the current device for this process.
# NOTE(review): this assumes the machine has at least two GPUs — confirm before re-running elsewhere.
torch.cuda.set_device(1)
# Display whether CUDA is available and which device index is now current.
torch.cuda.is_available(), torch.cuda.current_device()

(True, 1)

### 1. Create dataloaders

In [4]:
from modules import BertNerData as NerData

INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English


In [5]:
# Build the NER data object from the train/valid CSV paths and the BERT vocabulary file.
data = NerData.create(train_path, valid_path, vocab_file)

For FactRuEval we use the following set of labels:

In [6]:
# Show the label -> index mapping held by the data object.
print(data.label2idx)

{'<pad>': 0, '[CLS]': 1, '[SEP]': 2, 'B_O': 3, 'I_O': 4, 'B_ORG': 5, 'I_ORG': 6, 'B_LOC': 7, 'I_LOC': 8, 'B_PER': 9, 'I_PER': 10}


### 2. Create model
To create the PyTorch model, we need to create a `NerModel` object.

In [7]:
from modules.models.bert_models import BertBiLSTMAttnCRF

In [8]:
# Create the tagger with one output class per label, using the BERT config file
# and the PyTorch checkpoint path defined in the paths cell.
model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)

In [9]:
# Display the decoder submodule to inspect its layers.
model.decoder

AttnCRFDecoder(
  (attn): MultiHeadAttention(
    (attention): _MultiHeadAttention(
      (attention): ScaledDotProductAttention(
        (softmax): Softmax()
        (dropout): Dropout(p=0.5)
      )
    )
    (proj): Linear(in_features=192, out_features=256, bias=True)
    (dropout): Dropout(p=0.5)
    (layer_norm): LayerNormalization()
  )
  (linear): Linears(
    (linears): ModuleList(
      (0): Linear(in_features=256, out_features=128, bias=True)
    )
    (output_linear): Linear(in_features=128, out_features=11, bias=True)
  )
  (crf): CRF()
)

In [10]:
# Display the trainable-parameter count reported by the model.
model.get_n_trainable_params()

1151425

### 3. Create learner

To train our PyTorch model, we need to create a `NerLearner` object.

In [16]:
from modules import NerLearner

In [17]:
# Wrap the model and data in a learner configured with a checkpoint path,
# learning-rate bounds, gradient clipping value and the LR scheduler switch.
# sup_labels=data.id2label[5:] drops the first five labels; per the label2idx
# mapping printed earlier those are <pad>, [CLS], [SEP], B_O and I_O, so this
# presumably restricts the reported labels to the entity tags — confirm in NerLearner.
learner = NerLearner(model, data,
                     best_model_path="/datadrive/models/factrueval/exp_final_attn_cased.cpt",
                     base_lr=0.0001, lr_max=0.005, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[5:])

INFO:root:Use lr OneCycleScheduler...


### 4. Learn your NER model
Call `learner.fit`

In [18]:
# Train for 1 epoch; target_metric='prec' is presumably the metric used to decide
# when a new best model is saved (the log reports "max_prec") — confirm in NerLearner.fit.
learner.fit(1, target_metric='prec')

INFO:root:Resuming train... Current epoch 0.


HBox(children=(IntProgress(value=0, max=233), HTML(value='')))

INFO:root:
lr after epoch: 0.004996961373853585
INFO:root:
epoch 1, average train epoch loss=6.0783





HBox(children=(IntProgress(value=0, max=26), HTML(value='')))



INFO:root:on epoch 0 by max_prec: 0.901
INFO:root:Saving new best model...


              precision    recall  f1-score   support

       B_ORG      0.834     0.737     0.783       259
       I_ORG      0.896     0.708     0.791      1000
       B_LOC      0.899     0.792     0.842       192
       I_LOC      0.930     0.746     0.828       303
       B_PER      0.915     0.979     0.946       188
       I_PER      0.933     0.982     0.956       649

   micro avg      0.906     0.810     0.855      2591
   macro avg      0.901     0.824     0.858      2591
weighted avg      0.905     0.810     0.851      2591



### 5. Evaluate
Create a new data loader from an existing path.

In [32]:
from modules.data.data import get_bert_data_loader_for_predict

In [33]:
# Build a prediction data loader for the validation file. The path is assembled
# with os.path.join, matching how train_path and valid_path are built, so it
# does not depend on data_path ending with a slash.
dl = get_bert_data_loader_for_predict(os.path.join(data_path, "valid.csv"), learner)

In [34]:
# Load saved weights into the learner's model — presumably from its best_model_path; confirm in NerLearner.
learner.load_model()

In [35]:
# Run prediction over the data loader built for the validation file.
preds = learner.predict(dl)

HBox(children=(IntProgress(value=0, max=26), HTML(value='')))



IOB precision

In [36]:
from modules.train.train import validate_step
# Per-tag (B_*/I_*) report computed on the learner's own validation loader
# (learner.data.valid_dl), limited to learner.sup_labels.
print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))

HBox(children=(IntProgress(value=0, max=26), HTML(value='')))

              precision    recall  f1-score   support

       B_ORG      0.849     0.849     0.849       259
       I_ORG      0.921     0.860     0.889      1000
       B_LOC      0.933     0.865     0.897       192
       I_LOC      0.936     0.822     0.875       303
       B_PER      0.989     0.979     0.984       188
       I_PER      0.988     0.982     0.985       649

   micro avg      0.938     0.894     0.916      2591
   macro avg      0.936     0.893     0.913      2591
weighted avg      0.938     0.894     0.915      2591



Span precision

In [37]:
from modules.utils.plot_metrics import get_bert_span_report
# Entity-level (span) report built from the prediction loader and its predictions.
clf_report = get_bert_span_report(dl, preds)
print(clf_report)

              precision    recall  f1-score   support

         LOC      0.865     0.802     0.832       192
         ORG      0.822     0.822     0.822       259
         PER      0.946     0.936     0.941       188

   micro avg      0.872     0.850     0.861       639
   macro avg      0.878     0.854     0.865       639
weighted avg      0.872     0.850     0.860       639



### 6. Get mean and std over 10 runs

In [None]:
from modules.utils.plot_metrics import *


# Number of independent training runs to aggregate over.
num_runs = 10
# Collects one history entry per completed run (the epoch selected below).
best_reports = []
try:
    for i in range(num_runs):
        # Fresh model for every run. This rebinds the notebook-level `model`
        # (and `learner` below), so after the loop they refer to the last run.
        model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)
        # Per-run checkpoint path; the run index is part of the file name.
        best_model_path = "/datadrive/models/factrueval/exp_{}_attn_cased.cpt".format(i)
        # NOTE(review): lr_max is 0.001 here but 0.005 in the single-run learner
        # earlier in this notebook — confirm which value is intended.
        learner = NerLearner(model, data,
                             best_model_path=best_model_path, verbose=False,
                             base_lr=0.0001, lr_max=0.001, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[5:])
        learner.fit(100, target_metric='prec')
        # NOTE(review): training targets 'prec', yet the history entry kept for the
        # report is selected with "f1" — confirm this mismatch is intentional.
        # Called with a third argument of True, the helper's result is unpacked as
        # (index, value); `res` is not used afterwards.
        idx, res = get_mean_max_metric(learner.history, "f1", True)
        best_reports.append(learner.history[idx])
except KeyboardInterrupt:
    # Interrupting the kernel stops the experiment early; reports from runs that
    # already finished remain in best_reports.
    print("End of exp")

In [37]:
import numpy as np

#### f1

Mean and std

In [45]:
# Score of each run's selected report, computed once and reused for both
# statistics (previously the same list was rebuilt twice in one expression).
# No metric name is passed, so the helper's default is used — presumably f1,
# per the section heading; confirm in get_mean_max_metric.
f1_scores = [get_mean_max_metric([r]) for r in best_reports]
# Display (mean, std rounded to 3 decimals) across runs.
np.mean(f1_scores), np.round(np.std(f1_scores), 3)

(0.9163, 0.006)

Best

In [41]:
# Best score over all runs' selected reports, using the helper's default metric
# (presumably f1, per the section heading — confirm in get_mean_max_metric).
get_mean_max_metric(best_reports)

0.926

#### precision

Mean and std

In [46]:
# "prec" score of each run's selected report, computed once and reused for both
# statistics (previously the same list was rebuilt twice in one expression).
prec_scores = [get_mean_max_metric([r], "prec") for r in best_reports]
# Display (mean, std rounded to 3 decimals) across runs.
np.mean(prec_scores), np.round(np.std(prec_scores), 3)

(0.9253000000000002, 0.007)

Best

In [43]:
# Best "prec" score over all runs' selected reports.
get_mean_max_metric(best_reports, "prec")

0.934