### FactRuEval example (Cased model), MultiHeadAttention

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
import sys

# Make the parent directory importable (presumably where the `modules` package
# used below lives — confirm). Guarded so that re-running this cell does not
# keep appending duplicate entries to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

In [2]:
import os


# Location of the FactRuEval CSV files (trailing slash kept as in the original).
data_path = "/home/lis/ner/ulmfit/data/factrueval/"
train_path = os.path.join(data_path, "train_with_pos.csv")
valid_path = os.path.join(data_path, "valid_with_pos.csv")

# Directory holding the converted PyTorch checkpoint.
# Fix: the original literal began with a stray space (" /datadrive/..."),
# which turned it into a relative path pointing at a non-existent directory.
model_dir = "/datadrive/models/multi_cased_L-12_H-768_A-12/"
# Reuse model_dir instead of repeating the directory literal.
init_checkpoint_pt = os.path.join(model_dir, "pytorch_model.bin")

# Directory holding the BERT config and vocabulary files.
bert_dir = "/datadrive/bert/multi_cased_L-12_H-768_A-12/"
bert_config_file = os.path.join(bert_dir, "bert_config.json")
vocab_file = os.path.join(bert_dir, "vocab.txt")

In [3]:
import torch
# Select the CUDA device with index 1 for this process (hard-coded index;
# adjust it for machines with a different GPU layout).
torch.cuda.set_device(1)
# Cell output: whether CUDA is available and the index of the current device.
torch.cuda.is_available(), torch.cuda.current_device()

(True, 1)

### 1. Create dataloaders

In [4]:
from modules import BertNerData as NerData

INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English


In [5]:
# Build the NER data object from the train/validation CSV paths and the BERT vocabulary file.
data = NerData.create(train_path, valid_path, vocab_file)

HBox(children=(IntProgress(value=0, max=3728), HTML(value='')))



HBox(children=(IntProgress(value=0, max=415), HTML(value='')))



For FactRuEval we use the following set of labels:

In [6]:
# Show the label -> index mapping held by the data object.
print(data.label2idx)

{'<pad>': 0, '[CLS]': 1, '[SEP]': 2, 'B_O': 3, 'I_O': 4, 'B_ORG': 5, 'I_ORG': 6, 'B_LOC': 7, 'I_LOC': 8, 'B_PER': 9, 'I_PER': 10}


### 2. Create model
To create the PyTorch model, we need to create a `NerModel` object.

In [7]:
from modules.models.bert_models import BertBiLSTMAttnCRF

In [8]:
# Instantiate the tagger: the first argument is the number of labels, followed by
# the BERT config file and the PyTorch checkpoint to initialise from.
# NOTE(review): enc_hidden_dim presumably sets the encoder output width — confirm.
model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)

In [9]:
# Display the decoder sub-module (attention, linear layers and CRF) as the cell output.
model.decoder

AttnCRFDecoder(
  (attn): MultiHeadAttention(
    (attention): _MultiHeadAttention(
      (attention): ScaledDotProductAttention(
        (softmax): Softmax()
        (dropout): Dropout(p=0.5)
      )
    )
    (proj): Linear(in_features=192, out_features=256, bias=True)
    (dropout): Dropout(p=0.5)
    (layer_norm): LayerNormalization()
  )
  (linear): Linears(
    (linears): ModuleList(
      (0): Linear(in_features=256, out_features=128, bias=True)
    )
    (output_linear): Linear(in_features=128, out_features=11, bias=True)
  )
  (crf): CRF()
)

In [18]:
# Display the number of trainable parameters reported by the model.
model.get_n_trainable_params()

1151425

### 3. Create learner

To train our PyTorch model, we need to create a `NerLearner` object.

In [10]:
from modules import NerLearner

In [11]:
# Number of training epochs; also used for t_total here and passed to learner.fit later.
num_epochs = 100
# sup_labels takes the labels from index 5 onward; per the label2idx mapping printed
# above that skips <pad>, [CLS], [SEP], B_O and I_O (assuming id2label mirrors label2idx).
# t_total is the number of epochs times the length of the training data loader.
learner = NerLearner(model, data,
                     best_model_path="/datadrive/models/factrueval/exp_final_attn_cased1.cpt",
                     lr=0.001, clip=1.0, sup_labels=data.id2label[5:],
                     t_total=num_epochs * len(data.train_dl))

### 4. Learn your NER model
Call `learner.fit`

In [None]:
# Train for num_epochs epochs with f1 as the target metric.
learner.fit(num_epochs, target_metric='f1')

### 5. Evaluate
Create new data loader from existing path.

In [12]:
from modules.data.bert_data import get_bert_data_loader_for_predict

In [13]:
# Build a prediction data loader for the validation file. os.path.join replaces
# plain string concatenation so the path stays valid whether or not data_path
# ends with a slash.
dl = get_bert_data_loader_for_predict(os.path.join(data_path, "valid.csv"), learner)

HBox(children=(IntProgress(value=0, max=415), HTML(value='')))



In [14]:
# Load saved model weights into the learner.
# NOTE(review): presumably read from the best_model_path passed to NerLearner — confirm.
learner.load_model()

In [15]:
# Run the learner over the prediction data loader and keep its predictions.
preds = learner.predict(dl)

HBox(children=(IntProgress(value=0, max=26), HTML(value='')))



IOB precision

In [17]:
from modules.train.train import validate_step
# Print the report returned by validate_step for the validation loader; the learner's
# sup_labels are passed in, and the report below lists only the entity labels.
print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))

HBox(children=(IntProgress(value=0, max=26), HTML(value='')))

              precision    recall  f1-score   support

       B_ORG      0.846     0.869     0.857       259
       I_ORG      0.935     0.855     0.893      1000
       B_LOC      0.926     0.911     0.919       192
       I_LOC      0.909     0.861     0.885       303
       B_PER      0.969     0.984     0.976       188
       I_PER      0.982     0.983     0.982       649

   micro avg      0.937     0.903     0.919      2591
   macro avg      0.928     0.911     0.919      2591
weighted avg      0.937     0.903     0.919      2591



Tokens report

In [72]:
from sklearn_crfsuite.metrics import flat_classification_report

In [73]:
from modules.utils.utils import bert_labels2tokens

In [74]:
# Map the predicted label sequences, then the reference ones stored on the
# dataset items, onto tokens with bert_labels2tokens.
pred_tokens, pred_labels = bert_labels2tokens(dl, preds)
true_tokens, true_labels = bert_labels2tokens(
    dl,
    [sample.labels for sample in dl.dataset],
)

In [75]:
# Sanity check: both conversions must yield the same tokens before labels are compared.
assert pred_tokens == true_tokens
# Classification report of reference vs. predicted token labels.
tokens_report = flat_classification_report(true_labels, pred_labels)

In [76]:
# Print the token-level report computed in the previous cell.
print(tokens_report)

              precision    recall  f1-score   support

       I_LOC       0.93      0.90      0.92       230
         I_O       0.99      0.99      0.99      7203
       I_ORG       0.92      0.87      0.89       543
       I_PER       0.98      0.98      0.98       321

   micro avg       0.98      0.98      0.98      8297
   macro avg       0.96      0.94      0.95      8297
weighted avg       0.98      0.98      0.98      8297



In [133]:
from modules.utils.plot_metrics import analyze_bert_errors

In [134]:
# Run the error-analysis helper on the predictions; it returns three values,
# bound here as tokens, labels and errors.
res_tokens, res_labels, errors = analyze_bert_errors(dl, preds)

In [136]:
# Count the truthy entries of `errors`.
sum(1 for err in errors if err)

88

Span precision

In [79]:
from modules.utils.utils import voting_choicer

In [80]:
# Fix: get_bert_span_report was used without ever being imported explicitly, so a
# fresh top-to-bottom run raised NameError here.
# NOTE(review): the module path is assumed from the other plot_metrics imports in
# this notebook — confirm that modules.utils.plot_metrics exports it.
from modules.utils.plot_metrics import get_bert_span_report

# Print the span-level report, using voting_choicer as the `fn` argument.
print(get_bert_span_report(dl, preds, fn=voting_choicer))

              precision    recall  f1-score   support

         ORG      0.809     0.834     0.821       259
         LOC      0.851     0.859     0.855       192
         PER      0.936     0.936     0.936       188

   micro avg      0.858     0.872     0.865       639
   macro avg      0.865     0.877     0.871       639
weighted avg      0.859     0.872     0.865       639



### 6. Get mean and std over 10 runs

In [None]:
from modules.utils.plot_metrics import *


# Repeat training num_runs times, each time from a freshly created model, and
# collect for every run the history entry selected by get_mean_max_metric on "f1".
# Note that the loop rebinds the notebook-level `model` and `learner` names.
num_runs = 10
best_reports = []
try:
    for i in range(num_runs):
        # A new model is created for every run.
        model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)
        # Each run writes to its own checkpoint file, indexed by the run number.
        best_model_path = "/datadrive/models/factrueval/exp_{}_attn_cased.cpt".format(i)
        # NOTE(review): these learner arguments (base_lr, lr_max, use_lr_scheduler, clip=5.0)
        # differ from the single-run learner created earlier (lr, clip=1.0, t_total) — confirm intended.
        learner = NerLearner(model, data,
                             best_model_path=best_model_path, verbose=False,
                             base_lr=0.0001, lr_max=0.001, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[5:])
        # NOTE(review): training targets 'prec' while the report below is selected by "f1",
        # and the epoch count is a literal 100 rather than num_epochs — confirm both are intended.
        learner.fit(100, target_metric='prec')
        # idx indexes learner.history; res is not used afterwards.
        idx, res = get_mean_max_metric(learner.history, "f1", True)
        best_reports.append(learner.history[idx])
except KeyboardInterrupt:
    # Allow stopping the experiment by hand; reports gathered so far stay in best_reports.
    print("End of exp")

In [37]:
import numpy as np

#### f1

Mean and std

In [45]:
# Metric value of each run's selected report (default metric of get_mean_max_metric;
# f1 per this section's heading). Computed once and reused for both statistics
# instead of rebuilding the same list twice.
f1_per_run = [get_mean_max_metric([r]) for r in best_reports]
# Cell output: (mean, std rounded to 3 decimals).
np.mean(f1_per_run), np.round(np.std(f1_per_run), 3)

(0.9163, 0.006)

Best

In [41]:
# Best value over all collected reports, using the helper's default metric
# (f1 per this section's heading).
get_mean_max_metric(best_reports)

0.926

#### precision

Mean and std

In [46]:
# "prec" value of each run's selected report, computed once and reused for both
# statistics instead of rebuilding the same list twice.
prec_per_run = [get_mean_max_metric([r], "prec") for r in best_reports]
# Cell output: (mean, std rounded to 3 decimals).
np.mean(prec_per_run), np.round(np.std(prec_per_run), 3)

(0.9253000000000002, 0.007)

Best

In [43]:
# Best "prec" value over all collected reports.
get_mean_max_metric(best_reports, "prec")

0.934