### FactRuEval elmo, MultiHeadAttention

In [14]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
import sys

sys.path.append("../")

warnings.filterwarnings("ignore")

In [15]:
import os


# Locations of the FactRuEval CSV files and of the ELMo files.
# The defaults are the original absolute paths; set FACTRUEVAL_DATA_DIR and/or
# ELMO_MODEL_DIR to run the notebook on another machine without editing this cell.
# os.path.join(<dir>, "") guarantees a trailing separator, which matters because
# a later cell builds a path with `data_path + "valid_with_pos.csv"`.
data_path = os.path.join(
    os.environ.get("FACTRUEVAL_DATA_DIR") or "/home/lis/ner/ulmfit/data/factrueval/", "")
train_path = os.path.join(data_path, "train_with_pos.csv")
valid_path = os.path.join(data_path, "valid_with_pos.csv")
model_dir = os.path.join(os.environ.get("ELMO_MODEL_DIR") or "/datadrive/elmo/", "")
config_name = "cnn_50_100_512_4096_sample.json"

In [16]:
import torch
# Make GPU index 1 the current CUDA device (hard-coded device index).
torch.cuda.set_device(1)
# Display whether CUDA is available and which device index is now current.
torch.cuda.is_available(), torch.cuda.current_device()

(True, 1)

### 1. Create dataloaders

In [17]:
from modules.data.elmo_data import ElmoNerData as NerData

In [18]:
# Build the NER data object from the train/valid CSV paths, together with
# model_dir and config_name from the configuration cell.
data = NerData.create(train_path, valid_path, model_dir, config_name)

For FactRuEval we use the following set of labels:

In [19]:
# Show the label -> index mapping held by the data object.
print(data.label2idx)

{'<pad>': 0, '<bos>': 1, '<eos>': 2, 'O': 3, 'B_ORG': 4, 'I_ORG': 5, 'B_LOC': 6, 'B_PER': 7, 'I_PER': 8, 'I_LOC': 9}


### 2. Create model
To create the PyTorch model we need to create a `NerModel` object (here an `ElmoBiLSTMAttnCRF`).

In [20]:
from modules.models.elmo_models import ElmoBiLSTMAttnCRF

In [21]:
# First argument is the number of labels (size of data.label2idx);
# model_dir / config_name are the same values passed to NerData.create.
model = ElmoBiLSTMAttnCRF.create(len(data.label2idx), model_dir, config_name, enc_hidden_dim=128)

INFO:root:char embedding size: 3896
INFO:root:word embedding size: 329681


In [22]:
# Inspect the decoder sub-module of the model (its repr is shown below).
model.decoder

AttnCRFDecoder(
  (attn): MultiHeadAttention(
    (attention): _MultiHeadAttention(
      (attention): ScaledDotProductAttention(
        (softmax): Softmax()
        (dropout): Dropout(p=0.5)
      )
    )
    (proj): Linear(in_features=192, out_features=128, bias=True)
    (dropout): Dropout(p=0.5)
    (layer_norm): LayerNormalization()
  )
  (linear): Linears(
    (linears): ModuleList(
      (0): Linear(in_features=128, out_features=64, bias=True)
    )
    (output_linear): Linear(in_features=64, out_features=10, bias=True)
  )
  (crf): CRF()
)

In [23]:
# Display the value returned by the model's trainable-parameter counter.
model.get_n_trainable_params()

665818

### 3. Create learner

To train our PyTorch model we need to create a `NerLearner` object.

In [24]:
from modules import NerLearner

In [25]:
# Wrap model and data in a learner; the best checkpoint is written to best_model_path.
# sup_labels=data.id2label[4:] skips the first four entries of id2label.
# NOTE(review): per the label2idx printed earlier, indices 0-3 are <pad>, <bos>,
# <eos> and O, so this presumably keeps only the entity tags - assumes id2label
# is ordered by index; confirm.
learner = NerLearner(model, data,
                     best_model_path="/datadrive/models/factrueval/elmo_attn_cased.cpt",
                     base_lr=0.0001, lr_max=0.005, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[4:])

INFO:root:Use lr OneCycleScheduler...


### 4. Learn your NER model
Call `learner.fit`

In [13]:
# Fit with first argument 1 (the log below reports a single epoch) and use
# 'prec' as the target metric (the log reports "max_prec" when saving the best model).
learner.fit(1, target_metric='prec')

INFO:root:Resuming train... Current epoch 0.


HBox(children=(IntProgress(value=0, max=233), HTML(value='')))

INFO:root:
lr after epoch: 0.004996961373853585
INFO:root:
epoch 1, average train epoch loss=3.3685





HBox(children=(IntProgress(value=0, max=26), HTML(value='')))



INFO:root:on epoch 0 by max_prec: 0.822
INFO:root:Saving new best model...


              precision    recall  f1-score   support

       B_ORG      0.838     0.777     0.806       260
       I_ORG      0.802     0.728     0.763       283
       B_LOC      0.868     0.841     0.854       195
       B_PER      0.938     0.942     0.940       191
       I_PER      0.927     0.977     0.951       130
       I_LOC      0.560     0.400     0.467        35

   micro avg      0.858     0.816     0.837      1094
   macro avg      0.822     0.778     0.797      1094
weighted avg      0.853     0.816     0.833      1094



### 5. Evaluate
Create a new data loader from an existing path.

In [26]:
from modules.data.elmo_data import get_elmo_data_loader_for_predict

In [27]:
# Reuse `valid_path` from the configuration cell instead of concatenating
# strings; the concatenation was only correct because `data_path` ends with "/".
dl = get_elmo_data_loader_for_predict(valid_path, learner)

In [28]:
# Load saved model state into the learner.
# NOTE(review): presumably reads the checkpoint at the learner's best_model_path - confirm.
learner.load_model()

In [29]:
# Run prediction over the loader built above; `preds` is later passed to get_elmo_span_report.
preds = learner.predict(dl)

HBox(children=(IntProgress(value=0, max=26), HTML(value='')))



IOB-tag-level metrics (precision / recall / F1)

In [30]:
from modules.train.train import validate_step
# Print the report returned by validate_step for the learner's validation loader,
# restricted to learner.sup_labels.
print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))

HBox(children=(IntProgress(value=0, max=26), HTML(value='')))

              precision    recall  f1-score   support

       B_ORG      0.785     0.842     0.813       260
       I_ORG      0.833     0.777     0.804       283
       B_LOC      0.936     0.826     0.877       195
       B_PER      0.963     0.948     0.955       191
       I_PER      0.954     0.954     0.954       130
       I_LOC      0.923     0.343     0.500        35

   micro avg      0.877     0.838     0.857      1094
   macro avg      0.899     0.782     0.817      1094
weighted avg      0.880     0.838     0.854      1094



Span-level metrics (precision / recall / F1)

In [32]:
from modules.utils.plot_metrics import get_elmo_span_report
# Build the span-level report from the prediction loader and its predictions, then print it.
clf_report = get_elmo_span_report(dl, preds)
print(clf_report)

              precision    recall  f1-score   support

         LOC      0.866     0.764     0.812       195
         PER      0.873     0.864     0.868       191
         ORG      0.719     0.777     0.747       260

   micro avg      0.804     0.799     0.801       646
   macro avg      0.819     0.802     0.809       646
weighted avg      0.809     0.799     0.802       646



### 6. Get mean and standard deviation over 10 runs

In [None]:
from modules.utils.plot_metrics import *


# Train `num_runs` fresh models and, for each run, keep the history entry that
# get_mean_max_metric selects for "f1". The collected entries are summarised in
# the cells below.
num_runs = 10
best_reports = []
try:
    for i in range(num_runs):
        # New model for every run, built with the same arguments as the single-run model.
        model = ElmoBiLSTMAttnCRF.create(len(data.label2idx), model_dir, config_name, enc_hidden_dim=128)
        # One checkpoint file per run so runs do not overwrite each other.
        best_model_path = "/datadrive/models/factrueval/elmo_{}_attn_cased.cpt".format(i)
        # NOTE(review): lr_max is 0.001 here but 0.005 for the single-run learner
        # earlier in the notebook - confirm the difference is intentional.
        learner = NerLearner(model, data,
                             best_model_path=best_model_path, verbose=False,
                             base_lr=0.0001, lr_max=0.001, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[4:])
        learner.fit(50, target_metric='f1')
        # Only the index into the history is needed; the second returned value is unused.
        idx, _unused = get_mean_max_metric(learner.history, "f1", True)
        best_reports.append(learner.history[idx])
except KeyboardInterrupt:
    # Manual interruption is allowed; reports from completed runs stay in best_reports.
    print("End of exp")

In [19]:
import numpy as np

#### f1

Mean and std

In [20]:
# Per-run value: get_mean_max_metric applied to each saved report on its own,
# with its default metric argument. Computed once and reused for both statistics.
f1_per_run = [get_mean_max_metric([r]) for r in best_reports]
# Mean across runs, and standard deviation rounded to 3 decimals.
np.mean(f1_per_run), np.round(np.std(f1_per_run), 3)

(0.8689, 0.006)

Best

In [21]:
# Apply get_mean_max_metric to all saved reports at once (default metric argument).
get_mean_max_metric(best_reports)

0.882

#### precision

Mean and std

In [22]:
# Per-run value for the "prec" metric, computed once and reused for both statistics.
prec_per_run = [get_mean_max_metric([r], "prec") for r in best_reports]
# Mean across runs, and standard deviation rounded to 3 decimals.
np.mean(prec_per_run), np.round(np.std(prec_per_run), 3)

(0.8894, 0.016)

Best

In [23]:
# Apply get_mean_max_metric to all saved reports at once for the "prec" metric.
get_mean_max_metric(best_reports, "prec")

0.913