In [1]:
import comet_ml

import os
import collections

from transformers import BertTokenizer, BertModel
import torch
import numpy as np
import random

import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from ner.utils import create_dataset_and_document_dataloader
from ner.trainer import Trainer
from ner.model import BertNERBiLSTM, BertNER, DocumentContextBertBaseNER

from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

SEED = 42

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

comet_ml is installed but `COMET_API_KEY` is not set.


In [2]:
torch.cuda.get_device_name(device=0)

'TITAN V'

In [3]:
TOKENIZER = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
torch.cuda.set_device(0)
DEVICE = 'cuda' if torch.cuda.is_available else 'cpu'
BATCH_SIZE = 32

### CoNLL

In [4]:
train_dataset, train_documents, train_dataloader = create_dataset_and_document_dataloader('conll', 'data/conll2003/train.txt', batch_size=BATCH_SIZE, shuffle=False, tokenizer=TOKENIZER)
eval_dataset, eval_documents, eval_dataloader = create_dataset_and_document_dataloader('conll', 'data/conll2003/valid.txt', batch_size=BATCH_SIZE, shuffle=False, tokenizer=TOKENIZER)
test_dataset, test_documents, test_dataloader = create_dataset_and_document_dataloader('conll', 'data/conll2003/test.txt', batch_size=BATCH_SIZE, shuffle=False, tokenizer=TOKENIZER)

In [5]:
eval_dataset.idx2tag = train_dataset.idx2tag
eval_dataset.tag2idx = train_dataset.tag2idx
test_dataset.idx2tag = train_dataset.idx2tag
test_dataset.tag2idx = train_dataset.tag2idx

### Experiment

In [6]:
classes = len(train_dataset.ner_tags)

params = {
    'model': 'Bert-Base-Cased',
    'corpus': 'conll',
    'document_context': True,
    'hidden_size': 768,
    'batch_size': BATCH_SIZE,
    'shuffle_batch': False,
    'optimizer': 'AdamW',
    'learning_rate': 1e-6,
    'epochs': 5,
    'last_epoch_lstm': False,
    'seed': SEED
}

model = DocumentContextBertBaseNER(classes, DEVICE).to(DEVICE)
optimizer = optim.AdamW(model.parameters(), lr=params['learning_rate'])
criterion = nn.CrossEntropyLoss(ignore_index=-100).to(DEVICE)

experiment = comet_ml.Experiment(api_key='fxEY7T7JQW6R5I9DkDazSYRpp', project_name='ner-with-nonlocal-features', workspace='ryzhtus', log_graph=True)
experiment.set_model_graph(model)

trainer = Trainer(experiment, model, params, optimizer, criterion, None, False, params['epochs'], False, train_dataloader, eval_dataloader, test_dataloader,
                  train_documents, eval_documents, test_documents, train_dataset.tag2idx, train_dataset.idx2tag, DEVICE)

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/ryzhtus/ner-with-nonlocal-features/40ef710bc8584b8bab799fb1c28e6067



In [7]:
trainer.fit()

[1 / 5] Train: Loss = 0.27678, Token F1-score = 75.41%, Span F1-score = 61.55%: 100%|██████████| 439/439 [03:22<00:00,  2.17it/s]
[1 / 5] Eval : Loss = 0.02224, Token F1-score = 92.77%, Span F1-score = 87.16%: 100%|██████████| 102/102 [00:40<00:00,  2.54it/s]
[2 / 5] Train: Loss = 0.09521, Token F1-score = 92.95%, Span F1-score = 86.27%: 100%|██████████| 439/439 [03:20<00:00,  2.19it/s]
[2 / 5] Eval : Loss = 0.01689, Token F1-score = 94.21%, Span F1-score = 89.27%: 100%|██████████| 102/102 [00:39<00:00,  2.59it/s]
[3 / 5] Train: Loss = 0.07111, Token F1-score = 94.89%, Span F1-score = 89.48%: 100%|██████████| 439/439 [03:20<00:00,  2.19it/s]
[3 / 5] Eval : Loss = 0.01535, Token F1-score = 94.99%, Span F1-score = 90.54%: 100%|██████████| 102/102 [00:39<00:00,  2.59it/s]
[4 / 5] Train: Loss = 0.04412, Token F1-score = 96.76%, Span F1-score = 93.06%: 100%|██████████| 439/439 [03:20<00:00,  2.19it/s]
[4 / 5] Eval : Loss = 0.01307, Token F1-score = 95.82%, Span F1-score = 91.60%: 100%|█████

In [8]:
trainer.test()

Test : Loss = 0.03027, Token F1-score = 92.82%, Span F1-score = 86.41%: 100%|██████████| 108/108 [00:34<00:00,  3.17it/s]


Classification Report
              precision    recall  f1-score   support

         LOC     0.8950    0.9138    0.9043      1670
        MISC     0.7375    0.7709    0.7538       707
         ORG     0.8319    0.8070    0.8193      1705
         PER     0.9217    0.9143    0.9180      1622

   micro avg     0.8639    0.8643    0.8641      5704
   macro avg     0.8465    0.8515    0.8488      5704
weighted avg     0.8642    0.8643    0.8641      5704



In [9]:
experiment.end()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/ryzhtus/ner-with-nonlocal-features/40ef710bc8584b8bab799fb1c28e6067
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     test_Test Precision               : 0.9015576323987539
COMET INFO:     test_Test Recall                  : 0.9563780568407139
COMET INFO:     test_Test Span F1                 : 0.8640785207256156
COMET INFO:     test_Test Token F1                : 0.9281590763309815
COMET INFO:     train_Train Precision [5]         : (0.708461790751066, 0.9565844469300053)
COMET INFO:     train_Train Recall [5]            : (0.8059473173643757, 0.9871214652370683)
COMET INFO:     train_Train Span F1 [5]           : (0.6154893617021276, 0.9375183831253414)
COMET INFO:     train_Train Token F1 [5]          : (0.7540668854290302, 0.9716130768