In [1]:
import sys
# Put the repository's helper directories at the front of the module search
# path so that the project-local imports in the next cell resolve.
# NOTE(review): presumably `huggingface_utils` / `convert_annotations` live in
# ../util and `run_experiment` in ../experiments - confirm against the repo.
# Both entries are relative, i.e. they are resolved against the current
# working directory of the kernel.
sys.path.insert(0, '../util')
sys.path.insert(1, '../experiments')

import os
# Disable weights and biases (if installed)
os.environ["WANDB_DISABLED"] = "true"
# Switch off parallelism in the Hugging Face `tokenizers` library.
# NOTE(review): presumably set to avoid its fork-related warnings - confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
from pathlib import Path
import transformers
from transformers import AutoModelForTokenClassification, AutoTokenizer, Trainer, TrainingArguments, pipeline, DataCollatorForTokenClassification, EarlyStoppingCallback, trainer_utils

from huggingface_utils import load_custom_dataset, LabelAligner, compute_metrics, eval_on_test_set
from run_experiment import get_train_args
from convert_annotations import entity_values

# Parameters

In [3]:
# The pair (level, spans) must be one of the keys of `config_files` below.
level = 'fine' # Change to 'coarse' to look at high-level entity classes only
spans = 'short' # Change to 'long' to consider long spans induced by specifications

In [4]:
# Maps a (level, spans) pair to the experiment configuration file that is
# later handed to Hydra's `compose`. All file names follow the scheme
# `NN_ggponc_<level>_<spans>.yaml`.
config_files = {
    ('coarse', 'short'): '01_ggponc_coarse_short.yaml',
    ('fine', 'short'): '02_ggponc_fine_short.yaml',
    # Previously spelled '03_ggponc_coarsee_long.yaml', which broke the naming scheme.
    ('coarse', 'long'): '03_ggponc_coarse_long.yaml',
    ('fine', 'long'): '04_ggponc_fine_long.yaml',
}

In [5]:
import hydra
from hydra import compose, initialize

# Drop any Hydra instance left over from a previous execution of this cell
# before initialising again.
hydra.core.global_hydra.GlobalHydra.instance().clear()

experiments_dir = Path('..') / 'experiments'
initialize(config_path=experiments_dir, job_name='foo')

# Pick the configuration file that matches the parameters chosen above.
selected_config_file = config_files[(level, spans)]
config = compose(
    config_name=selected_config_file,
    overrides=['cuda=0', 'link=false'],
)

In [6]:
# Read the paths of the three data splits from the experiment configuration.
train_file, dev_file, test_file = (
    config[split_key]
    for split_key in ('train_dataset', 'dev_dataset', 'test_dataset')
)

# Setup IOB-encoded dataset with train / dev / test splits

In [7]:
# Load the train / dev / test splits together with the list of tags that is
# used further down to build the id -> label mapping.
dataset, tags = load_custom_dataset(
    train=train_file,
    dev=dev_file,
    test=test_file,
    tag_strings=config['task'],
)



  0%|          | 0/3 [00:00<?, ?it/s]



In [8]:
# Load the tokenizer that belongs to the configured base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(config['base_model_checkpoint'])
# Fail early unless a fast tokenizer was loaded.
# NOTE(review): presumably `LabelAligner` depends on fast-tokenizer features
# such as offset mappings - confirm in huggingface_utils.
assert isinstance(tokenizer, transformers.PreTrainedTokenizerFast)

In [9]:
# Project helper whose `tokenize_and_align_labels` method is applied to the
# whole dataset in the next cell.
label_aligner = LabelAligner(tokenizer)

In [10]:
def _align_batch(batch):
    """Delegate one batch of examples to ``label_aligner.tokenize_and_align_labels``."""
    return label_aligner.tokenize_and_align_labels(batch, config['label_all_tokens'])


# batched=True hands the callback whole batches rather than single examples.
dataset = dataset.map(_align_batch, batched=True)

  0%|          | 0/24 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

In [11]:
# Number the tags in their given order: position in `tags` -> tag string.
id2label = {index: tag for index, tag in enumerate(tags)}
id2label

{0: 'O',
 1: 'B-Other_Finding',
 2: 'I-Other_Finding',
 3: 'B-Diagnosis_or_Pathology',
 4: 'I-Diagnosis_or_Pathology',
 5: 'B-Therapeutic',
 6: 'I-Therapeutic',
 7: 'B-Diagnostic',
 8: 'I-Diagnostic',
 9: 'B-Nutrient_or_Body_Substance',
 10: 'I-Nutrient_or_Body_Substance',
 11: 'B-External_Substance',
 12: 'I-External_Substance',
 13: 'B-Clinical_Drug',
 14: 'I-Clinical_Drug'}

In [12]:
dataset

DatasetDict({
    train: Dataset({
        features: ['_tags', 'attention_mask', 'fname', 'input_ids', 'labels', 'offset_mapping', 'sentence_id', 'special_tokens_mask', 'tags', 'token_type_ids', 'tokens'],
        num_rows: 23528
    })
    dev: Dataset({
        features: ['_tags', 'attention_mask', 'fname', 'input_ids', 'labels', 'offset_mapping', 'sentence_id', 'special_tokens_mask', 'tags', 'token_type_ids', 'tokens'],
        num_rows: 4655
    })
    test: Dataset({
        features: ['_tags', 'attention_mask', 'fname', 'input_ids', 'labels', 'offset_mapping', 'sentence_id', 'special_tokens_mask', 'tags', 'token_type_ids', 'tokens'],
        num_rows: 4826
    })
})

# Configure and train 🤗 token classification model

In [13]:
from run_experiment import get_train_args

In [14]:
# Shortened run for this notebook; raise the value for a longer run (the
# original note mentions 100 epochs for full training).
# Do not delete this line: the next cell reads `num_train_epochs` and would
# fail with a NameError.
num_train_epochs = 10

In [15]:
# Override the epoch count in the loaded experiment configuration, which is
# unpacked into `get_train_args` afterwards.
config['num_train_epochs'] = num_train_epochs

In [16]:
# Build the training arguments from the experiment configuration plus the
# notebook-specific settings (output location, run name, no reporting
# integrations, no checkpoint to resume from).
training_args = get_train_args(
    cp_path='../ner_results',
    run_name='ner_baseline',
    report_to=[],
    resume_from_checkpoint=None,
    **config,
)

INFO:run_experiment:ner_baseline


In [17]:
training_args

TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_steps=None,
evaluation_strategy=IntervalStrategy.EPOCH,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
gradient_checkpointing=True,
greater_is_better=True,
group_by_length=False,
hub_model_id=None,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.05,
learning_rate=1e-05,
length_column_name=length,
load_best_model_at_end=True,
local_rank=-1,
log_level=-1,
log_level_replica=-1,
log_on_each_node=True,
logging_dir=../ner_results/runs/Dec13_14-28-54_geras,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_step

In [18]:
def model_init():
    """Create a fresh token classification model from the base checkpoint.

    The function is handed to ``Trainer`` as ``model_init``, so the model is
    instantiated by the trainer rather than once up front.

    Returns:
        The model produced by ``AutoModelForTokenClassification.from_pretrained``
        for ``config['base_model_checkpoint']``, with ``len(tags)`` labels and
        consistent ``id2label`` / ``label2id`` mappings in its configuration.
    """
    return AutoModelForTokenClassification.from_pretrained(
        config['base_model_checkpoint'],
        num_labels=len(tags),
        id2label=id2label,
        # Pass the inverse mapping explicitly. Otherwise the configuration
        # keeps generic LABEL_<i> names in `label2id`, which then disagrees
        # with `id2label` in every saved checkpoint.
        label2id={label: index for index, label in id2label.items()},
    )

# Collator that assembles the token classification batches.
data_collator = DataCollatorForTokenClassification(tokenizer)
# Metric callback built for the tag set of this task.
metrics_fn = compute_metrics(tags, True)

# Training runs on the train split; evaluation uses the dev split.
tr = Trainer(
    model_init=model_init,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["dev"],
    compute_metrics=metrics_fn,
)

loading configuration file https://huggingface.co/deepset/gbert-base/resolve/main/config.json from cache at /dhc/home/florian.borchert/.cache/huggingface/transformers/0f9d6c73cd85ab98cecc6866492c84f23e72bbaf2240a24da0e5d5e3b8810707.080f0bd0794ab07ca509487675f6cb88cfbdc04fc142b21be92212223e82cb14
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "B-Other_Finding",
    "2": "I-Other_Finding",
    "3": "B-Diagnosis_or_Pathology",
    "4": "I-Diagnosis_or_Pathology",
    "5": "B-Therapeutic",
    "6": "I-Therapeutic",
    "7": "B-Diagnostic",
    "8": "I-Diagnostic",
    "9": "B-Nutrient_or_Body_Substance",
    "10": "I-Nutrient_or_Body_Substance",
    "11": "B-External_Substance",
    "12": "I-External_Substance",
    "13": "B-Clinical_Drug",
    "14": "I-Clinical_Drug"
  },
  "initia

### Train the model

In [19]:
# Start training; per-epoch metrics on the dev split are shown in the table
# below the cell.
train_result = tr.train()

loading configuration file https://huggingface.co/deepset/gbert-base/resolve/main/config.json from cache at /dhc/home/florian.borchert/.cache/huggingface/transformers/0f9d6c73cd85ab98cecc6866492c84f23e72bbaf2240a24da0e5d5e3b8810707.080f0bd0794ab07ca509487675f6cb88cfbdc04fc142b21be92212223e82cb14
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "B-Other_Finding",
    "2": "I-Other_Finding",
    "3": "B-Diagnosis_or_Pathology",
    "4": "I-Diagnosis_or_Pathology",
    "5": "B-Therapeutic",
    "6": "I-Therapeutic",
    "7": "B-Diagnostic",
    "8": "I-Diagnostic",
    "9": "B-Nutrient_or_Body_Substance",
    "10": "I-Nutrient_or_Body_Substance",
    "11": "B-External_Substance",
    "12": "I-External_Substance",
    "13": "B-Clinical_Drug",
    "14": "I-Clinical_Drug"
  },
  "initia

Epoch,Training Loss,Validation Loss,Clinical Drug Precision,Clinical Drug Recall,Clinical Drug F1,Clinical Drug Number,Diagnosis Or Pathology Precision,Diagnosis Or Pathology Recall,Diagnosis Or Pathology F1,Diagnosis Or Pathology Number,Diagnostic Precision,Diagnostic Recall,Diagnostic F1,Diagnostic Number,External Substance Precision,External Substance Recall,External Substance F1,External Substance Number,Nutrient Or Body Substance Precision,Nutrient Or Body Substance Recall,Nutrient Or Body Substance F1,Nutrient Or Body Substance Number,Other Finding Precision,Other Finding Recall,Other Finding F1,Other Finding Number,Therapeutic Precision,Therapeutic Recall,Therapeutic F1,Therapeutic Number,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,0.552,0.438262,0.757522,0.887967,0.817574,964,0.805428,0.876629,0.839522,4604,0.79575,0.822285,0.8088,2459,0.0,0.0,0.0,57,0.478261,0.424893,0.45,233,0.720899,0.68724,0.703667,3127,0.819464,0.869539,0.843759,3273,0.782637,0.815927,0.798935,0.955793
2,0.4214,0.427277,0.767462,0.900415,0.82864,964,0.85653,0.867507,0.861983,4604,0.798044,0.862952,0.82923,2459,0.0,0.0,0.0,57,0.550562,0.630901,0.588,233,0.742216,0.708986,0.725221,3127,0.830199,0.890315,0.859207,3273,0.805809,0.833186,0.819269,0.959097
3,0.3922,0.427908,0.760908,0.904564,0.82654,964,0.827711,0.895308,0.860184,4604,0.826259,0.847092,0.836546,2459,0.875,0.122807,0.215385,57,0.590308,0.575107,0.582609,233,0.755684,0.712184,0.733289,3127,0.830561,0.900092,0.86393,3273,0.805866,0.841951,0.823514,0.95974
4,0.3851,0.423742,0.767563,0.91805,0.836089,964,0.842148,0.892268,0.866484,4604,0.849269,0.827166,0.838072,2459,0.75,0.157895,0.26087,57,0.561702,0.566524,0.564103,233,0.762638,0.728494,0.745175,3127,0.861507,0.887565,0.874342,3273,0.821647,0.839234,0.830347,0.961441
5,0.372,0.424664,0.753846,0.914938,0.826617,964,0.85102,0.897046,0.873427,4604,0.828338,0.865392,0.84646,2459,0.55,0.192982,0.285714,57,0.61244,0.549356,0.579186,233,0.763328,0.7189,0.740448,3127,0.84194,0.901619,0.870758,3273,0.817104,0.847863,0.8322,0.961695
6,0.3693,0.424598,0.766696,0.917012,0.835144,964,0.85959,0.893571,0.876251,4604,0.834707,0.864579,0.849381,2459,0.44,0.192982,0.268293,57,0.615385,0.652361,0.633333,233,0.760905,0.741925,0.751295,3127,0.847385,0.905897,0.875665,3273,0.821377,0.85425,0.837491,0.9626
7,0.3607,0.426531,0.780142,0.912863,0.8413,964,0.851495,0.902911,0.87645,4604,0.839477,0.861326,0.850261,2459,0.461538,0.210526,0.289157,57,0.573222,0.587983,0.580508,233,0.757429,0.74992,0.753656,3127,0.853503,0.900703,0.876468,3273,0.820758,0.855949,0.837984,0.962627
8,0.3587,0.428078,0.777778,0.914938,0.840801,964,0.860499,0.899001,0.879329,4604,0.838735,0.862952,0.850671,2459,0.433333,0.22807,0.298851,57,0.578723,0.583691,0.581197,233,0.746629,0.761433,0.753958,3127,0.853877,0.901619,0.877099,3273,0.820593,0.857784,0.838776,0.9626
9,0.355,0.429315,0.778073,0.912863,0.840095,964,0.857172,0.900738,0.878416,4604,0.838155,0.871899,0.854694,2459,0.482759,0.245614,0.325581,57,0.609442,0.609442,0.609442,233,0.751517,0.752478,0.751997,3127,0.856392,0.896425,0.875952,3273,0.821973,0.857104,0.839171,0.96288
10,0.3536,0.429483,0.778565,0.911826,0.839943,964,0.860828,0.898784,0.879396,4604,0.836583,0.872306,0.854071,2459,0.481481,0.22807,0.309524,57,0.58952,0.579399,0.584416,233,0.746147,0.758555,0.752299,3127,0.856434,0.896731,0.876119,3273,0.821312,0.857308,0.838924,0.962727


The following columns in the evaluation set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: offset_mapping, tags, fname, _tags, sentence_id, special_tokens_mask, tokens.
***** Running Evaluation *****
  Num examples = 4655
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ../ner_results/checkpoint-736
Configuration saved in ../ner_results/checkpoint-736/config.json
Model weights saved in ../ner_results/checkpoint-736/pytorch_model.bin
tokenizer config file saved in ../ner_results/checkpoint-736/tokenizer_config.json
Special tokens file saved in ../ner_results/checkpoint-736/special_tokens_map.json
Deleting older checkpoint [../ner_results/checkpoint-3680] due to args.save_total_limit
Deleting older checkpoint [../ner_results/checkpoint-4416] due to args.save_total_limit
The following columns in the evaluation set  don't have a corresponding argument in `BertForTokenClassification.forwa

# Evaluate Model

In [21]:
# Take the model held by the trainer after training.
# NOTE(review): the training arguments show load_best_model_at_end=True, so
# this should be the best checkpoint rather than the last epoch - confirm.
model = tr.model

In [22]:
from transformers.pipelines.token_classification import AggregationStrategy

In [23]:
# Wrap the trained model in an NER pipeline. device=0 selects the first CUDA
# device, in line with the `cuda=0` override used when composing the config.
pipe = pipeline(
    "ner",
    model,
    tokenizer=tokenizer,
    device=0,
    aggregation_strategy=AggregationStrategy.FIRST,
)

In [24]:
# Qualitative check: run the pipeline on a sample German clinical-style text
# and inspect the grouped entities it returns.
pipe("Therapie mit pegyliertem Interferon Alpha 2b/Ribavirin, non response. Beendigung der Therapie mit Nivolumab. 1-0-1, derzeit pausiert 23.03.2035: Oesophagusvarizen II. 4. Fortführen der Chemotherapie mit FOLFOX. Groessenprogrediente mikronodulaere Veraenderungen der Lunge.")

[{'entity_group': 'Therapeutic',
  'score': 0.96289647,
  'word': 'Therapie',
  'start': 0,
  'end': 8},
 {'entity_group': 'Clinical_Drug',
  'score': 0.8416674,
  'word': 'Interferon Alpha 2b',
  'start': 25,
  'end': 44},
 {'entity_group': 'Clinical_Drug',
  'score': 0.96495634,
  'word': 'Ribavirin',
  'start': 45,
  'end': 54},
 {'entity_group': 'Therapeutic',
  'score': 0.88501763,
  'word': 'Beendigung',
  'start': 70,
  'end': 80},
 {'entity_group': 'Therapeutic',
  'score': 0.9672504,
  'word': 'Therapie',
  'start': 85,
  'end': 93},
 {'entity_group': 'Clinical_Drug',
  'score': 0.96899027,
  'word': 'Nivolumab',
  'start': 98,
  'end': 107},
 {'entity_group': 'Diagnosis_or_Pathology',
  'score': 0.9015603,
  'word': 'Oesophagusvarizen',
  'start': 145,
  'end': 162},
 {'entity_group': 'Therapeutic',
  'score': 0.9395949,
  'word': 'Fortführen',
  'start': 170,
  'end': 180},
 {'entity_group': 'Therapeutic',
  'score': 0.9715175,
  'word': 'Chemotherapie',
  'start': 185,
  'e

In [25]:
# Second qualitative check on a single long German sentence describing a drug.
pipe("Sotorasib ist ein monoklonaler Antikörper, der gegen den epidermalen Wachstumsfaktorrezeptor (EGFR) gerichtet ist und dient zur Therapie des fortgeschrittenen kolorektalen Karzinoms zusammen mit Irinotecan oder in Kombination mit FOLFOX bzw. allein nach Versagen einer Behandlung mit Oxaliplatin und Irinotecan.")

[{'entity_group': 'Clinical_Drug',
  'score': 0.97322357,
  'word': 'Sotorasib',
  'start': 0,
  'end': 9},
 {'entity_group': 'Clinical_Drug',
  'score': 0.94508743,
  'word': 'Antikörper',
  'start': 31,
  'end': 41},
 {'entity_group': 'Nutrient_or_Body_Substance',
  'score': 0.93117774,
  'word': 'Wachstumsfaktorrezeptor',
  'start': 69,
  'end': 92},
 {'entity_group': 'Nutrient_or_Body_Substance',
  'score': 0.9528037,
  'word': 'EGFR',
  'start': 94,
  'end': 98},
 {'entity_group': 'Therapeutic',
  'score': 0.9749288,
  'word': 'Therapie',
  'start': 128,
  'end': 136},
 {'entity_group': 'Diagnosis_or_Pathology',
  'score': 0.945694,
  'word': 'Karzinoms',
  'start': 172,
  'end': 181},
 {'entity_group': 'Clinical_Drug',
  'score': 0.9710377,
  'word': 'Irinotecan',
  'start': 195,
  'end': 205},
 {'entity_group': 'Therapeutic',
  'score': 0.95461553,
  'word': 'FOLFOX',
  'start': 230,
  'end': 236},
 {'entity_group': 'Diagnosis_or_Pathology',
  'score': 0.73630005,
  'word': 'Ver

In [26]:
# Evaluate the trained model on the test split. The resulting metrics are
# looked up with a "test/" prefix in the next cell (e.g. "test/overall_f1").
test_metrics = eval_on_test_set(dataset["test"], tr, tokenizer, "test")

The following columns in the test set  don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: offset_mapping, tags, fname, _tags, sentence_id, special_tokens_mask, tokens.
***** Running Prediction *****
  Num examples = 4826
  Batch size = 32


4826it [00:03, 1559.24it/s]


In [27]:
# Pull the overall scores out of the test metrics before formatting them.
overall_f1 = test_metrics["test/overall_f1"]
overall_precision = test_metrics["test/overall_precision"]
overall_recall = test_metrics["test/overall_recall"]

print(f"""
F1: {overall_f1:.2f}
 P: {overall_precision:.2f}
 R: {overall_recall:.2f}
""")


F1: 0.86
 P: 0.84
 R: 0.87



### Detailed analysis of model performance

See notebook: [03_NER_Analysis](03_NER_Analysis.ipynb)