In [1]:
!pip install transformers
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import re
import pandas as pd
from datasets import Dataset

def load_data_to_df(file):
  """Read a one-token-per-line, tab-separated corpus into a sentence-level DataFrame.

  Each non-blank line is split on tabs and its first three fields are taken as
  token, POS tag and NER tag. A line consisting only of a newline closes the
  current sentence.

  Args:
    file: Path of the text file to read.

  Returns:
    pandas.DataFrame with the columns "tokens", "pos" and "ner_tags"; every
    row holds the three parallel lists of one sentence.

  Raises:
    IndexError: if a non-blank line has fewer than three tab-separated fields.
  """
  tokens = []
  pos = []
  ner_tags = []

  tokens_temp = []
  pos_temp = []
  tags_temp = []

  # Context manager closes the handle even on a parse error; the encoding is
  # pinned so non-ASCII tokens do not depend on the platform default.
  with open(file, "r", encoding="utf-8") as f:
    for line in f:
      if line == '\n':
        # Sentence boundary: store what was collected and start afresh.
        tokens.append(tokens_temp)
        pos.append(pos_temp)
        ner_tags.append(tags_temp)

        tokens_temp = []
        pos_temp = []
        tags_temp = []

      else:
        elements = line.rstrip('\n').split("\t")
        tokens_temp.append(elements[0])
        pos_temp.append(elements[1])
        tags_temp.append(elements[2])

  # A file that does not end with a blank line still has an open sentence;
  # keep it instead of silently dropping it.
  if tokens_temp:
    tokens.append(tokens_temp)
    pos.append(pos_temp)
    ner_tags.append(tags_temp)

  data = pd.DataFrame({"tokens" : tokens,
                      "pos": pos,
                      "ner_tags" : ner_tags})
  return data

# Load the corpus splits into sentence-level DataFrames.
# NOTE(review): `dev` and `test` are both read from 's_test.txt', so they are
# identical frames. If a separate dev file exists it should be used for `dev`;
# otherwise any model selection done on `dev` is done on the test data.
train = load_data_to_df('s_train.txt')
dev = load_data_to_df('s_test.txt')
test = load_data_to_df('s_test.txt')

In [3]:
# Preview the first sentences of the training split (tokens / pos / ner_tags lists).
train.head()

Unnamed: 0,tokens,pos,ner_tags
0,"[Maßnahmenbekanntgabe, zu, MA, 40, ,, Prüfung,...","[NN, APPR, NE, CARD, $,, NN, ART, NN]","[O, O, B-ORG, I-ORG, O, O, O, O]"
1,[INHALTSVERZEICHNIS],[NE],[O]
2,[ABKÜRZUNGSVERZEICHNIS],[NE],[O]
3,"[bzw., beziehungsweise, Nr., Nummer]","[KON, KON, NN, NN]","[O, O, O, O]"
4,"[Erledigung, des, Prüfungsberichtes, Der, Stad...","[NN, ART, NN, ART, NN, NE, VVFIN, ART, NN, APP...","[O, O, O, O, B-ORG, I-ORG, O, O, O, O, O, O, O..."


In [4]:
# Ordered tag inventory; a tag's position in this list is its integer class id.
label_list = ['O','B-ORG','I-ORG']
# Both lookup directions are derived from the list so they cannot drift apart.
label2id = {tag: idx for idx, tag in enumerate(label_list)}
id2label = dict(enumerate(label_list))

# Name of the pretrained checkpoint passed to the from_pretrained calls.
model_checkpoint = "bert-base-german-dbmdz-cased"

In [5]:
from transformers import (
    AutoConfig,
    AutoModelForTokenClassification,
    AutoTokenizer
)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


Moving 0 files to the new cache system


0it [00:00, ?it/s]

In [6]:
# Checkpoint configuration extended with the tag inventory: the number of
# output classes and both id <-> tag mappings.
config = AutoConfig.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=len(label_list),
)


In [7]:
# Tokenizer belonging to the same checkpoint as the model below.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Token-classification model loaded from the checkpoint with the label-aware
# `config`. The load log reports unused `cls.*` checkpoint weights and some
# freshly initialized weights, so the model has to be fine-tuned before use.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    config=config
)

Some weights of the model checkpoint at bert-base-german-dbmdz-cased were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

In [8]:
def align_labels(data, label_encoding_dict, max_length=128):
    """Tokenize one pre-split sentence and attach a label id to every sub-token.

    Uses the module-level `tokenizer`. Every sub-token receives the encoded
    tag of the word it came from (so all pieces of a word share its tag);
    positions for which the tokenizer reports no source word get -100.

    Args:
        data: Mapping-like row with "tokens" (list of words) and "ner_tags"
            (list of tag strings, parallel to "tokens").
        label_encoding_dict: Mapping from tag string to integer label id.
        max_length: Length the encoding is padded / truncated to.

    Returns:
        The tokenizer output with an extra "labels" entry that is aligned
        position by position with the encoded sequence.

    Raises:
        KeyError: if a tag is missing from `label_encoding_dict`.
    """
    tokenized_inputs = tokenizer(data["tokens"],
                        max_length = max_length, padding = 'max_length',
                        truncation=True, is_split_into_words=True)

    # Word index -> tag string; a word id from the tokenizer indexes into it.
    word_tags = list(data['ner_tags'])

    word_ids = tokenized_inputs.word_ids(batch_index=0)

    # -100 marks positions without a source word; compute_metrics filters on it.
    # NOTE(review): presumably also the value the training loss ignores - confirm.
    labels = [
        -100 if w_id is None else label_encoding_dict[word_tags[w_id]]
        for w_id in word_ids
    ]

    tokenized_inputs["labels"] = labels

    return tokenized_inputs

def tokenized_for_bert(df):
  """Encode every sentence of `df` and return the result as a `Dataset`.

  Each row is passed through `align_labels` with the module-level `label2id`.
  The encoder outputs are added as the columns 'input_ids', 'token_type_ids',
  'attention_mask' and 'labels'; the 'pos' column is removed. The resulting
  dataset is printed and returned. `df` itself is not modified.
  """
  # Encode all rows first, then build each new column in one go.
  encoded_rows = [align_labels(row, label2id) for _, row in df.iterrows()]

  for_bert = df.copy()
  for column in ('input_ids', 'token_type_ids', 'attention_mask', 'labels'):
    for_bert[column] = [encoding[column] for encoding in encoded_rows]

  for_bert = for_bert.drop('pos', axis = 1)
  dataset_for_bert = Dataset.from_pandas(for_bert)
  print(dataset_for_bert)
  return dataset_for_bert

In [9]:
# Encode the three splits in a fixed order: train, dev, test.
train_dataset, dev_dataset, test_dataset = [
    tokenized_for_bert(split_df) for split_df in (train, dev, test)
]

Dataset({
    features: ['tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 2327
})
Dataset({
    features: ['tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 748
})
Dataset({
    features: ['tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 748
})


In [10]:
!pip install seqeval

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [11]:
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForTokenClassification
import torch
from seqeval.metrics import classification_report

In [13]:
import numpy as np
def compute_metrics(p):
    """Compute entity-level scores for the ORG class from model outputs.

    Args:
        p: Pair `(predictions, labels)`. `predictions` holds per-position class
            scores (argmax is taken over axis 2); `labels` holds the gold label
            ids, with -100 at positions that must be ignored.

    Returns:
        The 'ORG' entry of the seqeval classification report (a dict with
        'precision', 'recall', 'f1-score' and 'support'). If the report has no
        'ORG' entry, an all-zero entry is returned instead of raising KeyError.
    """
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = []
    true_labels = []
    for pred_row, label_row in zip(predictions, labels):
        # Keep only positions that carry a real gold label.
        kept = [(pred_id, label_id)
                for pred_id, label_id in zip(pred_row, label_row)
                if label_id != -100]
        true_predictions.append([label_list[pred_id] for pred_id, _ in kept])
        true_labels.append([label_list[label_id] for _, label_id in kept])

    result = classification_report(true_labels, true_predictions, output_dict = True)

    print(result)

    # Fall back to zeros when no ORG entry was reported.
    return result.get('ORG', {'precision': 0.0, 'recall': 0.0,
                              'f1-score': 0.0, 'support': 0})


# Hyperparameters of the fine-tuning run.
batch_size = 16
learning_rate = 5e-5
epoch = 10

# Checkpoints go to "test-ner"; evaluation runs after every epoch.
args = TrainingArguments(
    output_dir="test-ner",
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epoch,
    weight_decay=1e-5,
)

data_collator = DataCollatorForTokenClassification(tokenizer)

# NOTE(review): evaluation during and after training uses `dev_dataset`;
# `test_dataset` is not referenced in this cell.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

print("Training...")
trainer.train()
print("Evaluating...")
# Last expression of the cell: the returned metrics dict is displayed.
trainer.evaluate()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 2327
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 1460
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a cal

Training...


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Support
1,No log,0.04545,0.712404,0.850033,0.775157,1527
2,No log,0.048326,0.759137,0.829731,0.792866,1527
3,No log,0.04111,0.831601,0.785855,0.808081,1527
4,0.041900,0.052787,0.805458,0.753766,0.778755,1527
5,0.041900,0.056548,0.825494,0.793058,0.808951,1527
6,0.041900,0.059151,0.770286,0.845449,0.806119,1527
7,0.008000,0.068455,0.814222,0.832351,0.823187,1527
8,0.008000,0.069629,0.793167,0.851343,0.821226,1527
9,0.008000,0.075114,0.807292,0.81205,0.809664,1527
10,0.008000,0.076131,0.806534,0.824492,0.815415,1527


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.712403951701427, 'recall': 0.8500327439423707, 'f1-score': 0.7751567632128995, 'support': 1527}, 'micro avg': {'precision': 0.712403951701427, 'recall': 0.8500327439423707, 'f1-score': 0.7751567632128995, 'support': 1527}, 'macro avg': {'precision': 0.712403951701427, 'recall': 0.8500327439423707, 'f1-score': 0.7751567632128995, 'support': 1527}, 'weighted avg': {'precision': 0.712403951701427, 'recall': 0.8500327439423707, 'f1-score': 0.7751567632128996, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.7591372079089275, 'recall': 0.8297314996725605, 'f1-score': 0.7928660826032541, 'support': 1527}, 'micro avg': {'precision': 0.7591372079089275, 'recall': 0.8297314996725605, 'f1-score': 0.7928660826032541, 'support': 1527}, 'macro avg': {'precision': 0.7591372079089275, 'recall': 0.8297314996725605, 'f1-score': 0.7928660826032541, 'support': 1527}, 'weighted avg': {'precision': 0.7591372079089275, 'recall': 0.8297314996725605, 'f1-score': 0.7928660826032542, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8316008316008316, 'recall': 0.7858546168958742, 'f1-score': 0.8080808080808081, 'support': 1527}, 'micro avg': {'precision': 0.8316008316008316, 'recall': 0.7858546168958742, 'f1-score': 0.8080808080808081, 'support': 1527}, 'macro avg': {'precision': 0.8316008316008316, 'recall': 0.7858546168958742, 'f1-score': 0.8080808080808081, 'support': 1527}, 'weighted avg': {'precision': 0.8316008316008316, 'recall': 0.7858546168958742, 'f1-score': 0.8080808080808081, 'support': 1527}}


Saving model checkpoint to test-ner/checkpoint-500
Configuration saved in test-ner/checkpoint-500/config.json
Model weights saved in test-ner/checkpoint-500/pytorch_model.bin
tokenizer config file saved in test-ner/checkpoint-500/tokenizer_config.json
Special tokens file saved in test-ner/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8054583624912526, 'recall': 0.7537655533726261, 'f1-score': 0.7787550744248984, 'support': 1527}, 'micro avg': {'precision': 0.8054583624912526, 'recall': 0.7537655533726261, 'f1-score': 0.7787550744248984, 'support': 1527}, 'macro avg': {'precision': 0.8054583624912526, 'recall': 0.7537655533726261, 'f1-score': 0.7787550744248984, 'support': 1527}, 'weighted avg': {'precision': 0.8054583624912526, 'recall': 0.7537655533726261, 'f1-score': 0.7787550744248986, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.825494205862304, 'recall': 0.7930582842174198, 'f1-score': 0.8089512358049432, 'support': 1527}, 'micro avg': {'precision': 0.825494205862304, 'recall': 0.7930582842174198, 'f1-score': 0.8089512358049432, 'support': 1527}, 'macro avg': {'precision': 0.825494205862304, 'recall': 0.7930582842174198, 'f1-score': 0.8089512358049432, 'support': 1527}, 'weighted avg': {'precision': 0.8254942058623042, 'recall': 0.7930582842174198, 'f1-score': 0.8089512358049432, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.7702863961813843, 'recall': 0.8454485920104781, 'f1-score': 0.8061192631907588, 'support': 1527}, 'micro avg': {'precision': 0.7702863961813843, 'recall': 0.8454485920104781, 'f1-score': 0.8061192631907588, 'support': 1527}, 'macro avg': {'precision': 0.7702863961813843, 'recall': 0.8454485920104781, 'f1-score': 0.8061192631907588, 'support': 1527}, 'weighted avg': {'precision': 0.7702863961813843, 'recall': 0.8454485920104781, 'f1-score': 0.8061192631907587, 'support': 1527}}


Saving model checkpoint to test-ner/checkpoint-1000
Configuration saved in test-ner/checkpoint-1000/config.json
Model weights saved in test-ner/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in test-ner/checkpoint-1000/tokenizer_config.json
Special tokens file saved in test-ner/checkpoint-1000/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8142216527866752, 'recall': 0.8323510150622135, 'f1-score': 0.8231865284974094, 'support': 1527}, 'micro avg': {'precision': 0.8142216527866752, 'recall': 0.8323510150622135, 'f1-score': 0.8231865284974094, 'support': 1527}, 'macro avg': {'precision': 0.8142216527866752, 'recall': 0.8323510150622135, 'f1-score': 0.8231865284974094, 'support': 1527}, 'weighted avg': {'precision': 0.8142216527866752, 'recall': 0.8323510150622135, 'f1-score': 0.8231865284974094, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.7931665649786455, 'recall': 0.8513425016371972, 'f1-score': 0.8212255211623499, 'support': 1527}, 'micro avg': {'precision': 0.7931665649786455, 'recall': 0.8513425016371972, 'f1-score': 0.8212255211623499, 'support': 1527}, 'macro avg': {'precision': 0.7931665649786455, 'recall': 0.8513425016371972, 'f1-score': 0.8212255211623499, 'support': 1527}, 'weighted avg': {'precision': 0.7931665649786455, 'recall': 0.8513425016371972, 'f1-score': 0.8212255211623499, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8072916666666666, 'recall': 0.8120497707924034, 'f1-score': 0.8096637283708782, 'support': 1527}, 'micro avg': {'precision': 0.8072916666666666, 'recall': 0.8120497707924034, 'f1-score': 0.8096637283708782, 'support': 1527}, 'macro avg': {'precision': 0.8072916666666666, 'recall': 0.8120497707924034, 'f1-score': 0.8096637283708782, 'support': 1527}, 'weighted avg': {'precision': 0.8072916666666666, 'recall': 0.8120497707924034, 'f1-score': 0.8096637283708782, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720207, 'support': 1527}, 'micro avg': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720207, 'support': 1527}, 'macro avg': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720207, 'support': 1527}, 'weighted avg': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720208, 'support': 1527}}




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, ner_tags. If tokens, ner_tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


Evaluating...


{'ORG': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720207, 'support': 1527}, 'micro avg': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720207, 'support': 1527}, 'macro avg': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720207, 'support': 1527}, 'weighted avg': {'precision': 0.8065342729019859, 'recall': 0.8244924688932548, 'f1-score': 0.8154145077720208, 'support': 1527}}


{'eval_loss': 0.07613125443458557,
 'eval_precision': 0.8065342729019859,
 'eval_recall': 0.8244924688932548,
 'eval_f1-score': 0.8154145077720207,
 'eval_support': 1527,
 'eval_runtime': 6.3229,
 'eval_samples_per_second': 118.301,
 'eval_steps_per_second': 7.433,
 'epoch': 10.0}

In [15]:
# Bare name: only displays the function object's repr; nothing is computed here.
compute_metrics

<function __main__.compute_metrics(p)>