In [1]:
!pip install transformers
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.23.1-py3-none-any.whl (5.3 MB)
[K     |████████████████████████████████| 5.3 MB 4.8 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 70.6 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 52.6 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.23.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.6.1-py3-none-any.whl (441 kB)
[K     |████████████████████████████████| 441 kB 4.9 MB/

In [2]:
import re
import pandas as pd
from datasets import Dataset

def load_data_to_df(file):
  """Read a tab-separated, one-token-per-line file into a sentence-level DataFrame.

  Each non-blank line must hold at least three tab-separated fields:
  token, POS tag and NER tag (in that order). Blank lines separate sentences.

  Args:
    file: Path of the text file to read (decoded as UTF-8).

  Returns:
    A pandas DataFrame with one row per sentence and three columns,
    "tokens", "pos" and "ner_tags", each holding a list of strings.

  Raises:
    IndexError: If a non-blank line has fewer than three tab-separated fields.
  """
  sentences = []
  current = []

  # The context manager guarantees the handle is closed; the explicit encoding
  # keeps non-ASCII characters stable regardless of the platform default.
  with open(file, "r", encoding="utf-8") as f:
    for line in f:
      # Strip both "\n" and "\r\n" line endings.
      line = line.rstrip("\r\n")
      if not line:
        # Sentence boundary; repeated blank lines do not create empty sentences.
        if current:
          sentences.append(current)
          current = []
        continue
      elements = line.split("\t")
      current.append((elements[0], elements[1], elements[2]))

  # Keep the final sentence when the file does not end with a blank line.
  if current:
    sentences.append(current)

  data = pd.DataFrame({
      "tokens": [[token for token, _, _ in sent] for sent in sentences],
      "pos": [[pos_tag for _, pos_tag, _ in sent] for sent in sentences],
      "ner_tags": [[ner_tag for _, _, ner_tag in sent] for sent in sentences],
  })
  return data

# Load the splits into sentence-level DataFrames.
# NOTE(review): `dev` and `test` are both read from 's_test.txt', so the
# validation metrics reported during training and the final test metrics are
# computed on the same sentences. Confirm whether a separate dev file was intended.
train = load_data_to_df('s_train.txt')
dev = load_data_to_df('s_test.txt')
test = load_data_to_df('s_test.txt')

In [3]:
train.head()

Unnamed: 0,tokens,pos,ner_tags
0,"[Maßnahmenbekanntgabe, zu, MA, 40, ,, Prüfung,...","[NN, APPR, NE, CARD, $,, NN, ART, NN]","[O, O, B-ORG, I-ORG, O, O, O, O]"
1,[INHALTSVERZEICHNIS],[NE],[O]
2,[ABKÜRZUNGSVERZEICHNIS],[NE],[O]
3,"[bzw., beziehungsweise, Nr., Nummer]","[KON, KON, NN, NN]","[O, O, O, O]"
4,"[Erledigung, des, Prüfungsberichtes, Der, Stad...","[NN, ART, NN, ART, NN, NE, VVFIN, ART, NN, APP...","[O, O, O, O, B-ORG, I-ORG, O, O, O, O, O, O, O..."


In [4]:
# Tag inventory; a label's position in this list is its integer id.
label_list = ['O', 'B-ORG', 'I-ORG']
# Both lookup tables are derived from label_list so the three cannot drift apart.
label2id = {tag: idx for idx, tag in enumerate(label_list)}
id2label = dict(enumerate(label_list))

# Name of the pretrained checkpoint loaded by the config/tokenizer/model cells.
model_checkpoint = "bert-base-german-dbmdz-cased"

In [5]:
from transformers import (
    AutoConfig,
    AutoModelForTokenClassification,
    AutoTokenizer
)

In [6]:
# Load the checkpoint's configuration and override the classification head
# settings: one output per entry of label_list, plus the id <-> tag mappings.
config = AutoConfig.from_pretrained(
    model_checkpoint,
    num_labels=len(label_list),
    id2label = id2label,
    label2id = label2id
)


Downloading:   0%|          | 0.00/433 [00:00<?, ?B/s]

In [7]:
# Tokenizer matching the checkpoint; used as a global by align_labels.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Token-classification model built from the checkpoint with the customised config.
# The captured output warns that some weights are not initialised from the checkpoint.
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    config=config
)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/240k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/479k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/442M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-german-dbmdz-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

In [8]:
def align_labels(data, label_encoding_dict, max_length=128, label_all_tokens=True):
    """Tokenize one pre-split sentence and align its word-level tags to sub-tokens.

    Args:
        data: Mapping-like row with "tokens" (list of words) and "ner_tags"
            (list of tag strings, one per word).
        label_encoding_dict: Mapping from tag string to integer label id.
        max_length: Length every sequence is padded / truncated to.
        label_all_tokens: If True (default, the original behaviour) every
            sub-token receives the id of its word's tag. If False only the
            first sub-token of each word is labelled and the remaining
            sub-tokens get -100.

    Returns:
        The tokenizer output for the sentence with an extra "labels" entry:
        a list of integer ids, one per position in the tokenized sequence.

    Raises:
        KeyError: If a tag is missing from label_encoding_dict.
    """
    tokenized_inputs = tokenizer(data["tokens"],
                        max_length=max_length, padding='max_length',
                        truncation=True, is_split_into_words=True)

    word_tags = list(data['ner_tags'])

    labels = []
    previous_word_id = None
    # word_ids() gives, per position, the index of the source word, or None for
    # positions that do not come from a word (special tokens, padding).
    for w_id in tokenized_inputs.word_ids(batch_index=0):
        if w_id is None:
            # -100 marks positions to be skipped; compute_metrics filters on it.
            labels.append(-100)
        elif label_all_tokens or w_id != previous_word_id:
            labels.append(label_encoding_dict[word_tags[w_id]])
        else:
            # Continuation sub-token of an already labelled word.
            labels.append(-100)
        previous_word_id = w_id

    tokenized_inputs["labels"] = labels

    return tokenized_inputs

def tokenized_for_bert(df):
  """Encode every sentence of `df` and return the result as a `Dataset`.

  Each row is passed to `align_labels`; the resulting 'input_ids',
  'token_type_ids', 'attention_mask' and 'labels' are stored as new columns on
  a copy of `df`. The 'pos' column is dropped before conversion, and the
  resulting dataset is printed as well as returned.
  """
  encoded_columns = ('input_ids', 'token_type_ids', 'attention_mask', 'labels')

  for_bert = df.copy()
  # Create the columns up front so the per-cell assignments below have a target.
  for column in encoded_columns:
    for_bert[column] = ""

  for index, row in df.iterrows():
    inputs = align_labels(row, label2id)
    for column in encoded_columns:
      for_bert.at[index, column] = inputs[column]

  dataset_for_bert = Dataset.from_pandas(for_bert.drop('pos', axis = 1))
  print(dataset_for_bert)
  return dataset_for_bert

In [9]:
# Encode each split; tokenized_for_bert also prints a summary of every dataset.
train_dataset = tokenized_for_bert(train)
dev_dataset = tokenized_for_bert(dev)
test_dataset = tokenized_for_bert(test)

Dataset({
    features: ['tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 2327
})
Dataset({
    features: ['tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 748
})
Dataset({
    features: ['tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 748
})


In [10]:
!pip install seqeval

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[K     |████████████████████████████████| 43 kB 987 kB/s 
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16180 sha256=1da29dbbba6624c592873bcfb60ced64428f8a642f58c1301d8c67d820ae5dd5
  Stored in directory: /root/.cache/pip/wheels/05/96/ee/7cac4e74f3b19e3158dce26a20a1c86b3533c43ec72a549fd7
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [11]:
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForTokenClassification
import torch
from seqeval.metrics import classification_report

In [12]:
import numpy as np
def compute_metrics(p):
    """Score predictions with seqeval and return the entry for the 'ORG' type.

    `p` unpacks into (predictions, labels). The predicted class id at each
    position is the argmax over axis 2 of the predictions. Positions whose gold
    label is -100 are excluded, and the remaining ids are mapped to tag strings
    through `label_list`. The full report is printed; only result['ORG'] is
    returned.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        # Keep only positions carrying a real label.
        kept = [(pred_id, gold_id)
                for pred_id, gold_id in zip(predicted_row, gold_row)
                if gold_id != -100]
        true_predictions.append([label_list[pred_id] for pred_id, _ in kept])
        true_labels.append([label_list[gold_id] for _, gold_id in kept])

    result = classification_report(true_labels, true_predictions, output_dict = True)

    print(result)

    return result['ORG']


# Training hyper-parameters.
batch_size = 16
learning_rate = 5e-5
epoch = 6
# "test-ner" is the directory checkpoints are written to (see the
# "Saving model checkpoint to test-ner/checkpoint-500" line in the output).
# Evaluation on eval_dataset runs once per epoch.
args = TrainingArguments(
    f"test-ner",
    evaluation_strategy = "epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epoch,
    weight_decay=1e-5,
)

# Collator that batches the already padded token-classification examples.
data_collator = DataCollatorForTokenClassification(tokenizer)


# NOTE(review): dev_dataset is built from the same file as test_dataset
# ('s_test.txt'), so these per-epoch metrics are measured on the test data.
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

print("Training...")
trainer.train()
print("Evaluating...")
# Last expression of the cell: the returned metrics dict is displayed as the cell output.
trainer.evaluate()

The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 2327
  Num Epochs = 6
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 876
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Training...


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Support
1,No log,0.048911,0.695513,0.852652,0.766108,1527
2,No log,0.049386,0.765906,0.835625,0.799248,1527
3,No log,0.046181,0.809459,0.784545,0.796807,1527
4,0.041800,0.054108,0.814864,0.804191,0.809492,1527
5,0.041800,0.067855,0.80127,0.826457,0.813669,1527
6,0.041800,0.069811,0.806513,0.827112,0.816683,1527


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.6955128205128205, 'recall': 0.8526522593320236, 'f1-score': 0.766107678729038, 'support': 1527}, 'micro avg': {'precision': 0.6955128205128205, 'recall': 0.8526522593320236, 'f1-score': 0.766107678729038, 'support': 1527}, 'macro avg': {'precision': 0.6955128205128205, 'recall': 0.8526522593320236, 'f1-score': 0.766107678729038, 'support': 1527}, 'weighted avg': {'precision': 0.6955128205128205, 'recall': 0.8526522593320236, 'f1-score': 0.7661076787290378, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.765906362545018, 'recall': 0.8356254092992796, 'f1-score': 0.7992483557782649, 'support': 1527}, 'micro avg': {'precision': 0.765906362545018, 'recall': 0.8356254092992796, 'f1-score': 0.7992483557782649, 'support': 1527}, 'macro avg': {'precision': 0.765906362545018, 'recall': 0.8356254092992796, 'f1-score': 0.7992483557782649, 'support': 1527}, 'weighted avg': {'precision': 0.765906362545018, 'recall': 0.8356254092992796, 'f1-score': 0.7992483557782648, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8094594594594594, 'recall': 0.7845448592010478, 'f1-score': 0.7968074492850016, 'support': 1527}, 'micro avg': {'precision': 0.8094594594594594, 'recall': 0.7845448592010478, 'f1-score': 0.7968074492850016, 'support': 1527}, 'macro avg': {'precision': 0.8094594594594594, 'recall': 0.7845448592010478, 'f1-score': 0.7968074492850016, 'support': 1527}, 'weighted avg': {'precision': 0.8094594594594595, 'recall': 0.7845448592010478, 'f1-score': 0.7968074492850017, 'support': 1527}}


Saving model checkpoint to test-ner/checkpoint-500
Configuration saved in test-ner/checkpoint-500/config.json
Model weights saved in test-ner/checkpoint-500/pytorch_model.bin
tokenizer config file saved in test-ner/checkpoint-500/tokenizer_config.json
Special tokens file saved in test-ner/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8148639681486397, 'recall': 0.8041912246234446, 'f1-score': 0.8094924192485168, 'support': 1527}, 'micro avg': {'precision': 0.8148639681486397, 'recall': 0.8041912246234446, 'f1-score': 0.8094924192485168, 'support': 1527}, 'macro avg': {'precision': 0.8148639681486397, 'recall': 0.8041912246234446, 'f1-score': 0.8094924192485168, 'support': 1527}, 'weighted avg': {'precision': 0.8148639681486397, 'recall': 0.8041912246234446, 'f1-score': 0.8094924192485169, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8012698412698412, 'recall': 0.8264571054354944, 'f1-score': 0.8136686009026434, 'support': 1527}, 'micro avg': {'precision': 0.8012698412698412, 'recall': 0.8264571054354944, 'f1-score': 0.8136686009026434, 'support': 1527}, 'macro avg': {'precision': 0.8012698412698412, 'recall': 0.8264571054354944, 'f1-score': 0.8136686009026434, 'support': 1527}, 'weighted avg': {'precision': 0.8012698412698412, 'recall': 0.8264571054354944, 'f1-score': 0.8136686009026435, 'support': 1527}}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'micro avg': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'macro avg': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'weighted avg': {'precision': 0.806513409961686, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}}




Training completed. Do not forget to share your model on huggingface.co/models =)


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 748
  Batch size = 16


Evaluating...


{'ORG': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'micro avg': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'macro avg': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'weighted avg': {'precision': 0.806513409961686, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}}


{'eval_loss': 0.06981100887060165,
 'eval_precision': 0.8065134099616859,
 'eval_recall': 0.8271119842829077,
 'eval_f1-score': 0.8166828322017459,
 'eval_support': 1527,
 'eval_runtime': 6.2166,
 'eval_samples_per_second': 120.322,
 'eval_steps_per_second': 7.56,
 'epoch': 6.0}

In [13]:
# Run the fine-tuned model on the test split and unpack the three returned values.
# NOTE(review): test_dataset comes from the same file as dev_dataset, which is
# why the printed report matches the last evaluation exactly.
output, label_ids, metrics = trainer.predict(test_dataset)

The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 748
  Batch size = 16


{'ORG': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'micro avg': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'macro avg': {'precision': 0.8065134099616859, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}, 'weighted avg': {'precision': 0.806513409961686, 'recall': 0.8271119842829077, 'f1-score': 0.8166828322017459, 'support': 1527}}
