# Loads

In [None]:
# Environment setup (Colab): install the third-party packages imported below.
# NOTE(review): versions are not pinned; the recorded output of the metrics cell
# already shows a warning about an upcoming `datasets` release changing how
# `load_metric` must be called - consider pinning versions before re-running.
!pip install datasets -q
!pip install seqeval -q
!pip install transformers[torch] -q
!pip install flair -q

In [None]:
# Mount Google Drive at /content/drive; the training cells below write their
# output directories and saved models under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import re
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from datasets import load_metric
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from transformers import (
    RobertaTokenizer,
    BertTokenizerFast,
    DataCollatorForTokenClassification,
    RobertaForTokenClassification,
    BertForTokenClassification,
    TrainingArguments,
    Trainer,
    pipeline
)

from flair.data import Corpus, Sentence
from flair.datasets import ColumnCorpus
from flair.embeddings import FlairEmbeddings, StackedEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

from seqeval.metrics import f1_score, classification_report

# BIO Data

В этом разделе создаётся BIO-разметка текста «Песни о Нибелунгах» на основе датасета Besnier и Mattingly (2021).

In [None]:
# Read the raw text and split it naively on full stops.
# The encoding is passed explicitly because the text contains non-ASCII letters
# (ä, ö, ü, ...) and the platform default encoding is not guaranteed to be UTF-8.
# NOTE(review): assumes 'Nib_test.txt' is stored as UTF-8 - confirm.
with open('Nib_test.txt', encoding='utf-8') as fi:
    text = fi.read()

# The split does not need the file handle, so it lives outside the `with` block.
splitted_text = text.split('.')

In [None]:
# Turn the raw fragments into cleaned sentences.
# Each fragment is stripped, a fixed set of junk sequences is removed (in this
# exact order) and the full stop consumed by the split is appended again.
# Fragments without any Latin letter are skipped; `id` is the position of the
# fragment in `splitted_text`, so ids have gaps where fragments were dropped.
text_dict = {'id': [], 'text': []}
for fragment_idx, fragment in enumerate(splitted_text):
    clean_text = fragment.strip()
    for junk in ("' ", "'", "^ ", "\n "):
        clean_text = clean_text.replace(junk, '')
    clean_text = clean_text + '.'
    if re.search('[a-zA-Z]', clean_text) is None:
        continue
    text_dict['id'].append(fragment_idx)
    text_dict['text'].append(clean_text)

In [None]:
# Separate punctuation from words: every sentence is re-assembled from its word
# tokens (letters/digits/underscore/apostrophe) and the punctuation marks
# . , ! ? ; joined by single spaces. Any other character is dropped.
new_texts = [
    ' '.join(re.findall(r"[\w']+|[.,!?;]", sentence))
    for sentence in text_dict['text']
]
text_dict['text'] = new_texts

In [None]:
# Tabular preview of the cleaned sentences (columns: id, text)
text_pd = pd.DataFrame.from_dict(text_dict)
text_pd

Unnamed: 0,id,text
0,0,I Aventiure von den Nibelungen .
1,1,line tet in alten maeren wunders vil geseit vo...
2,2,"Ez wuohs in Buregonden ein vil edel magedin , ..."
3,3,dar umbe muosen degene vil Verliesen den lip .
4,4,"Ir pflägen dri künige edel unde rich , Gunther..."
...,...,...
5724,5908,die liute heten alle jämer unde nöt .
5725,5909,"mit leide was verendet des küniges höchgezit ,..."
5726,5910,Ine kan iuch niht bescheiden waz sider dä gesc...
5727,5911,Ine sage iu nu niht mere von der grözen not di...


In [None]:
# Load the Nibelungenlied part of the entity dataset.
# As the displayed preview shows, each row has a category, a lemma and a
# colon-separated list of attested token variants.
ner_nib_pd = pd.read_excel('ner-nib.xlsx')
ner_nib_pd

Unnamed: 0,text,category,lemma,tokens
0,Nibelungenlied,PERSON,Alberich,Albriche:Albrichæ:Albrich:Albrichen:Albriches
1,Nibelungenlied,PERSON,Aldrian,Adrianes:Adrian:Adrianen:Adrians
2,Nibelungenlied,PERSON,Alzey,Alzeye
3,Nibelungenlied,PERSON,Amelrich,Amelrinch:Amelreichen
4,Nibelungenlied,PERSON,Amelungen,Amelunge
...,...,...,...,...
130,Nibelungenlied,PLACE,Thüringen,Durigen
131,Nibelungenlied,PLACE,Ungarn,Vngerlande
132,Nibelungenlied,GROUP,Dänen,Tenen
133,Nibelungenlied,GROUP,Hunnen,Hunin:Hunen


In [None]:
# Convert the dataset to a dict of columns and turn the colon-separated
# `tokens` cell of every row into a list of variants.
# Cells holding a float (i.e. missing values) become an empty list.
ner_nib_dict = ner_nib_pd.to_dict('list')
tokens = [
    [] if type(cell) == float else cell.split(':')
    for cell in ner_nib_dict['tokens']
]
ner_nib_dict['tokens'] = tokens

In [None]:
# Build two dictionaries:
#   full_dict - one record per (sentence, matching dataset row): the matched
#               word, its lemma and its category. A sentence without any match
#               gets a single record with empty token/lemma/category.
#   bio_data  - exactly one record per word: the word and its tag ('B-<category>'
#               or 'O'); `id` is the position of the sentence in text_dict['text'].
full_dict = {'id': [], 'text': [], 'token': [], 'lemma': [], 'category': []}
bio_data = {'id': [], 'bio_token': [], 'bio_tag': []}

# Lookup table: surface form -> indices of the dataset rows that list it, either
# as a token variant or as the lemma itself. Building it once replaces the
# rescan of the whole dataset for every word.
# The lemma is registered for every row, including rows whose variant list is
# empty (previously such rows could never match, because the lemma comparison
# was nested inside the loop over variants).
form_to_rows = {}
for row_idx, (lemma, variants) in enumerate(zip(ner_nib_dict['lemma'], ner_nib_dict['tokens'])):
    for form in (*variants, lemma):
        if not isinstance(form, str):
            continue  # missing values (NaN) can never equal a word
        rows = form_to_rows.setdefault(form, [])
        if not rows or rows[-1] != row_idx:  # register each row once per form
            rows.append(row_idx)

for sent_id, sentence in enumerate(text_dict['text']):
    sentence_has_entity = False

    for word in sentence.split():
        matched_rows = form_to_rows.get(word, ())

        # full_dict keeps every matching row so that ambiguous forms stay visible.
        for row_idx in matched_rows:
            full_dict['id'].append(sent_id)
            full_dict['text'].append(sentence)
            full_dict['token'].append(word)
            full_dict['lemma'].append(ner_nib_dict['lemma'][row_idx])
            full_dict['category'].append(ner_nib_dict['category'][row_idx])

        # bio_data must contain each word exactly once, otherwise the token
        # sequence of the sentence is corrupted; the first matching row decides
        # the tag when a form is listed under several rows.
        if matched_rows:
            bio_tag = 'B-' + ner_nib_dict['category'][matched_rows[0]]
            sentence_has_entity = True
        else:
            bio_tag = 'O'
        bio_data['id'].append(sent_id)
        bio_data['bio_token'].append(word)
        bio_data['bio_tag'].append(bio_tag)

    if not sentence_has_entity:
        full_dict['id'].append(sent_id)
        full_dict['text'].append(sentence)
        full_dict['token'].append('')
        full_dict['lemma'].append('')
        full_dict['category'].append('')

In [None]:
# Preview of the per-sentence matches (sentence, matched token, lemma, category)
full_pd = pd.DataFrame.from_dict(full_dict)
full_pd.head(20)

Unnamed: 0,id,text,token,lemma,category
0,0,I Aventiure von den Nibelungen .,Nibelungen,Nibelung,PERSON
1,1,line tet in alten maeren wunders vil geseit vo...,,,
2,2,"Ez wuohs in Buregonden ein vil edel magedin , ...",,,
3,3,dar umbe muosen degene vil Verliesen den lip .,,,
4,4,"Ir pflägen dri künige edel unde rich , Gunther...",Gunther,Gunther,PERSON
5,4,"Ir pflägen dri künige edel unde rich , Gunther...",Giselher,Giselher,PERSON
6,5,"diu frouwe was ir swester , die helde hetens i...",,,
7,6,Ein richiu küniginne frou Uote ir muoter hiez .,,,
8,7,"ir vater der hiez Dancrät , der in diu erbe li...",,,
9,8,"Die herren wären milte , von arde hoch erborn ...",,,


In [None]:
# Preview of the word-level annotation (sentence id, word, tag)
bio_data_pd = pd.DataFrame.from_dict(bio_data)
bio_data_pd.head(20)

Unnamed: 0,id,bio_token,bio_tag
0,0,I,O
1,0,Aventiure,O
2,0,von,O
3,0,den,O
4,0,Nibelungen,B-PERSON
5,0,.,O
6,1,line,O
7,1,tet,O
8,1,in,O
9,1,alten,O


In [None]:
# Write the complete BIO annotation (all sentences) to its own file
bio_data_pd.to_csv('bio_nib_full.csv', index=False)

Дальше отбираются только те предложения, в которых есть хотя бы один тег B-.

In [None]:
# For every sentence id, collect the tags of its words in order of appearance
tags_dict = {}
for sent_id, tag in zip(bio_data['id'], bio_data['bio_tag']):
    tags_dict.setdefault(sent_id, []).append(tag)

In [None]:
# Ids of the sentences that contain at least one tag other than 'O'.
# The check looks for a non-'O' tag explicitly: counting distinct tags instead
# would wrongly drop a sentence that consists only of identical entity tags.
new_ids = [
    sent_id
    for sent_id, sent_tags in tags_dict.items()
    if any(tag != 'O' for tag in sent_tags)
]

In [None]:
# Same layout as bio_data, restricted to the sentences listed in new_ids
# (i.e. sentences that contain tags other than 'O').
# A set is used for the membership test so that filtering is linear in the
# number of rows instead of rows x len(new_ids).
selected_ids = set(new_ids)
new_bio_data_dict = {'id': [], 'bio_token': [], 'bio_tag': []}
for sent_id, bio_token, bio_tag in zip(bio_data['id'], bio_data['bio_token'], bio_data['bio_tag']):
    if sent_id in selected_ids:
        new_bio_data_dict['id'].append(sent_id)
        new_bio_data_dict['bio_token'].append(bio_token)
        new_bio_data_dict['bio_tag'].append(bio_tag)

In [None]:
# Preview of the filtered annotation (only sentences with at least one entity tag)
new_bio_data_pd = pd.DataFrame.from_dict(new_bio_data_dict)
new_bio_data_pd.head(20)

Unnamed: 0,id,bio_token,bio_tag
0,0,I,O
1,0,Aventiure,O
2,0,von,O
3,0,den,O
4,0,Nibelungen,B-PERSON
5,0,.,O
6,4,Ir,O
7,4,pflägen,O
8,4,dri,O
9,4,künige,O


In [None]:
# Write the filtered annotation to a file; the modelling section reads it back
new_bio_data_pd.to_csv('bio_nib_only_tags.csv', index=False)

# Models

## Data

In [None]:
# Load the filtered BIO annotation written by the previous section.
# NOTE(review): this rebinds `bio_data`, which was a dict of lists above, to a
# DataFrame - re-running earlier cells after this one will see the DataFrame.
bio_data = pd.read_csv('bio_nib_only_tags.csv')

In [None]:
# Group the rows by sentence id: one list of words and one list of tags per
# sentence (both are Series indexed by sentence id)
bio_tokens = bio_data.groupby('id')['bio_token'].agg(list)
bio_tags = bio_data.groupby('id')['bio_tag'].agg(list)

In [None]:
# Split into train, validation and test: 20% of the sentences are held out as
# the test set, then 30% of the remainder becomes the validation set.
# random_state is fixed so the split is reproducible.
train_bio_tokens, test_bio_tokens, train_bio_tags, test_bio_tags = train_test_split(bio_tokens, bio_tags, test_size=0.2, random_state=42)
train_bio_tokens, val_bio_tokens, train_bio_tags, val_bio_tags = train_test_split(train_bio_tokens, train_bio_tags, test_size=0.3, random_state=42)

## Functions and classes

In [None]:
# Expand word-level tags to sub-token level
def get_tokens_pairs(texts, tags, bio=True, tok=None):
    """Tokenize every word and repeat its tag for each resulting sub-token.

    Each sentence is wrapped in the tokenizer's own start/end special tokens
    (e.g. '[CLS]'/'[SEP]' for BERT, '<s>'/'</s>' for RoBERTa), both tagged 'O'.
    Previously '[CLS]'/'[SEP]' were hard-coded, which are not the special
    tokens of every tokenizer used in this notebook.

    Args:
        texts: iterable of sentences, each an iterable of word strings.
        tags: iterable of tag sequences, aligned word by word with `texts`.
        bio: if True, tags are kept unchanged; if False, the two-character
            prefix (e.g. 'B-') is stripped from every tag other than 'O'.
        tok: tokenizer to use; it must provide `tokenize(word)`. When omitted,
            the module-level `tokenizer` is used, as before.

    Returns:
        A pair (res_tokens, res_tags): two lists with one entry per sentence,
        holding the sub-token list and the tag list of equal length.
    """
    active_tok = tokenizer if tok is None else tok
    # Fall back to the BERT-style markers if the tokenizer defines none.
    start_token = getattr(active_tok, 'cls_token', None) or '[CLS]'
    end_token = getattr(active_tok, 'sep_token', None) or '[SEP]'

    res_tokens = []
    res_tags = []

    for text, text_tags in zip(texts, tags):
        all_tokens = [start_token]
        all_tags = ['O']

        for word, tag in zip(text, text_tags):
            pieces = active_tok.tokenize(word)
            all_tokens.extend(pieces)

            if not bio and tag != 'O':
                tag = tag[2:]
            # One tag per sub-token. A word that produces no sub-tokens gets no
            # tag either, so tokens and tags can never drift out of alignment.
            all_tags.extend([tag] * len(pieces))

        all_tokens.append(end_token)
        all_tags.append('O')

        res_tokens.append(all_tokens)
        res_tags.append(all_tags)

    return res_tokens, res_tags

In [None]:
# Mappings between tag names and their numeric class ids (and back)
tag2id = {
    tag: idx
    for idx, tag in enumerate(('B-PERSON', 'B-PLACE', 'B-GROUP', 'O'))
}

id2tag = dict(zip(tag2id.values(), tag2id.keys()))

In [None]:
# Dataset wrapper that serves the NER data in the format used for training
class NERDataset(torch.utils.data.Dataset):
    """Map-style dataset over pre-tokenized sentences and their tag sequences.

    Items are converted lazily: sub-tokens are turned into vocabulary ids with
    the module-level `tokenizer` and tags into class ids with `label2id`.
    """

    def __init__(self, tokens, labels, label2id):
        # tokens / labels: one sequence per sentence; label2id: tag -> class id
        self.tokens, self.labels, self.label2id = tokens, labels, label2id

    def __len__(self):
        """Number of sentences in the dataset."""
        return len(self.tokens)

    def __getitem__(self, idx):
        """Return the `idx`-th sentence as {'input_ids': tensor, 'labels': tensor}."""
        input_ids, label_ids = self.prepare_data(self.tokens[idx], self.labels[idx])
        return {'input_ids': input_ids, 'labels': label_ids}

    def prepare_data(self, tokens, labels):
        """Convert one sentence (sub-token strings, tag strings) into two id tensors."""
        token_ids = []
        for piece in tokens:
            token_ids.append(tokenizer.convert_tokens_to_ids(piece))
        token_tensor = torch.tensor(token_ids)

        label_ids = []
        for name in labels:
            label_ids.append(self.label2id[name])
        label_tensor = torch.tensor(label_ids)

        return token_tensor, label_tensor

In [None]:
# Load the seqeval metric used for evaluation
metric = load_metric("seqeval")

def compute_metrics(eval_preds):
    """Compute overall precision, recall, F1 and accuracy for the Trainer.

    `eval_preds` is unpacked into (logits, labels). The predicted class is the
    argmax over the last axis of the logits. Positions whose label is -100 are
    excluded from both references and predictions before ids are mapped back
    to tag names via `id2tag`.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Reference tags, with the -100 positions removed
    true_labels = []
    for label_row in labels:
        true_labels.append([id2tag[l] for l in label_row if l != -100])

    # Predicted tags at exactly the positions kept above
    true_predictions = []
    for pred_row, label_row in zip(predictions, labels):
        kept = []
        for p, l in zip(pred_row, label_row):
            if l != -100:
                kept.append(id2tag[p])
        true_predictions.append(kept)

    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)

    summary = {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }
    return summary

  metric = load_metric("seqeval")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

## Distilroberta-base-mhg-charter-mlm

### Train

In [None]:
# Choose the checkpoint and create its tokenizer and the data collator.
# NOTE: `tokenizer` is a module-level name read by get_tokens_pairs and
# NERDataset, so this cell must run before those are used for this model.
model_name = 'atzenhofer/distilroberta-base-mhg-charter-mlm'
tokenizer = RobertaTokenizer.from_pretrained(model_name, model_max_length=512)
data_collator = DataCollatorForTokenClassification(tokenizer)

tokenizer_config.json:   0%|          | 0.00/386 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

In [None]:
# Sub-token / tag sequences for the train and validation splits
train_tokens, train_tags = get_tokens_pairs(train_bio_tokens, train_bio_tags)
val_tokens, val_tags = get_tokens_pairs(val_bio_tokens, val_bio_tags)

In [None]:
# Wrap both splits in the dataset class used for training
train_dataset = NERDataset(train_tokens, train_tags, tag2id)
val_dataset = NERDataset(val_tokens, val_tags, tag2id)

In [None]:
# Instantiate the token-classification model with one output class per tag.
# As the recorded output notes, the classifier head is newly initialized.
model = RobertaForTokenClassification.from_pretrained(
    model_name,
    num_labels = len(tag2id),
    output_attentions = False,
    output_hidden_states = False
)

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at atzenhofer/distilroberta-base-mhg-charter-mlm and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Hyperparameters and other training settings: evaluation after every epoch,
# 20 epochs, batch size 4 for training and evaluation. save_strategy='no' is
# set here; the model is saved explicitly at the end of this cell.
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/4 курс/Диплом/models/output_dirs/distilroberta-base-mhg-charter-mlm-run20epoch',
    evaluation_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=20,
    weight_decay=0.01,
    save_strategy='no'
)

# Set up the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train
trainer.train()

# Save the model; the test cell loads it from this path
trainer.save_model('/content/drive/MyDrive/4 курс/Диплом/models/distilroberta-base-mhg-charter-mlm-20epoch')



Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.037646,0.93038,0.945675,0.937965,0.990181
2,No log,0.028305,0.958778,0.96426,0.961511,0.993779
3,0.066500,0.028909,0.943554,0.967834,0.95554,0.993169
4,0.066500,0.036635,0.951782,0.973553,0.962544,0.993962
5,0.010400,0.038681,0.941949,0.974267,0.957836,0.993413
6,0.010400,0.036908,0.958042,0.979271,0.96854,0.994816
7,0.004200,0.035108,0.964739,0.977841,0.971246,0.995243
8,0.004200,0.033969,0.977666,0.969979,0.973807,0.99567
9,0.004200,0.037843,0.964739,0.977841,0.971246,0.995182
10,0.002500,0.038988,0.959689,0.969979,0.964806,0.99445


  _warn_prf(average, modifier, msg_start, len(result))


### Test

In [None]:
# Sub-token / tag sequences for the test split (uses the current module-level `tokenizer`)
test_tokens, test_tags = get_tokens_pairs(test_bio_tokens, test_bio_tags)

In [None]:
# Evaluate on the test split.
# Every word is sent through the pipeline on its own and the per-sub-token
# predictions are concatenated per sentence. The numeric suffix of the
# predicted entity name is mapped back to a tag via id2tag.
classifier = pipeline("ner", model='/content/drive/MyDrive/4 курс/Диплом/models/distilroberta-base-mhg-charter-mlm-20epoch')

pred_labels = []
for sent_idx in tqdm(range(len(test_bio_tokens))):
    sentence_preds = [
        item
        for word in test_bio_tokens.iloc[sent_idx]
        for item in classifier(word)
    ]
    pred_labels.append(
        [id2tag[int(item['entity'].split('_')[-1])] for item in sentence_preds]
    )

# Reference tags without the leading and trailing special-token positions
true_labels = [test_tags[sent_idx][1:-1] for sent_idx in tqdm(range(len(test_tokens)))]

res_dict = classification_report(true_labels, pred_labels, output_dict=True)

pd.DataFrame(res_dict).T

100%|██████████| 309/309 [04:10<00:00,  1.23it/s]
100%|██████████| 309/309 [00:00<00:00, 491296.41it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,precision,recall,f1-score,support
GROUP,0.0,0.0,0.0,2.0
PERSON,0.911817,0.999034,0.953435,1035.0
PLACE,0.95,0.883721,0.915663,129.0
micro avg,0.91547,0.984563,0.94876,1166.0
macro avg,0.620606,0.627585,0.623032,1166.0
weighted avg,0.914477,0.984563,0.94762,1166.0


## GHisBERT

### Train

In [None]:
# Choose the checkpoint and create its tokenizer and the data collator.
# NOTE: this rebinds the module-level `tokenizer` read by get_tokens_pairs and
# NERDataset, so token/tag pairs and datasets are rebuilt in the cells below.
model_name = 'christinbeck/GHisBERT'
tokenizer = BertTokenizerFast.from_pretrained(model_name, model_max_length=512)
data_collator = DataCollatorForTokenClassification(tokenizer)

tokenizer_config.json:   0%|          | 0.00/176 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/219k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/589 [00:00<?, ?B/s]

In [None]:
# Sub-token / tag sequences for the train and validation splits
train_tokens, train_tags = get_tokens_pairs(train_bio_tokens, train_bio_tags)
val_tokens, val_tags = get_tokens_pairs(val_bio_tokens, val_bio_tags)

In [None]:
# Wrap both splits in the dataset class used for training
train_dataset = NERDataset(train_tokens, train_tags, tag2id)
val_dataset = NERDataset(val_tokens, val_tags, tag2id)

In [None]:
# Instantiate the token-classification model with one output class per tag.
# As the recorded output notes, the classifier head is newly initialized.
model = BertForTokenClassification.from_pretrained(
    model_name,
    num_labels = len(tag2id),
    output_attentions = False,
    output_hidden_states = False
)

pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at christinbeck/GHisBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Hyperparameters and other training settings: evaluation after every epoch,
# 20 epochs, batch size 4 for training and evaluation. save_strategy='no' is
# set here; the model is saved explicitly at the end of this cell.
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/4 курс/Диплом/models/output_dirs/GHisBERT-run20epoch',
    evaluation_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=20,
    weight_decay=0.01,
    save_strategy='no'
)

# Set up the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train
trainer.train()

# Save the model; the test cell loads it from this path
trainer.save_model('/content/drive/MyDrive/4 курс/Диплом/models/GHisBERT-20epoch')



Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.072637,0.872051,0.879231,0.875626,0.98027
2,No log,0.050768,0.973108,0.89387,0.931807,0.988518
3,0.089500,0.050008,0.935574,0.916743,0.926063,0.988033
4,0.089500,0.04272,0.955556,0.94419,0.949839,0.991752
5,0.022800,0.042432,0.964218,0.936871,0.950348,0.991671
6,0.022800,0.040082,0.97151,0.935956,0.953402,0.992318
7,0.010900,0.052383,0.933929,0.956999,0.945323,0.990539
8,0.010900,0.049541,0.975191,0.935041,0.954694,0.992399
9,0.010900,0.0512,0.914387,0.94785,0.930818,0.988437
10,0.007900,0.040848,0.947795,0.963403,0.955535,0.99248


  _warn_prf(average, modifier, msg_start, len(result))


### Test

In [None]:
# Sub-token / tag sequences for the test split (uses the current module-level `tokenizer`)
test_tokens, test_tags = get_tokens_pairs(test_bio_tokens, test_bio_tags)

In [None]:
# Evaluate on the test split.
# Each sentence is joined with spaces and sent through the pipeline as a whole.
# The numeric suffix of every predicted entity name is mapped back to a tag
# via id2tag.
classifier = pipeline("ner", model='/content/drive/MyDrive/4 курс/Диплом/models/GHisBERT-20epoch')

pred_labels = []
for sent_idx in tqdm(range(len(test_bio_tokens))):
    sentence_text = ' '.join(test_bio_tokens.iloc[sent_idx])
    pred_labels.append(
        [id2tag[int(item['entity'].split('_')[-1])] for item in classifier(sentence_text)]
    )

# Reference tags without the leading and trailing special-token positions
true_labels = [test_tags[sent_idx][1:-1] for sent_idx in tqdm(range(len(test_tokens)))]

res_dict = classification_report(true_labels, pred_labels, output_dict=True)

pd.DataFrame(res_dict).T

100%|██████████| 309/309 [00:50<00:00,  6.06it/s]
100%|██████████| 309/309 [00:00<00:00, 549402.26it/s]


Unnamed: 0,precision,recall,f1-score,support
GROUP,0.0,0.0,0.0,2.0
PERSON,0.964286,0.98063,0.972389,826.0
PLACE,0.989362,0.845455,0.911765,110.0
micro avg,0.963714,0.962687,0.9632,938.0
macro avg,0.651216,0.608695,0.628051,938.0
weighted avg,0.96517,0.962687,0.963206,938.0


## Flair

### Data for Flair

In [None]:
# Bring train, test and validation into the column format read by Flair's
# ColumnCorpus: one "token tag" pair per line, with an empty row after every
# sentence.
train_bio_tokens_flat = [item for sent in train_bio_tokens for item in (*sent, '')]
train_bio_tags_flat = [item for sent in train_bio_tags for item in (*sent, '')]

train = pd.DataFrame({'tokens': train_bio_tokens_flat, 'tags': train_bio_tags_flat})
train.to_csv('/content/train.txt', header=None, index=False, sep=' ')

In [None]:
# Test split in the same column format: one "token tag" pair per line, with an
# empty row after every sentence.
test_bio_tokens_flat = [item for sent in test_bio_tokens for item in (*sent, '')]
test_bio_tags_flat = [item for sent in test_bio_tags for item in (*sent, '')]

test = pd.DataFrame({'tokens': test_bio_tokens_flat, 'tags': test_bio_tags_flat})
test.to_csv('/content/test.txt', header=None, index=False, sep=' ')

In [None]:
# Validation split in the same column format: one "token tag" pair per line,
# with an empty row after every sentence.
val_bio_tokens_flat = [item for sent in val_bio_tokens for item in (*sent, '')]
val_bio_tags_flat = [item for sent in val_bio_tags for item in (*sent, '')]

val = pd.DataFrame({'tokens': val_bio_tokens_flat, 'tags': val_bio_tags_flat})
val.to_csv('/content/val.txt', header=None, index=False, sep=' ')

### Train

In [None]:
# Column layout of the data files: column 0 is the token, column 1 the NER tag
columns = {0: 'text', 1: 'ner'}

# Folder that holds the train, test and validation files written above
data_folder = '/content'

# Build the corpus from the three files
corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file='train.txt',
                              test_file='test.txt',
                              dev_file='val.txt')

2024-05-25 09:05:41,037 Reading data from /content
2024-05-25 09:05:41,046 Train: /content/train.txt
2024-05-25 09:05:41,050 Dev: /content/val.txt
2024-05-25 09:05:41,053 Test: /content/test.txt


In [None]:
# Name of the column (label type) the tagger has to predict
label_type = 'ner'

In [None]:
# Dictionary of the labels that occur in the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type, add_unk=False)
print(label_dict)

2024-05-25 09:05:46,392 Computing label dictionary. Progress:


0it [00:00, ?it/s]
862it [00:00, 12369.49it/s]

2024-05-25 09:05:46,518 Dictionary created for label 'ner' with 3 values: PERSON (seen 938 times), PLACE (seen 161 times), GROUP (seen 1 times)
Dictionary with 3 tags: PERSON, PLACE, GROUP





In [None]:
# Initialise the Flair embeddings: the 'de-forward' and 'de-backward' models,
# stacked into a single embedding
embedding_types = [
    FlairEmbeddings('de-forward'),
    FlairEmbeddings('de-backward'),
]

embeddings = StackedEmbeddings(embeddings=embedding_types)

2024-05-25 09:05:48,890 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-mix-german-forward-v0.2rc.pt not found in cache, downloading to /tmp/tmpnhqx_ix4


100%|██████████| 69.4M/69.4M [00:07<00:00, 9.90MB/s]

2024-05-25 09:05:57,198 copying /tmp/tmpnhqx_ix4 to cache at /root/.flair/embeddings/lm-mix-german-forward-v0.2rc.pt
2024-05-25 09:05:57,292 removing temp file /tmp/tmpnhqx_ix4





2024-05-25 09:05:59,139 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-mix-german-backward-v0.2rc.pt not found in cache, downloading to /tmp/tmpf9xwmp2g


100%|██████████| 69.4M/69.4M [00:06<00:00, 10.9MB/s]

2024-05-25 09:06:06,558 copying /tmp/tmpf9xwmp2g to cache at /root/.flair/embeddings/lm-mix-german-backward-v0.2rc.pt





2024-05-25 09:06:06,657 removing temp file /tmp/tmpf9xwmp2g


In [None]:
# Initialise the SequenceTagger on top of the stacked embeddings
tagger = SequenceTagger(hidden_size=256,
                        embeddings=embeddings,
                        tag_dictionary=label_dict,
                        tag_type=label_type)

2024-05-25 09:06:06,945 SequenceTagger predicts: Dictionary with 13 tags: O, S-PERSON, B-PERSON, E-PERSON, I-PERSON, S-PLACE, B-PLACE, E-PLACE, I-PLACE, S-GROUP, B-GROUP, E-GROUP, I-GROUP


In [None]:
# Set up the trainer.
# NOTE: this rebinds `trainer`, which held the transformers Trainer above.
trainer = ModelTrainer(tagger, corpus)

In [None]:
# Training.
#
# A fresh ModelTrainer is created in this cell on purpose, so that re-running
# the cell never reuses a trainer that has already been through `train()`.
# The log recorded for the previous run of this cell shows every scheduler
# message five times and the learning rate dropping 0.1 -> 0.003125 in one
# evaluation step (five halvings at once), after which training stopped early
# with "learning rate too small - quitting training!".
# NOTE(review): this looks like repeated `train()` calls on one trainer instance
# stacking the annealing hooks - confirm against the installed flair version.
#
# NOTE: `tagger` keeps its weights between runs; re-run the SequenceTagger cell
# first if training has to start from scratch.
trainer = ModelTrainer(tagger, corpus)

# The call stays the last expression so the cell still displays the result dict.
trainer.train('/content/drive/MyDrive/4 курс/Диплом/models/flair-20epoch-1',
              learning_rate=1e-1,
              mini_batch_size=4,
              max_epochs=20)

2024-05-25 09:28:03,885 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:03,898 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.25, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
      )
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.25, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=4096, out_features=4096, bias=True)
  (rnn): LSTM(4096, 256, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=512, out_features=15, bias=True)
  (loss_function): ViterbiLoss()
  (crf): CRF()
)"
2024-05-25 09:28:03,903 ------------------------------------



2024-05-25 09:28:03,995 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:04,001 Final evaluation on model from best epoch (best-model.pt)
2024-05-25 09:28:04,009  - metric: "('micro avg', 'f1-score')"
2024-05-25 09:28:04,013 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:04,014 Computation:
2024-05-25 09:28:04,016  - compute on device: cuda:0
2024-05-25 09:28:04,019  - embedding storage: cpu
2024-05-25 09:28:04,020 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:04,027 Model training base path: "/content/drive/MyDrive/4 курс/Диплом/models/flair-20epoch-1"
2024-05-25 09:28:04,031 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:04,036 -------------------------------------------------------------------------------

100%|██████████| 6/6 [00:02<00:00,  2.96it/s]

2024-05-25 09:28:20,940 DEV : loss 0.024281982332468033 - f1-score (micro avg)  0.9578
2024-05-25 09:28:20,969  - 0 epochs without improvement
2024-05-25 09:28:20,975  - 0 epochs without improvement
2024-05-25 09:28:20,981  - 0 epochs without improvement
2024-05-25 09:28:20,987  - 0 epochs without improvement
2024-05-25 09:28:20,992  - 0 epochs without improvement
2024-05-25 09:28:20,998 saving best model





2024-05-25 09:28:22,044 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:23,161 epoch 2 - iter 21/216 - loss 0.02087761 - time (sec): 1.11 - samples/sec: 1738.77 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:24,276 epoch 2 - iter 42/216 - loss 0.01895562 - time (sec): 2.23 - samples/sec: 1691.01 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:25,325 epoch 2 - iter 63/216 - loss 0.01973640 - time (sec): 3.28 - samples/sec: 1766.97 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:26,065 epoch 2 - iter 84/216 - loss 0.01951714 - time (sec): 4.02 - samples/sec: 1890.84 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:26,873 epoch 2 - iter 105/216 - loss 0.01728138 - time (sec): 4.83 - samples/sec: 1967.31 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:27,602 epoch 2 - iter 126/216 - loss 0.01630979 - time (sec): 5.56 - samples/sec: 2032.01 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:28,2

100%|██████████| 6/6 [00:02<00:00,  2.64it/s]

2024-05-25 09:28:33,184 DEV : loss 0.019561659544706345 - f1-score (micro avg)  0.9637
2024-05-25 09:28:33,219  - 0 epochs without improvement
2024-05-25 09:28:33,225  - 0 epochs without improvement
2024-05-25 09:28:33,232  - 0 epochs without improvement
2024-05-25 09:28:33,238  - 0 epochs without improvement
2024-05-25 09:28:33,245  - 0 epochs without improvement
2024-05-25 09:28:33,251 saving best model





2024-05-25 09:28:34,162 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:35,129 epoch 3 - iter 21/216 - loss 0.00398560 - time (sec): 0.96 - samples/sec: 1930.33 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:36,198 epoch 3 - iter 42/216 - loss 0.00539641 - time (sec): 2.03 - samples/sec: 1898.29 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:37,168 epoch 3 - iter 63/216 - loss 0.00616698 - time (sec): 3.00 - samples/sec: 1844.17 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:38,324 epoch 3 - iter 84/216 - loss 0.00543664 - time (sec): 4.16 - samples/sec: 1791.76 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:39,106 epoch 3 - iter 105/216 - loss 0.00718899 - time (sec): 4.94 - samples/sec: 1880.93 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:39,929 epoch 3 - iter 126/216 - loss 0.00980118 - time (sec): 5.76 - samples/sec: 1943.07 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:40,7

100%|██████████| 6/6 [00:02<00:00,  2.62it/s]

2024-05-25 09:28:45,849 DEV : loss 0.02295096032321453 - f1-score (micro avg)  0.9629
2024-05-25 09:28:45,880  - 1 epochs without improvement
2024-05-25 09:28:45,886  - 1 epochs without improvement
2024-05-25 09:28:45,892  - 1 epochs without improvement
2024-05-25 09:28:45,898  - 1 epochs without improvement
2024-05-25 09:28:45,903  - 1 epochs without improvement
2024-05-25 09:28:45,909 ----------------------------------------------------------------------------------------------------





2024-05-25 09:28:46,741 epoch 4 - iter 21/216 - loss 0.02423891 - time (sec): 0.83 - samples/sec: 2173.25 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:47,494 epoch 4 - iter 42/216 - loss 0.02222719 - time (sec): 1.58 - samples/sec: 2263.65 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:48,231 epoch 4 - iter 63/216 - loss 0.02230423 - time (sec): 2.32 - samples/sec: 2317.06 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:49,484 epoch 4 - iter 84/216 - loss 0.01997453 - time (sec): 3.57 - samples/sec: 2042.82 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:50,633 epoch 4 - iter 105/216 - loss 0.01783926 - time (sec): 4.72 - samples/sec: 1932.58 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:51,948 epoch 4 - iter 126/216 - loss 0.01676859 - time (sec): 6.04 - samples/sec: 1820.87 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:28:53,057 epoch 4 - iter 147/216 - loss 0.01845648 - time (sec): 7.15 - samples/sec: 1809.36 - lr: 0.100000 - momentum: 0.000000
202

100%|██████████| 6/6 [00:02<00:00,  2.88it/s]

2024-05-25 09:28:58,031 DEV : loss 0.018757566809654236 - f1-score (micro avg)  0.9677
2024-05-25 09:28:58,058  - 0 epochs without improvement
2024-05-25 09:28:58,064  - 0 epochs without improvement
2024-05-25 09:28:58,069  - 0 epochs without improvement
2024-05-25 09:28:58,075  - 0 epochs without improvement
2024-05-25 09:28:58,083  - 0 epochs without improvement
2024-05-25 09:28:58,092 saving best model





2024-05-25 09:28:58,974 ----------------------------------------------------------------------------------------------------
2024-05-25 09:28:59,798 epoch 5 - iter 21/216 - loss 0.01752544 - time (sec): 0.82 - samples/sec: 2099.51 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:00,682 epoch 5 - iter 42/216 - loss 0.01709918 - time (sec): 1.71 - samples/sec: 2207.00 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:01,482 epoch 5 - iter 63/216 - loss 0.01477670 - time (sec): 2.51 - samples/sec: 2225.86 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:02,244 epoch 5 - iter 84/216 - loss 0.01660954 - time (sec): 3.27 - samples/sec: 2261.13 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:03,289 epoch 5 - iter 105/216 - loss 0.01524232 - time (sec): 4.31 - samples/sec: 2154.19 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:04,374 epoch 5 - iter 126/216 - loss 0.01436313 - time (sec): 5.40 - samples/sec: 2078.34 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:05,4

100%|██████████| 6/6 [00:02<00:00,  2.46it/s]

2024-05-25 09:29:10,785 DEV : loss 0.01881861686706543 - f1-score (micro avg)  0.96
2024-05-25 09:29:10,815  - 1 epochs without improvement
2024-05-25 09:29:10,821  - 1 epochs without improvement
2024-05-25 09:29:10,835  - 1 epochs without improvement
2024-05-25 09:29:10,841  - 1 epochs without improvement
2024-05-25 09:29:10,847  - 1 epochs without improvement
2024-05-25 09:29:10,855 ----------------------------------------------------------------------------------------------------





2024-05-25 09:29:11,610 epoch 6 - iter 21/216 - loss 0.01213714 - time (sec): 0.75 - samples/sec: 2361.25 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:12,439 epoch 6 - iter 42/216 - loss 0.01052915 - time (sec): 1.58 - samples/sec: 2200.24 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:13,330 epoch 6 - iter 63/216 - loss 0.00847144 - time (sec): 2.47 - samples/sec: 2184.90 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:14,127 epoch 6 - iter 84/216 - loss 0.00982054 - time (sec): 3.27 - samples/sec: 2209.39 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:15,036 epoch 6 - iter 105/216 - loss 0.01039395 - time (sec): 4.18 - samples/sec: 2190.78 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:15,952 epoch 6 - iter 126/216 - loss 0.00995629 - time (sec): 5.09 - samples/sec: 2195.10 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:17,032 epoch 6 - iter 147/216 - loss 0.00990979 - time (sec): 6.18 - samples/sec: 2110.75 - lr: 0.100000 - momentum: 0.000000
202

100%|██████████| 6/6 [00:01<00:00,  3.02it/s]

2024-05-25 09:29:22,839 DEV : loss 0.01786991022527218 - f1-score (micro avg)  0.9679
2024-05-25 09:29:22,867  - 0 epochs without improvement
2024-05-25 09:29:22,873  - 0 epochs without improvement
2024-05-25 09:29:22,878  - 0 epochs without improvement
2024-05-25 09:29:22,885  - 0 epochs without improvement
2024-05-25 09:29:22,891  - 0 epochs without improvement
2024-05-25 09:29:22,897 saving best model





2024-05-25 09:29:23,809 ----------------------------------------------------------------------------------------------------
2024-05-25 09:29:24,601 epoch 7 - iter 21/216 - loss 0.01523166 - time (sec): 0.79 - samples/sec: 2153.06 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:25,450 epoch 7 - iter 42/216 - loss 0.01047576 - time (sec): 1.64 - samples/sec: 2241.43 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:26,267 epoch 7 - iter 63/216 - loss 0.00950425 - time (sec): 2.46 - samples/sec: 2307.00 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:26,984 epoch 7 - iter 84/216 - loss 0.00946546 - time (sec): 3.17 - samples/sec: 2360.79 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:27,818 epoch 7 - iter 105/216 - loss 0.00883157 - time (sec): 4.01 - samples/sec: 2365.22 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:28,560 epoch 7 - iter 126/216 - loss 0.00838880 - time (sec): 4.75 - samples/sec: 2365.53 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:29,3

100%|██████████| 6/6 [00:02<00:00,  2.38it/s]

2024-05-25 09:29:35,044 DEV : loss 0.02264339104294777 - f1-score (micro avg)  0.9668
2024-05-25 09:29:35,071  - 1 epochs without improvement
2024-05-25 09:29:35,077  - 1 epochs without improvement
2024-05-25 09:29:35,082  - 1 epochs without improvement
2024-05-25 09:29:35,088  - 1 epochs without improvement
2024-05-25 09:29:35,094  - 1 epochs without improvement
2024-05-25 09:29:35,100 ----------------------------------------------------------------------------------------------------





2024-05-25 09:29:35,832 epoch 8 - iter 21/216 - loss 0.00751686 - time (sec): 0.73 - samples/sec: 2313.87 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:36,659 epoch 8 - iter 42/216 - loss 0.01058189 - time (sec): 1.56 - samples/sec: 2337.24 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:37,428 epoch 8 - iter 63/216 - loss 0.00907505 - time (sec): 2.33 - samples/sec: 2354.56 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:38,203 epoch 8 - iter 84/216 - loss 0.01243745 - time (sec): 3.10 - samples/sec: 2333.69 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:38,957 epoch 8 - iter 105/216 - loss 0.01023406 - time (sec): 3.86 - samples/sec: 2358.31 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:39,697 epoch 8 - iter 126/216 - loss 0.01087919 - time (sec): 4.60 - samples/sec: 2379.39 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:40,522 epoch 8 - iter 147/216 - loss 0.01148692 - time (sec): 5.42 - samples/sec: 2374.72 - lr: 0.100000 - momentum: 0.000000
202

100%|██████████| 6/6 [00:02<00:00,  2.40it/s]

2024-05-25 09:29:45,607 DEV : loss 0.020099500194191933 - f1-score (micro avg)  0.9662
2024-05-25 09:29:45,657  - 2 epochs without improvement
2024-05-25 09:29:45,667  - 2 epochs without improvement
2024-05-25 09:29:45,673  - 2 epochs without improvement
2024-05-25 09:29:45,684  - 2 epochs without improvement
2024-05-25 09:29:45,691  - 2 epochs without improvement
2024-05-25 09:29:45,699 ----------------------------------------------------------------------------------------------------





2024-05-25 09:29:46,695 epoch 9 - iter 21/216 - loss 0.00478833 - time (sec): 0.99 - samples/sec: 1691.34 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:47,875 epoch 9 - iter 42/216 - loss 0.01021327 - time (sec): 2.17 - samples/sec: 1604.34 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:48,751 epoch 9 - iter 63/216 - loss 0.01030900 - time (sec): 3.05 - samples/sec: 1790.91 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:49,574 epoch 9 - iter 84/216 - loss 0.01021977 - time (sec): 3.87 - samples/sec: 1896.37 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:51,021 epoch 9 - iter 105/216 - loss 0.00921643 - time (sec): 5.32 - samples/sec: 1734.76 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:52,402 epoch 9 - iter 126/216 - loss 0.00836264 - time (sec): 6.70 - samples/sec: 1643.76 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:29:53,309 epoch 9 - iter 147/216 - loss 0.00999346 - time (sec): 7.61 - samples/sec: 1722.02 - lr: 0.100000 - momentum: 0.000000
202

100%|██████████| 6/6 [00:02<00:00,  2.30it/s]

2024-05-25 09:29:58,846 DEV : loss 0.017118511721491814 - f1-score (micro avg)  0.9703
2024-05-25 09:29:58,897  - 0 epochs without improvement
2024-05-25 09:29:58,905  - 0 epochs without improvement
2024-05-25 09:29:58,910  - 0 epochs without improvement
2024-05-25 09:29:58,917  - 0 epochs without improvement
2024-05-25 09:29:58,924  - 0 epochs without improvement
2024-05-25 09:29:58,930 saving best model





2024-05-25 09:30:00,148 ----------------------------------------------------------------------------------------------------
2024-05-25 09:30:01,345 epoch 10 - iter 21/216 - loss 0.01419147 - time (sec): 1.19 - samples/sec: 1572.40 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:02,321 epoch 10 - iter 42/216 - loss 0.00794211 - time (sec): 2.17 - samples/sec: 1709.07 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:03,110 epoch 10 - iter 63/216 - loss 0.00819806 - time (sec): 2.96 - samples/sec: 1880.40 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:03,854 epoch 10 - iter 84/216 - loss 0.00857316 - time (sec): 3.70 - samples/sec: 1945.98 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:04,735 epoch 10 - iter 105/216 - loss 0.00786553 - time (sec): 4.58 - samples/sec: 2027.13 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:05,491 epoch 10 - iter 126/216 - loss 0.00815737 - time (sec): 5.34 - samples/sec: 2085.84 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:3

100%|██████████| 6/6 [00:02<00:00,  2.55it/s]

2024-05-25 09:30:11,151 DEV : loss 0.02017958275973797 - f1-score (micro avg)  0.9667
2024-05-25 09:30:11,180  - 1 epochs without improvement
2024-05-25 09:30:11,186  - 1 epochs without improvement
2024-05-25 09:30:11,192  - 1 epochs without improvement
2024-05-25 09:30:11,196  - 1 epochs without improvement
2024-05-25 09:30:11,202  - 1 epochs without improvement
2024-05-25 09:30:11,208 ----------------------------------------------------------------------------------------------------





2024-05-25 09:30:12,056 epoch 11 - iter 21/216 - loss 0.00746041 - time (sec): 0.85 - samples/sec: 2145.50 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:13,133 epoch 11 - iter 42/216 - loss 0.00695897 - time (sec): 1.92 - samples/sec: 1974.57 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:14,152 epoch 11 - iter 63/216 - loss 0.00559195 - time (sec): 2.94 - samples/sec: 1950.52 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:15,336 epoch 11 - iter 84/216 - loss 0.00772612 - time (sec): 4.13 - samples/sec: 1843.30 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:16,199 epoch 11 - iter 105/216 - loss 0.00873808 - time (sec): 4.99 - samples/sec: 1893.06 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:16,909 epoch 11 - iter 126/216 - loss 0.00912584 - time (sec): 5.70 - samples/sec: 1966.95 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:17,678 epoch 11 - iter 147/216 - loss 0.00848639 - time (sec): 6.47 - samples/sec: 2016.47 - lr: 0.100000 - momentum: 0.000

100%|██████████| 6/6 [00:02<00:00,  2.99it/s]

2024-05-25 09:30:22,303 DEV : loss 0.025108588859438896 - f1-score (micro avg)  0.964
2024-05-25 09:30:22,330  - 2 epochs without improvement
2024-05-25 09:30:22,336  - 2 epochs without improvement
2024-05-25 09:30:22,342  - 2 epochs without improvement
2024-05-25 09:30:22,346  - 2 epochs without improvement
2024-05-25 09:30:22,350  - 2 epochs without improvement
2024-05-25 09:30:22,355 ----------------------------------------------------------------------------------------------------





2024-05-25 09:30:23,103 epoch 12 - iter 21/216 - loss 0.01231259 - time (sec): 0.75 - samples/sec: 2220.39 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:23,833 epoch 12 - iter 42/216 - loss 0.00909005 - time (sec): 1.48 - samples/sec: 2283.16 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:24,571 epoch 12 - iter 63/216 - loss 0.01289055 - time (sec): 2.21 - samples/sec: 2290.95 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:25,417 epoch 12 - iter 84/216 - loss 0.01205428 - time (sec): 3.06 - samples/sec: 2317.78 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:26,360 epoch 12 - iter 105/216 - loss 0.01268136 - time (sec): 4.00 - samples/sec: 2223.36 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:27,421 epoch 12 - iter 126/216 - loss 0.01256395 - time (sec): 5.06 - samples/sec: 2140.77 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:28,637 epoch 12 - iter 147/216 - loss 0.01397908 - time (sec): 6.28 - samples/sec: 2048.03 - lr: 0.100000 - momentum: 0.000

100%|██████████| 6/6 [00:02<00:00,  2.96it/s]

2024-05-25 09:30:33,531 DEV : loss 0.02038664184510708 - f1-score (micro avg)  0.9698
2024-05-25 09:30:33,559  - 3 epochs without improvement
2024-05-25 09:30:33,565  - 3 epochs without improvement
2024-05-25 09:30:33,571  - 3 epochs without improvement
2024-05-25 09:30:33,575  - 3 epochs without improvement
2024-05-25 09:30:33,582  - 3 epochs without improvement
2024-05-25 09:30:33,588 ----------------------------------------------------------------------------------------------------





2024-05-25 09:30:34,387 epoch 13 - iter 21/216 - loss 0.02044364 - time (sec): 0.80 - samples/sec: 2173.96 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:35,200 epoch 13 - iter 42/216 - loss 0.01349211 - time (sec): 1.61 - samples/sec: 2291.76 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:36,013 epoch 13 - iter 63/216 - loss 0.01143986 - time (sec): 2.42 - samples/sec: 2379.60 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:36,729 epoch 13 - iter 84/216 - loss 0.00926124 - time (sec): 3.14 - samples/sec: 2408.39 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:37,368 epoch 13 - iter 105/216 - loss 0.01054984 - time (sec): 3.78 - samples/sec: 2450.01 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:38,180 epoch 13 - iter 126/216 - loss 0.01104731 - time (sec): 4.59 - samples/sec: 2414.18 - lr: 0.100000 - momentum: 0.000000
2024-05-25 09:30:38,982 epoch 13 - iter 147/216 - loss 0.01146665 - time (sec): 5.39 - samples/sec: 2405.75 - lr: 0.100000 - momentum: 0.000

100%|██████████| 6/6 [00:02<00:00,  2.72it/s]

2024-05-25 09:30:44,591 DEV : loss 0.016965307295322418 - f1-score (micro avg)  0.9683
2024-05-25 09:30:44,619  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.05]
2024-05-25 09:30:44,625  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.025]
2024-05-25 09:30:44,630  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0125]
2024-05-25 09:30:44,636  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00625]
2024-05-25 09:30:44,641  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.003125]
2024-05-25 09:30:44,647 ----------------------------------------------------------------------------------------------------





2024-05-25 09:30:45,458 epoch 14 - iter 21/216 - loss 0.02410454 - time (sec): 0.81 - samples/sec: 2291.74 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:46,286 epoch 14 - iter 42/216 - loss 0.01642428 - time (sec): 1.64 - samples/sec: 2339.37 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:47,071 epoch 14 - iter 63/216 - loss 0.01169874 - time (sec): 2.42 - samples/sec: 2310.67 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:47,809 epoch 14 - iter 84/216 - loss 0.01390950 - time (sec): 3.16 - samples/sec: 2336.45 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:48,632 epoch 14 - iter 105/216 - loss 0.01286018 - time (sec): 3.98 - samples/sec: 2334.47 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:49,444 epoch 14 - iter 126/216 - loss 0.01179730 - time (sec): 4.79 - samples/sec: 2344.37 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:50,130 epoch 14 - iter 147/216 - loss 0.01123961 - time (sec): 5.48 - samples/sec: 2339.09 - lr: 0.003125 - momentum: 0.000

100%|██████████| 6/6 [00:03<00:00,  1.78it/s]

2024-05-25 09:30:56,129 DEV : loss 0.017416099086403847 - f1-score (micro avg)  0.9698
2024-05-25 09:30:56,201  - 1 epochs without improvement
2024-05-25 09:30:56,210  - 1 epochs without improvement
2024-05-25 09:30:56,225  - 1 epochs without improvement
2024-05-25 09:30:56,253  - 1 epochs without improvement
2024-05-25 09:30:56,263  - 1 epochs without improvement
2024-05-25 09:30:56,271 ----------------------------------------------------------------------------------------------------





2024-05-25 09:30:57,338 epoch 15 - iter 21/216 - loss 0.00330554 - time (sec): 1.06 - samples/sec: 1652.56 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:58,139 epoch 15 - iter 42/216 - loss 0.00905443 - time (sec): 1.87 - samples/sec: 1858.82 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:59,008 epoch 15 - iter 63/216 - loss 0.00629548 - time (sec): 2.73 - samples/sec: 1971.29 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:30:59,755 epoch 15 - iter 84/216 - loss 0.00596544 - time (sec): 3.48 - samples/sec: 2082.73 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:00,554 epoch 15 - iter 105/216 - loss 0.00763361 - time (sec): 4.28 - samples/sec: 2138.29 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:01,302 epoch 15 - iter 126/216 - loss 0.00698626 - time (sec): 5.03 - samples/sec: 2169.03 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:02,161 epoch 15 - iter 147/216 - loss 0.00617450 - time (sec): 5.89 - samples/sec: 2191.94 - lr: 0.003125 - momentum: 0.000

100%|██████████| 6/6 [00:02<00:00,  2.28it/s]

2024-05-25 09:31:07,434 DEV : loss 0.018010348081588745 - f1-score (micro avg)  0.9688
2024-05-25 09:31:07,482  - 2 epochs without improvement
2024-05-25 09:31:07,492  - 2 epochs without improvement
2024-05-25 09:31:07,499  - 2 epochs without improvement
2024-05-25 09:31:07,505  - 2 epochs without improvement
2024-05-25 09:31:07,510  - 2 epochs without improvement
2024-05-25 09:31:07,520 ----------------------------------------------------------------------------------------------------





2024-05-25 09:31:08,581 epoch 16 - iter 21/216 - loss 0.00701135 - time (sec): 1.06 - samples/sec: 1747.60 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:09,793 epoch 16 - iter 42/216 - loss 0.01116940 - time (sec): 2.27 - samples/sec: 1690.25 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:10,799 epoch 16 - iter 63/216 - loss 0.01073432 - time (sec): 3.28 - samples/sec: 1691.01 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:11,502 epoch 16 - iter 84/216 - loss 0.01078107 - time (sec): 3.98 - samples/sec: 1789.29 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:12,306 epoch 16 - iter 105/216 - loss 0.00865730 - time (sec): 4.78 - samples/sec: 1896.49 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:13,086 epoch 16 - iter 126/216 - loss 0.00802545 - time (sec): 5.56 - samples/sec: 1974.45 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:13,848 epoch 16 - iter 147/216 - loss 0.00916126 - time (sec): 6.33 - samples/sec: 2016.29 - lr: 0.003125 - momentum: 0.000

100%|██████████| 6/6 [00:01<00:00,  3.02it/s]

2024-05-25 09:31:18,469 DEV : loss 0.017652174457907677 - f1-score (micro avg)  0.9688
2024-05-25 09:31:18,497  - 3 epochs without improvement
2024-05-25 09:31:18,503  - 3 epochs without improvement
2024-05-25 09:31:18,508  - 3 epochs without improvement
2024-05-25 09:31:18,514  - 3 epochs without improvement
2024-05-25 09:31:18,519  - 3 epochs without improvement
2024-05-25 09:31:18,525 ----------------------------------------------------------------------------------------------------





2024-05-25 09:31:19,363 epoch 17 - iter 21/216 - loss 0.00932604 - time (sec): 0.84 - samples/sec: 2455.59 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:20,099 epoch 17 - iter 42/216 - loss 0.00818989 - time (sec): 1.57 - samples/sec: 2333.24 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:21,046 epoch 17 - iter 63/216 - loss 0.00688064 - time (sec): 2.52 - samples/sec: 2169.99 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:22,029 epoch 17 - iter 84/216 - loss 0.00634862 - time (sec): 3.50 - samples/sec: 2045.69 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:23,254 epoch 17 - iter 105/216 - loss 0.00704494 - time (sec): 4.73 - samples/sec: 1977.72 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:24,350 epoch 17 - iter 126/216 - loss 0.00624465 - time (sec): 5.82 - samples/sec: 1924.34 - lr: 0.003125 - momentum: 0.000000
2024-05-25 09:31:25,308 epoch 17 - iter 147/216 - loss 0.00686618 - time (sec): 6.78 - samples/sec: 1927.85 - lr: 0.003125 - momentum: 0.000

100%|██████████| 6/6 [00:01<00:00,  3.05it/s]

2024-05-25 09:31:29,862 DEV : loss 0.018185140565037727 - f1-score (micro avg)  0.9688
2024-05-25 09:31:29,890  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0015625]
2024-05-25 09:31:29,895  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00078125]
2024-05-25 09:31:29,900  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000390625]
2024-05-25 09:31:29,906  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0001953125]
2024-05-25 09:31:29,912  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [9.765625e-05]
2024-05-25 09:31:29,914 ----------------------------------------------------------------------------------------------------
2024-05-25 09:31:29,916 learning rate too small - quitting training!
2024-05-25 09:31:29,917 ----------------------------------------------------------------------------------------------------
2024-




2024-05-25 09:31:30,774 Done.
2024-05-25 09:31:30,775 ----------------------------------------------------------------------------------------------------
2024-05-25 09:31:30,778 Loading model from best epoch ...
2024-05-25 09:31:31,825 SequenceTagger predicts: Dictionary with 15 tags: O, S-PERSON, B-PERSON, E-PERSON, I-PERSON, S-PLACE, B-PLACE, E-PLACE, I-PLACE, S-GROUP, B-GROUP, E-GROUP, I-GROUP, <START>, <STOP>


100%|██████████| 5/5 [00:03<00:00,  1.54it/s]

2024-05-25 09:31:35,397 
Results:
- F-score (micro) 0.9631
- F-score (macro) 0.6324
- Accuracy 0.931

By class:
              precision    recall  f1-score   support

      PERSON     0.9737    0.9652    0.9694       345
       PLACE     0.9783    0.8824    0.9278        51
       GROUP     0.0000    0.0000    0.0000         1

   micro avg     0.9742    0.9521    0.9631       397
   macro avg     0.6506    0.6159    0.6324       397
weighted avg     0.9718    0.9521    0.9616       397

2024-05-25 09:31:35,399 ----------------------------------------------------------------------------------------------------





{'test_score': 0.9630573248407643}