In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import argparse
import logging
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import f1_score, classification_report
from torch.utils.data import DataLoader, TensorDataset
import sys
from tqdm import tqdm, trange
import os
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CyclicLR


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import RobertaModel
import torch
import torch.nn as nn
from transformers.modeling_outputs import SequenceClassifierOutput

class RobertaWithMultiSampleDropoutTarget(nn.Module):
    def __init__(self, model_name, num_labels, dropout_rate=0.3, num_dropouts=5, use_multi_sample_dropout=True):
        super(RobertaWithMultiSampleDropoutTarget, self).__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.use_multi_sample_dropout = use_multi_sample_dropout
        self.hidden_size = self.roberta.config.hidden_size

        if self.use_multi_sample_dropout:
            self.dropouts = nn.ModuleList([
                nn.Dropout(dropout_rate) for _ in range(num_dropouts)
            ])
        else:
            self.dropout = nn.Dropout(dropout_rate)

        self.classifier = nn.Linear(self.hidden_size, num_labels)

    def extract_entity_embeddings(self, input_ids, sequence_output, en_token_id, end_en_token_id):
        batch_size = input_ids.size(0)
        entity_representations = []

        for i in range(batch_size):
            input_id = input_ids[i]
            output = sequence_output[i]

            try:
                start = (input_id == en_token_id).nonzero(as_tuple=True)[0].item()
                end = (input_id == end_en_token_id).nonzero(as_tuple=True)[0].item()
            except IndexError:
                # если вдруг токены не найдены — fallback на [CLS]
                entity_representations.append(output[0])
                continue

            # вырезаем эмбеддинги между <en> и </en>
            entity_tokens = output[start + 1:end]
            if entity_tokens.shape[0] == 0:
                entity_representations.append(output[0])  # fallback на [CLS]
            else:
                entity_representations.append(entity_tokens.mean(dim=0))  # mean pooling

        return torch.stack(entity_representations)

    def forward(self, input_ids, attention_mask=None, labels=None):
        outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        sequence_output = outputs[0]  # [batch_size, seq_len, hidden_size]

        # --- 1. Находим позиции <en> и </en> ---
        start_token_id = tokenizer.convert_tokens_to_ids("<en>")
        end_token_id = tokenizer.convert_tokens_to_ids("</en>")
    
        start_positions = (input_ids == start_token_id).nonzero(as_tuple=False)
        end_positions = (input_ids == end_token_id).nonzero(as_tuple=False)

        # --- 2. Для каждого примера агрегируем hidden states между <en> и </en> ---
        pooled_output = []
        for batch_idx in range(input_ids.size(0)):
            start_pos = start_positions[start_positions[:, 0] == batch_idx][:, 1]
            end_pos = end_positions[end_positions[:, 0] == batch_idx][:, 1]
            if len(start_pos) > 0 and len(end_pos) > 0:
                s, e = start_pos[0].item() + 1, end_pos[0].item()  # между тегами
                if e > s:
                    token_embeds = sequence_output[batch_idx, s:e, :]  # [num_entity_tokens, hidden]
                    pooled = torch.mean(token_embeds, dim=0)  # Mean-pooling
                else:
                    pooled = sequence_output[batch_idx, 0, :]  # fallback to [CLS]
            else:
                pooled = sequence_output[batch_idx, 0, :]  # fallback to [CLS]

            pooled_output.append(pooled)

        pooled_output = torch.stack(pooled_output)  # [batch_size, hidden_size]

        if self.use_multi_sample_dropout:
            logits_list = [self.classifier(dropout(pooled_output)) for dropout in self.dropouts]
            avg_logits = torch.mean(torch.stack(logits_list), dim=0)
        else:
            avg_logits = self.classifier(self.dropout(pooled_output))

        return SequenceClassifierOutput(logits=avg_logits)


In [3]:
def save_metrics_best(epoch, all_labels, all_preds, result_path, backprop, loss):
    import os
    import pandas as pd
    from sklearn.metrics import classification_report

    metrics_file = os.path.join(result_path, "metrics_best.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)

    # Создаем словарь с метриками
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop,
        "loss": loss
    }

    # Добавляем метрики по каждому классу
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]

    # Создаем DataFrame и сохраняем его в CSV (перезапись файла)
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='w', header=True, index=False)

In [4]:
def compute_ens_weights(train_labels, beta: float = 0.999):
    train_labels = np.array(train_labels)
    classes, counts = np.unique(train_labels, return_counts=True)
    effective_num = (1 - np.power(beta, counts)) / (1 - beta)
    weights = 1.0 / effective_num
    weights = weights / np.sum(weights) * len(classes)  # нормализация как в оригинальной статье
    return torch.tensor(weights, dtype=torch.float)

In [5]:
def get_entity_tags_from_files(*file_paths):
    all_tags = set()
    for path in file_paths:
        try:
            df = pd.read_csv(path, sep='\t')
            if "entity_tag" in df.columns:
                tags = df["entity_tag"].dropna().unique().tolist()
                all_tags.update(tags)
        except Exception as e:
            print(f"[Ошибка] Не удалось загрузить {path}: {e}")
    return sorted(list(all_tags))

In [6]:
def load_data(file_path, tokenizer, max_seq_len):
    try:
        df = pd.read_csv(file_path, sep='\t')  # Загружаем CSV (TSV)

        # Проверка нужных колонок
        required_columns = {
            "sentence", "entity", "label", "entity_tag",
            "entity_pos_start_rel", "entity_pos_end_rel"
        }
        if not required_columns.issubset(df.columns):
            raise ValueError(f"Ожидаемые колонки: {required_columns}, но в файле: {df.columns}")

        # Заменяем -1 на 2 (если есть)
        df["label"] = df["label"].replace(-1, 2)

        # Размечаем предложения с [ENTITY] токенами
        def mark_entity_inline(row):
            sent = row["sentence"]
            start = row["entity_pos_start_rel"]
            end = row["entity_pos_end_rel"]
            tag = row["entity_tag"]
            tag_token = f"<en>"
            tag_token_close = f"</en>"
            return (
                sent[:start] +
                f"{tag_token} " + sent[start:end] + f" <|{tag}|>"+ f" {tag_token_close}" +
                sent[end:]
            )

        df["input_text"] = df.apply(mark_entity_inline, axis=1)
        texts = df["input_text"].tolist()
        labels = torch.tensor(df["label"].astype(int).tolist(), dtype=torch.long)

        # Токенизируем
        encodings = tokenizer(texts, padding=True, truncation=True,
                              max_length=max_seq_len, return_tensors='pt')
        dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'], labels)
        return dataset

    except FileNotFoundError:
        logging.error(f"Файл {file_path} не найден!")
        return None

In [7]:

def save_metrics(epoch, all_labels, all_preds, result_path, backprop, loss):
    metrics_file = os.path.join(result_path, "metrics_class_weights.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)
    
    # Создаем DataFrame для метрик
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop , # (Добавлено)
        "loss": loss
    }
    
    # Добавляем метрики для каждого класса
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):  # Пропускаем 'accuracy', так как это float
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]
    
    # Создаем DataFrame и сохраняем в CSV
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='a', header=not os.path.exists(metrics_file), index=False)

def save_confusion_matrix(epoch, y_true, y_pred, result_path,backprop):
    cm = confusion_matrix(y_true, y_pred)
    cm_df = pd.DataFrame(cm, columns=["Pred_0", "Pred_1", "Pred_2"], index=["True_0", "True_1", "True_2"])
    cm_file = os.path.join(result_path, f"confusion_matrix_class_weights_epoch_{epoch}_backprop_{backprop}.csv")
    cm_df.to_csv(cm_file)



In [8]:
from sklearn.metrics import classification_report
import pandas as pd

def castom_classification_report(all_labels, all_preds):
    report = classification_report(all_labels, all_preds, output_dict=True)
    data = []
    for label, metrics in report.items():
        if isinstance(metrics, dict):  # Для всех метрик, кроме accuracy
            row = {'label': label}
            for metric, value in metrics.items():
                row[metric] = round(value * 100, 3) if metric != 'support' else value
            data.append(row)
        else:  # Для accuracy
            data.append({'label': 'accuracy', 'precision': round(metrics * 100, 3), 'recall': None, 'f1-score': None, 'support': None})

    # Добавляем macro F1 для классов 1 и 2
    if '1' in report and '2' in report:
        f1_1 = report['1']['f1-score']
        f1_2 = report['2']['f1-score']
        f1_macro_1_2 = (f1_1 + f1_2) / 2
        data.append({
            'label': 'avg f1 (class 1&2)',
            'precision': None,
            'recall': None,
            'f1-score': round(f1_macro_1_2 * 100, 3),
            'support': None
        })
    
    # Создаем DataFrame
    df = pd.DataFrame(data)
    df.index = [''] * len(df)

    # Выводим таблицу
    print(df.to_string(index=False))

In [9]:
def add_token(flag = False):
    # Формируем список специальных токенов
    if not flag: return None
    special_tokens = {
        "additional_special_tokens":
            [f"<|{tag}|>" for tag in teg_class_entity] +
            ["<en>", "</en>"]
    }

    print("Добавленные специальные токены:")
    for token in special_tokens["additional_special_tokens"]:
        print(token)
    return special_tokens

def save_contrel_date(tokenizer, train_dataset):
    special_tokens = add_token(flag = True)
    if special_tokens != None: 
        tokenizer.add_special_tokens(special_tokens)
    tokenizer.save_pretrained(os.path.join(args.result, "tokenizer"))
    print(tokenizer.special_tokens_map)
    print(tokenizer.additional_special_tokens)

    
    file_control_text = os.path.join(args.result, "use_market_text.txt")
    input_ids, attention_mask, label = train_dataset[0]
    decoded_text_token = tokenizer.decode(input_ids, skip_special_tokens=False)
    decoded_text = tokenizer.decode(input_ids, skip_special_tokens=True)
    with open(file_control_text, "w") as f:
        f.write(f"Text token:\n{decoded_text_token}\n\n")
        f.write(f"Text:\n{decoded_text}\n\n")
    print(f"Text token:\n{decoded_text_token}\n\n")
    print(f"Text:\n{decoded_text}\n\n")

In [10]:
teg_class_entity = ["COUNTRY", "NATIONALITY", "ORGANIZATION", "PERSON", "PROFESSION"]
lr_m = 1e-6
# lr = 1e-6 началось обучение, уменьшение TrainError
# добавить волидацию по шагам в течение эпохи обучения, а не каджые N епох
# Добавить сохранение ошибки в файлы для построения графиков
# проверить методы передачи сущьности (параетром, +тип, или в тексте выделяя тегами).
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="./../ruRoberta-large/")
    #parser.add_argument("--model_name", type=str, default="sberbank-ai/ruRoberta-large", help="Имя модели")
    parser.add_argument("--max_seq_len", type=int, default=512) #128
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--lr", type=float, default=lr_m)
    parser.add_argument("--init_checkpoint", type=str, default=None)
    parser.add_argument("--train_data", type=str, required=False, default="./../data/train_data.csv")
    parser.add_argument("--validation_data", type=str, required=False, default="./../data/validation.csv")
    parser.add_argument("--eval_data", type=str, required=False, default="./../data/test.csv")
    parser.add_argument("--result", type=str, default=f"./result_lr_{lr_m}_ENS_Teg_multi_dropout_V3_Token_Target/")
    # Игнорируем аргументы Jupyter
    args, unknown = parser.parse_known_args()
    os.makedirs(args.result, exist_ok=True)
    return args


if __name__ == "__main__":
    args = parse_args()
    print(args)  # Проверяем аргументы

    base_lr = args.lr
    min_lr = base_lr * (1/4)
    step_size_up = 400 #400
    best_avg_f1 = 0.0
    
    best_model_path = os.path.join(args.result, "best_model.pth")

    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    
    logging.basicConfig(level=logging.INFO)

    tokenizer = RobertaTokenizer.from_pretrained(args.model_name)
    #model = RobertaForSequenceClassification.from_pretrained(args.model_name, num_labels=3)
    model = RobertaWithMultiSampleDropoutTarget(model_name='./../ruRoberta-large', num_labels=3, use_multi_sample_dropout=True )
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_dataset = load_data(args.train_data, tokenizer, args.max_seq_len)
    validation_data = load_data(args.validation_data, tokenizer, args.max_seq_len)
    eval_dataset = load_data(args.eval_data, tokenizer, args.max_seq_len)

    if train_dataset is None or eval_dataset is None or validation_data is None:
        sys.exit(f"Ошибка загрузки данных: убедитесь, что файлы {args.train_data}, {args.validation_data} и {args.eval_data} существуют и содержат нужные колонки.")

    #сохранение текста и токенов, с дабовлением собственных
    save_contrel_date(tokenizer,train_dataset)
    model.roberta.resize_token_embeddings(len(tokenizer))
    
    embedding_layer = model.roberta.embeddings.word_embeddings
    print(embedding_layer.weight[tokenizer.convert_tokens_to_ids("<en>")])
    
    # Получаем метки классов из тренировочного датасета
    train_labels = [label.item() for _, _, label in train_dataset]

    # Вычисляем веса классов
    class_weights = compute_ens_weights(train_labels, beta=0.999)

    # Вывод весов классов
    print(f"class_weights = {class_weights}")
    
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    validation_loader = DataLoader(validation_data, batch_size=args.batch_size)
    eval_loader = DataLoader(eval_dataset, batch_size=args.batch_size)
    
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    
    #scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr = base_lr, step_size_up = step_size_up, mode="triangular2",cycle_momentum=False )

    loss_fct = nn.CrossEntropyLoss(weight=class_weights.to(device))
    
    def train():
        model.train()
        batches_per_validation = (len(train_loader) // 2)+2
        batches_per_test = (len(train_loader) // 5)+2
        
        for epoch in range(args.epochs):
            print(f"Epoch: {epoch}/{args.epochs}")
            total_loss = 0
            for i,batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1} - Batches")):
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                optimizer.zero_grad()
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                loss.backward()
                optimizer.step()
                #scheduler.step()
                
                total_loss += loss.item()
                if(i % batches_per_validation == 0 and i >50):
                    evaluate(epoch, backprop="Validation", vall_train = True)
                if(i % batches_per_test == 0 and i >50):
                    evaluate(epoch, backprop="Test", flag_print = True)
                
            logging.info(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}, Step: {len(train_loader)}")
            # Валидация с обратным распространением ошибки каждые 2 эпохи
            evaluate(epoch , backprop="Train", flag_print = True)
            evaluate(epoch , backprop="Test", flag_print = True)
            evaluate(epoch , backprop="Validation", flag_print = True, vall_train = True)
            
            
    
    def evaluate(epoch = None, backprop = "None", flag_print = False, vall_train = False):
        global best_avg_f1
        model.eval()
        all_preds, all_labels = [], []
        total_loss = 0
        print(f"evaluate, backprop: {backprop}")
        loader = validation_loader
        if backprop == "Validation":
            loader = validation_loader
        elif backprop == "Test":
            loader = eval_loader
        elif backprop == "Train":
            loader = train_loader
        
        else: loader = eval_loader
        with torch.no_grad() if not backprop == "Validation" else torch.enable_grad():  # Включаем градиенты для обучения на валидации
            for batch in loader:
            #for batch in loader:
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                
                
                
                if backprop == "Validation" and vall_train:
                    #уменьшая тк обучающие данные удвоились
                    loss = loss
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    #scheduler.step()
                    loss = loss

                preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())
                total_loss += loss.item()
                
        avg_loss = total_loss / len(loader)
        f1 = f1_score(all_labels, all_preds, average='macro')
        f1_pn = f1_score(all_labels, all_preds, labels=[1, 2], average='macro')
        avg_f1 = (f1 + f1_pn) / 2

        if(flag_print):
            print()
            print("--"*20)
            print(f"Result evaluate in {backprop}")
            #logging.info(f"{backprop} Loss: {avg_loss:.4f}")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")
            #print(classification_report(all_labels, all_preds))
            castom_classification_report(all_labels, all_preds)

        # 💾 Сохраняем модель только при улучшении средней метрики
        if (avg_f1 > best_avg_f1) and backprop == "Test":
            best_avg_f1 = avg_f1
            torch.save(model.state_dict(), best_model_path)
            
            print(f"[Checkpoint] 🎯 Новый лучший средний F1: {best_avg_f1:.4f}")
            print(f"[Checkpoint] 💾 Модель сохранена: {best_model_path}")
            # 🔸 Сохраняем значение лучшего F1 в файл
            best_score_path = os.path.join(args.result, "best_score.txt")
            save_metrics_best(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
            save_confusion_matrix(epoch, all_labels, all_preds, args.result, backprop)
            with open(best_score_path, "w") as f:
                f.write(f"Epoch: {epoch}\n")
                f.write(f"Loss: {avg_loss:.4f}\n")
                f.write(f"F1-pn0: {f1:.4f}\n")
                f.write(f"F1-pn: {f1_pn:.4f}\n")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")


        if epoch is not None:
            save_metrics(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
        
    if args.init_checkpoint:
        model.load_state_dict(torch.load(args.init_checkpoint, map_location=device))

    train()

Namespace(model_name='./../ruRoberta-large/', max_seq_len=512, batch_size=16, epochs=10, lr=1e-06, init_checkpoint=None, train_data='./../data/train_data.csv', validation_data='./../data/validation.csv', eval_data='./../data/test.csv', result='./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/')


Some weights of RobertaModel were not initialized from the model checkpoint at ./../ruRoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Добавленные специальные токены:
<|COUNTRY|>
<|NATIONALITY|>
<|ORGANIZATION|>
<|PERSON|>
<|PROFESSION|>
<en>
</en>
{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>', 'additional_special_tokens': ['<|COUNTRY|>', '<|NATIONALITY|>', '<|ORGANIZATION|>', '<|PERSON|>', '<|PROFESSION|>', '<en>', '</en>']}
['<|COUNTRY|>', '<|NATIONALITY|>', '<|ORGANIZATION|>', '<|PERSON|>', '<|PROFESSION|>', '<en>', '</en>']
Text token:
<s>Джеймс «Бадди» Макгирт (James (Buddy) McGirt, тренер Дадашева упрашивал дагестанского <en> спортсмена <|PROFESSION|> </en> остановить бой, но тот хотел продолжать.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>

Epoch 1 - Batches:  20%|██        | 85/415 [00:48<03:05,  1.78it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.8026
F1-macro: 0.2841
F1-pn: 0.0089
             label  precision  recall  f1-score  support
                 0     71.678  99.838    83.446    616.0
                 1    100.000   0.901     1.786    111.0
                 2      0.000   0.000     0.000    133.0
          accuracy     71.628     NaN       NaN      NaN
         macro avg     57.226  33.580    28.411    860.0
      weighted avg     64.249  71.628    60.001    860.0
avg f1 (class 1&2)        NaN     NaN     0.893      NaN


Epoch 1 - Batches:  21%|██        | 86/415 [00:57<17:08,  3.13s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.1465
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.8026
F1-macro: 0.2841
F1-pn: 0.0089


Epoch 1 - Batches:  41%|████      | 170/415 [01:43<02:16,  1.80it/s]

evaluate, backprop: Test


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Epoch 1 - Batches:  41%|████      | 171/415 [01:51<10:31,  2.59s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7519
F1-macro: 0.2841
F1-pn: 0.0088
             label  precision  recall  f1-score  support
                 0     71.678  99.838    83.446    616.0
                 1     50.000   0.901     1.770    111.0
                 2      0.000   0.000     0.000    133.0
          accuracy     71.628     NaN       NaN      NaN
         macro avg     40.559  33.580    28.405    860.0
      weighted avg     57.795  71.628    59.999    860.0
avg f1 (class 1&2)        NaN     NaN     0.885      NaN


Epoch 1 - Batches:  50%|█████     | 209/415 [02:12<01:54,  1.79it/s]

evaluate, backprop: Validation


Epoch 1 - Batches:  61%|██████▏   | 255/415 [03:29<01:29,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7966
F1-macro: 0.3095
F1-pn: 0.0461
             label  precision  recall  f1-score  support
                 0     72.118  99.513    83.629    616.0
                 1     66.667   1.802     3.509    111.0
                 2     57.143   3.008     5.714    133.0
          accuracy     71.977     NaN       NaN      NaN
         macro avg     65.309  34.774    30.951    860.0
      weighted avg     69.098  71.977    61.238    860.0
avg f1 (class 1&2)        NaN     NaN     4.612      NaN


Epoch 1 - Batches:  62%|██████▏   | 256/415 [03:40<09:35,  3.62s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.1778
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.7966
F1-macro: 0.3095
F1-pn: 0.0461


Epoch 1 - Batches:  82%|████████▏ | 340/415 [04:26<00:41,  1.79it/s]

evaluate, backprop: Test


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Epoch 1 - Batches:  82%|████████▏ | 341/415 [04:34<03:12,  2.60s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7158
F1-macro: 0.2881
F1-pn: 0.0147
             label  precision  recall  f1-score  support
                 0     71.762  99.838    83.503    616.0
                 1      0.000   0.000     0.000    111.0
                 2     66.667   1.504     2.941    133.0
          accuracy     71.744     NaN       NaN      NaN
         macro avg     46.143  33.780    28.815    860.0
      weighted avg     61.712  71.744    60.266    860.0
avg f1 (class 1&2)        NaN     NaN     1.471      NaN


Epoch 1 - Batches: 100%|██████████| 415/415 [05:15<00:00,  1.32it/s]
INFO:root:Epoch 1, Loss: 0.9029651379728891, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.7919
F1-macro: 0.3625
F1-pn: 0.1236
             label  precision  recall  f1-score  support
                 0     73.412  98.261    84.038   4774.0
                 1     70.270   3.037     5.823    856.0
                 2     54.762  11.420    18.899   1007.0
          accuracy     72.804     NaN       NaN      NaN
         macro avg     66.148  37.573    36.253   6637.0
      weighted avg     70.177  72.804    64.067   6637.0
avg f1 (class 1&2)        NaN     NaN    12.361      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6953
F1-macro: 0.3542
F1-pn: 0.1130
             label  precision  recall  f1-score  support
                 0     72.947  98.052    83.657    616.0
                 1     75.000   2.703     5.217    111.0
                 2     50.000  10.526    17.391    133.0
          accuracy     72.209     NaN       

Epoch 2 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6660
F1-macro: 0.4414
F1-pn: 0.2407
             label  precision  recall  f1-score  support
                 0     75.032  96.104    84.270    616.0
                 1     73.333   9.910    17.460    111.0
                 2     51.786  21.805    30.688    133.0
          accuracy     73.488     NaN       NaN      NaN
         macro avg     66.717  42.606    44.140    860.0
      weighted avg     71.217  73.488    67.361    860.0
avg f1 (class 1&2)        NaN     NaN    24.074      NaN


Epoch 2 - Batches:  21%|██        | 86/415 [00:58<19:45,  3.60s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.3411
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.6660
F1-macro: 0.4414
F1-pn: 0.2407


Epoch 2 - Batches:  41%|████      | 170/415 [01:45<02:16,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6442
F1-macro: 0.5446
F1-pn: 0.3913
             label  precision  recall  f1-score  support
                 0     78.918  92.370    85.116    616.0
                 1     57.778  23.423    33.333    111.0
                 2     54.255  38.346    44.934    133.0
          accuracy     75.116     NaN       NaN      NaN
         macro avg     63.650  51.380    54.461    860.0
      weighted avg     72.375  75.116    72.218    860.0
avg f1 (class 1&2)        NaN     NaN    39.134      NaN


Epoch 2 - Batches:  41%|████      | 171/415 [01:55<14:42,  3.62s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.4680
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.6442
F1-macro: 0.5446
F1-pn: 0.3913


Epoch 2 - Batches:  50%|█████     | 209/415 [02:17<01:55,  1.78it/s]

evaluate, backprop: Validation


Epoch 2 - Batches:  61%|██████▏   | 255/415 [03:34<01:29,  1.79it/s]

evaluate, backprop: Test


Epoch 2 - Batches:  62%|██████▏   | 256/415 [03:41<06:54,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6046
F1-macro: 0.5376
F1-pn: 0.3821
             label  precision  recall  f1-score  support
                 0     77.883  93.182    84.848    616.0
                 1     79.167  17.117    28.148    111.0
                 2     56.566  42.105    48.276    133.0
          accuracy     75.465     NaN       NaN      NaN
         macro avg     71.205  50.801    53.757    860.0
      weighted avg     74.752  75.465    71.874    860.0
avg f1 (class 1&2)        NaN     NaN    38.212      NaN


Epoch 2 - Batches:  82%|████████▏ | 340/415 [04:28<00:41,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5732
F1-macro: 0.5831
F1-pn: 0.4461
             label  precision  recall  f1-score  support
                 0     80.198  92.045    85.714    616.0
                 1     69.444  22.523    34.014    111.0
                 2     58.974  51.880    55.200    133.0
          accuracy     76.860     NaN       NaN      NaN
         macro avg     69.539  55.483    58.309    860.0
      weighted avg     75.528  76.860    74.322    860.0
avg f1 (class 1&2)        NaN     NaN    44.607      NaN


Epoch 2 - Batches:  82%|████████▏ | 341/415 [04:39<04:28,  3.63s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.5146
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5732
F1-macro: 0.5831
F1-pn: 0.4461


Epoch 2 - Batches: 100%|██████████| 415/415 [05:21<00:00,  1.29it/s]
INFO:root:Epoch 2, Loss: 0.7541831297328673, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.6058
F1-macro: 0.6537
F1-pn: 0.5485
             label  precision  recall  f1-score  support
                 0     85.399  87.474    86.424   4774.0
                 1     69.524  34.112    45.768    856.0
                 2     56.217  74.081    63.925   1007.0
          accuracy     78.560     NaN       NaN      NaN
         macro avg     70.380  65.222    65.372   6637.0
      weighted avg     78.924  78.560    77.767   6637.0
avg f1 (class 1&2)        NaN     NaN    54.846      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5741
F1-macro: 0.6062
F1-pn: 0.4858
             label  precision  recall  f1-score  support
                 0     83.677  85.714    84.683    616.0
                 1     65.909  26.126    37.419    111.0
                 2     51.351  71.429    59.748    133.0
          accuracy     75.814     NaN       

Epoch 3 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5444
F1-macro: 0.6186
F1-pn: 0.4986
             label  precision  recall  f1-score  support
                 0     81.885  90.260    85.869    616.0
                 1     69.565  28.829    40.764    111.0
                 2     58.519  59.398    58.955    133.0
          accuracy     77.558     NaN       NaN      NaN
         macro avg     69.990  59.496    61.863    860.0
      weighted avg     76.681  77.558    75.885    860.0
avg f1 (class 1&2)        NaN     NaN    49.860      NaN


Epoch 3 - Batches:  21%|██        | 86/415 [00:58<19:57,  3.64s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.5586
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5444
F1-macro: 0.6186
F1-pn: 0.4986


Epoch 3 - Batches:  41%|████      | 170/415 [01:45<02:16,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5216
F1-macro: 0.6501
F1-pn: 0.5430
             label  precision  recall  f1-score  support
                 0     84.603  88.312    86.418    616.0
                 1     61.039  42.342    50.000    111.0
                 2     57.143  60.150    58.608    133.0
          accuracy     78.023     NaN       NaN      NaN
         macro avg     67.595  63.601    65.009    860.0
      weighted avg     77.315  78.023    77.417    860.0
avg f1 (class 1&2)        NaN     NaN    54.304      NaN


Epoch 3 - Batches:  41%|████      | 171/415 [01:55<14:45,  3.63s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.5966
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5216
F1-macro: 0.6501
F1-pn: 0.5430


Epoch 3 - Batches:  50%|█████     | 209/415 [02:17<01:55,  1.79it/s]

evaluate, backprop: Validation


Epoch 3 - Batches:  61%|██████▏   | 255/415 [03:34<01:29,  1.79it/s]

evaluate, backprop: Test


Epoch 3 - Batches:  62%|██████▏   | 256/415 [03:41<06:54,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5176
F1-macro: 0.6209
F1-pn: 0.5017
             label  precision  recall  f1-score  support
                 0     81.606  90.747    85.934    616.0
                 1     61.905  35.135    44.828    111.0
                 2     60.714  51.128    55.510    133.0
          accuracy     77.442     NaN       NaN      NaN
         macro avg     68.075  59.003    62.091    860.0
      weighted avg     75.832  77.442    75.923    860.0
avg f1 (class 1&2)        NaN     NaN    50.169      NaN


Epoch 3 - Batches:  82%|████████▏ | 340/415 [04:28<00:41,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4970
F1-macro: 0.6632
F1-pn: 0.5576
             label  precision  recall  f1-score  support
                 0     84.756  90.260    87.421    616.0
                 1     70.909  35.135    46.988    111.0
                 2     61.074  68.421    64.539    133.0
          accuracy     79.767     NaN       NaN      NaN
         macro avg     72.246  64.605    66.316    860.0
      weighted avg     79.306  79.767    78.664    860.0
avg f1 (class 1&2)        NaN     NaN    55.763      NaN


Epoch 3 - Batches:  82%|████████▏ | 341/415 [04:39<04:26,  3.59s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6104
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4970
F1-macro: 0.6632
F1-pn: 0.5576


Epoch 3 - Batches: 100%|██████████| 415/415 [05:20<00:00,  1.29it/s]
INFO:root:Epoch 3, Loss: 0.6053051769912962, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.4855
F1-macro: 0.7425
F1-pn: 0.6680
             label  precision  recall  f1-score  support
                 0     89.354  88.961    89.157   4774.0
                 1     68.254  55.257    61.072    856.0
                 2     66.919  79.146    72.520   1007.0
          accuracy     83.125     NaN       NaN      NaN
         macro avg     74.842  74.455    74.250   6637.0
      weighted avg     83.229  83.125    83.011   6637.0
avg f1 (class 1&2)        NaN     NaN    66.796      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5014
F1-macro: 0.6932
F1-pn: 0.6053
             label  precision  recall  f1-score  support
                 0     88.147  85.714    86.914    616.0
                 1     64.368  50.450    56.566    111.0
                 2     56.897  74.436    64.495    133.0
          accuracy     79.419     NaN       

Epoch 4 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.79it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  21%|██        | 86/415 [00:54<14:18,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5160
F1-macro: 0.6896
F1-pn: 0.6076
             label  precision  recall  f1-score  support
                 0     89.643  81.494    85.374    616.0
                 1     56.637  57.658    57.143    111.0
                 2     55.080  77.444    64.375    133.0
          accuracy     77.791     NaN       NaN      NaN
         macro avg     67.120  72.198    68.964    860.0
      weighted avg     80.038  77.791    78.483    860.0
avg f1 (class 1&2)        NaN     NaN    60.759      NaN


Epoch 4 - Batches:  41%|████      | 170/415 [01:42<02:16,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4820
F1-macro: 0.7133
F1-pn: 0.6305
             label  precision  recall  f1-score  support
                 0     88.091  87.662    87.876    616.0
                 1     65.909  52.252    58.291    111.0
                 2     62.264  74.436    67.808    133.0
          accuracy     81.047     NaN       NaN      NaN
         macro avg     72.088  71.450    71.325    860.0
      weighted avg     81.234  81.047    80.954    860.0
avg f1 (class 1&2)        NaN     NaN    63.050      NaN


Epoch 4 - Batches:  41%|████      | 171/415 [01:52<14:45,  3.63s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6719
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4820
F1-macro: 0.7133
F1-pn: 0.6305


Epoch 4 - Batches:  50%|█████     | 209/415 [02:14<01:55,  1.79it/s]

evaluate, backprop: Validation


Epoch 4 - Batches:  61%|██████▏   | 255/415 [03:31<01:29,  1.79it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  62%|██████▏   | 256/415 [03:38<06:54,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4777
F1-macro: 0.6797
F1-pn: 0.5842
             label  precision  recall  f1-score  support
                 0     85.580  88.636    87.081    616.0
                 1     62.651  46.847    53.608    111.0
                 2     61.871  64.662    63.235    133.0
          accuracy     79.535     NaN       NaN      NaN
         macro avg     70.034  66.715    67.975    860.0
      weighted avg     78.954  79.535    79.073    860.0
avg f1 (class 1&2)        NaN     NaN    58.422      NaN


Epoch 4 - Batches:  82%|████████▏ | 340/415 [04:25<00:42,  1.78it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  82%|████████▏ | 341/415 [04:32<03:13,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4745
F1-macro: 0.7075
F1-pn: 0.6238
             label  precision  recall  f1-score  support
                 0     88.667  86.364    87.500    616.0
                 1     61.616  54.955    58.095    111.0
                 2     60.870  73.684    66.667    133.0
          accuracy     80.349     NaN       NaN      NaN
         macro avg     70.384  71.668    70.754    860.0
      weighted avg     80.876  80.349    80.483    860.0
avg f1 (class 1&2)        NaN     NaN    62.381      NaN


Epoch 4 - Batches: 100%|██████████| 415/415 [05:14<00:00,  1.32it/s]
INFO:root:Epoch 4, Loss: 0.5019800503569913, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.3958
F1-macro: 0.7959
F1-pn: 0.7380
             label  precision  recall  f1-score  support
                 0     91.750  90.616    91.179   4774.0
                 1     73.494  64.136    68.497    856.0
                 2     73.447  85.700    79.102   1007.0
          accuracy     86.455     NaN       NaN      NaN
         macro avg     79.564  80.150    79.593   6637.0
      weighted avg     86.618  86.455    86.421   6637.0
avg f1 (class 1&2)        NaN     NaN    73.799      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4660
F1-macro: 0.7208
F1-pn: 0.6426
             label  precision  recall  f1-score  support
                 0     89.112  86.364    87.716    616.0
                 1     63.918  55.856    59.615    111.0
                 2     62.048  77.444    68.896    133.0
          accuracy     81.047     NaN       

Epoch 5 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5019
F1-macro: 0.7188
F1-pn: 0.6465
             label  precision  recall  f1-score  support
                 0     92.251  81.169    86.356    616.0
                 1     57.937  65.766    61.603    111.0
                 2     57.292  82.707    67.692    133.0
          accuracy     79.419     NaN       NaN      NaN
         macro avg     69.160  76.547    71.884    860.0
      weighted avg     82.415  79.419    80.275    860.0
avg f1 (class 1&2)        NaN     NaN    64.648      NaN


Epoch 5 - Batches:  21%|██        | 86/415 [00:58<19:59,  3.64s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6827
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5019
F1-macro: 0.7188
F1-pn: 0.6465


Epoch 5 - Batches:  41%|████      | 170/415 [01:45<02:16,  1.80it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4631
F1-macro: 0.7472
F1-pn: 0.6770
             label  precision  recall  f1-score  support
                 0     91.111  86.526    88.759    616.0
                 1     64.957  68.468    66.667    111.0
                 2     63.291  75.188    68.729    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     73.120  76.727    74.718    860.0
      weighted avg     83.433  82.442    82.810    860.0
avg f1 (class 1&2)        NaN     NaN    67.698      NaN


Epoch 5 - Batches:  41%|████      | 171/415 [01:56<14:49,  3.64s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.7121
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4631
F1-macro: 0.7472
F1-pn: 0.6770


Epoch 5 - Batches:  50%|█████     | 209/415 [02:17<01:55,  1.79it/s]

evaluate, backprop: Validation


Epoch 5 - Batches:  61%|██████▏   | 255/415 [03:34<01:29,  1.78it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  62%|██████▏   | 256/415 [03:41<06:54,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4491
F1-macro: 0.7326
F1-pn: 0.6564
             label  precision  recall  f1-score  support
                 0     88.350  88.636    88.493    616.0
                 1     65.306  57.658    61.244    111.0
                 2     67.361  72.932    70.036    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     73.672  73.075    73.258    860.0
      weighted avg     82.129  82.209    82.121    860.0
avg f1 (class 1&2)        NaN     NaN    65.640      NaN


Epoch 5 - Batches:  82%|████████▏ | 340/415 [04:28<00:42,  1.78it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  82%|████████▏ | 341/415 [04:36<03:13,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4502
F1-macro: 0.7387
F1-pn: 0.6646
             label  precision  recall  f1-score  support
                 0     90.388  87.013    88.668    616.0
                 1     64.486  62.162    63.303    111.0
                 2     63.750  76.692    69.625    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     72.875  75.289    73.865    860.0
      weighted avg     82.925  82.209    82.449    860.0
avg f1 (class 1&2)        NaN     NaN    66.464      NaN


Epoch 5 - Batches: 100%|██████████| 415/415 [05:17<00:00,  1.31it/s]
INFO:root:Epoch 5, Loss: 0.40930005724889684, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.3045
F1-macro: 0.8530
F1-pn: 0.8132
             label  precision  recall  f1-score  support
                 0     94.475  92.061    93.253   4774.0
                 1     74.973  79.790    77.306    856.0
                 2     82.682  88.183    85.344   1007.0
          accuracy     89.890     NaN       NaN      NaN
         macro avg     84.043  86.678    85.301   6637.0
      weighted avg     90.171  89.890    89.996   6637.0
avg f1 (class 1&2)        NaN     NaN    81.325      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4601
F1-macro: 0.7461
F1-pn: 0.6754
             label  precision  recall  f1-score  support
                 0     91.111  86.526    88.759    616.0
                 1     62.500  67.568    64.935    111.0
                 2     65.161  75.940    70.139    133.0
          accuracy     82.442     NaN       

Epoch 6 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.78it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  21%|██        | 86/415 [00:55<14:22,  2.62s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4779
F1-macro: 0.7341
F1-pn: 0.6628
             label  precision  recall  f1-score  support
                 0     91.373  84.253    87.669    616.0
                 1     56.250  72.973    63.529    111.0
                 2     65.541  72.932    69.039    133.0
          accuracy     81.047     NaN       NaN      NaN
         macro avg     71.055  76.720    73.412    860.0
      weighted avg     82.845  81.047    81.672    860.0
avg f1 (class 1&2)        NaN     NaN    66.284      NaN


Epoch 6 - Batches:  41%|████      | 170/415 [01:42<02:17,  1.78it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  41%|████      | 171/415 [01:49<10:35,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4618
F1-macro: 0.7467
F1-pn: 0.6762
             label  precision  recall  f1-score  support
                 0     91.111  86.526    88.759    616.0
                 1     63.333  68.468    65.801    111.0
                 2     64.516  75.188    69.444    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     72.987  76.727    74.668    860.0
      weighted avg     83.413  82.442    82.809    860.0
avg f1 (class 1&2)        NaN     NaN    67.623      NaN


Epoch 6 - Batches:  50%|█████     | 209/415 [02:10<01:55,  1.79it/s]

evaluate, backprop: Validation


Epoch 6 - Batches:  61%|██████▏   | 255/415 [03:28<01:29,  1.78it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  62%|██████▏   | 256/415 [03:35<06:53,  2.60s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4582
F1-macro: 0.7427
F1-pn: 0.6694
             label  precision  recall  f1-score  support
                 0     90.572  87.338    88.926    616.0
                 1     66.364  65.766    66.063    111.0
                 2     62.821  73.684    67.820    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     73.252  75.596    74.270    860.0
      weighted avg     83.156  82.442    82.711    860.0
avg f1 (class 1&2)        NaN     NaN    66.942      NaN


Epoch 6 - Batches:  82%|████████▏ | 340/415 [04:22<00:41,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4555
F1-macro: 0.7556
F1-pn: 0.6867
             label  precision  recall  f1-score  support
                 0     91.638  87.175    89.351    616.0
                 1     65.289  71.171    68.103    111.0
                 2     64.706  74.436    69.231    133.0
          accuracy     83.140     NaN       NaN      NaN
         macro avg     73.878  77.594    75.562    860.0
      weighted avg     84.072  83.140    83.497    860.0
avg f1 (class 1&2)        NaN     NaN    68.667      NaN


Epoch 6 - Batches:  82%|████████▏ | 341/415 [04:33<04:27,  3.61s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.7211
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4555
F1-macro: 0.7556
F1-pn: 0.6867


Epoch 6 - Batches: 100%|██████████| 415/415 [05:14<00:00,  1.32it/s]
INFO:root:Epoch 6, Loss: 0.3156509828495692, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.2278
F1-macro: 0.9015
F1-pn: 0.8738
             label  precision  recall  f1-score  support
                 0     95.569  95.790    95.679   4774.0
                 1     86.286  83.061    84.643    856.0
                 2     89.202  91.063    90.123   1007.0
          accuracy     93.431     NaN       NaN      NaN
         macro avg     90.353  89.971    90.148   6637.0
      weighted avg     93.406  93.431    93.413   6637.0
avg f1 (class 1&2)        NaN     NaN    87.383      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4482
F1-macro: 0.7534
F1-pn: 0.6811
             label  precision  recall  f1-score  support
                 0     91.000  88.636    89.803    616.0
                 1     67.273  66.667    66.968    111.0
                 2     65.333  73.684    69.258    133.0
          accuracy     83.488     NaN       

Epoch 7 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.78it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  21%|██        | 86/415 [00:55<14:18,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4743
F1-macro: 0.7441
F1-pn: 0.6718
             label  precision  recall  f1-score  support
                 0     90.847  87.013    88.889    616.0
                 1     63.333  68.468    65.801    111.0
                 2     64.667  72.932    68.551    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     72.949  76.138    74.414    860.0
      weighted avg     83.247  82.442    82.764    860.0
avg f1 (class 1&2)        NaN     NaN    67.176      NaN


Epoch 7 - Batches:  41%|████      | 170/415 [01:41<02:16,  1.80it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  41%|████      | 171/415 [01:49<10:35,  2.60s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4782
F1-macro: 0.7510
F1-pn: 0.6804
             label  precision  recall  f1-score  support
                 0     91.327  87.175    89.203    616.0
                 1     65.789  67.568    66.667    111.0
                 2     63.924  75.940    69.416    133.0
          accuracy     82.907     NaN       NaN      NaN
         macro avg     73.680  76.894    75.095    860.0
      weighted avg     83.793  82.907    83.234    860.0
avg f1 (class 1&2)        NaN     NaN    68.041      NaN


Epoch 7 - Batches:  50%|█████     | 209/415 [02:10<01:55,  1.78it/s]

evaluate, backprop: Validation


Epoch 7 - Batches:  61%|██████▏   | 255/415 [03:27<01:29,  1.79it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  62%|██████▏   | 256/415 [03:35<06:55,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4761
F1-macro: 0.7480
F1-pn: 0.6756
             label  precision  recall  f1-score  support
                 0     90.772  87.825    89.274    616.0
                 1     64.655  67.568    66.079    111.0
                 2     65.541  72.932    69.039    133.0
          accuracy     82.907     NaN       NaN      NaN
         macro avg     73.656  76.108    74.797    860.0
      weighted avg     83.499  82.907    83.151    860.0
avg f1 (class 1&2)        NaN     NaN    67.559      NaN


Epoch 7 - Batches:  82%|████████▏ | 340/415 [04:22<00:41,  1.79it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  82%|████████▏ | 341/415 [04:29<03:13,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4789
F1-macro: 0.7485
F1-pn: 0.6787
             label  precision  recall  f1-score  support
                 0     91.552  86.201    88.796    616.0
                 1     63.710  71.171    67.234    111.0
                 2     63.462  74.436    68.512    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     72.908  77.270    74.847    860.0
      weighted avg     83.614  82.442    82.876    860.0
avg f1 (class 1&2)        NaN     NaN    67.873      NaN


Epoch 7 - Batches: 100%|██████████| 415/415 [05:11<00:00,  1.33it/s]
INFO:root:Epoch 7, Loss: 0.22806515252464507, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.1464
F1-macro: 0.9425
F1-pn: 0.9264
             label  precision  recall  f1-score  support
                 0     98.009  96.942    97.473   4774.0
                 1     89.527  92.874    91.170    856.0
                 2     93.184  95.035    94.100   1007.0
          accuracy     96.128     NaN       NaN      NaN
         macro avg     93.573  94.950    94.248   6637.0
      weighted avg     96.183  96.128    96.148   6637.0
avg f1 (class 1&2)        NaN     NaN    92.635      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4774
F1-macro: 0.7543
F1-pn: 0.6845
             label  precision  recall  f1-score  support
                 0     91.356  87.500    89.386    616.0
                 1     65.289  71.171    68.103    111.0
                 2     65.101  72.932    68.794    133.0
          accuracy     83.140     NaN       

Epoch 8 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.78it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  21%|██        | 86/415 [00:55<14:19,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5106
F1-macro: 0.7530
F1-pn: 0.6839
             label  precision  recall  f1-score  support
                 0     91.312  87.013    89.111    616.0
                 1     64.228  71.171    67.521    111.0
                 2     65.333  73.684    69.258    133.0
          accuracy     82.907     NaN       NaN      NaN
         macro avg     73.624  77.289    75.297    860.0
      weighted avg     83.798  82.907    83.254    860.0
avg f1 (class 1&2)        NaN     NaN    68.390      NaN


Epoch 8 - Batches:  41%|████      | 170/415 [01:42<02:17,  1.79it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5094
F1-macro: 0.7646
F1-pn: 0.6978
             label  precision  recall  f1-score  support
                 0     91.568  88.149    89.826    616.0
                 1     69.565  72.072    70.796    111.0
                 2     64.474  73.684    68.772    133.0
          accuracy     83.837     NaN       NaN      NaN
         macro avg     75.202  77.969    76.465    860.0
      weighted avg     84.538  83.837    84.114    860.0
avg f1 (class 1&2)        NaN     NaN    69.784      NaN


Epoch 8 - Batches:  41%|████      | 171/415 [01:52<14:44,  3.63s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.7312
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5094
F1-macro: 0.7646
F1-pn: 0.6978


Epoch 8 - Batches:  50%|█████     | 209/415 [02:14<01:55,  1.79it/s]

evaluate, backprop: Validation


Epoch 8 - Batches:  61%|██████▏   | 255/415 [03:31<01:29,  1.79it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  62%|██████▏   | 256/415 [03:38<06:53,  2.60s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5206
F1-macro: 0.7632
F1-pn: 0.6951
             label  precision  recall  f1-score  support
                 0     91.304  88.636    89.951    616.0
                 1     68.750  69.369    69.058    111.0
                 2     66.000  74.436    69.965    133.0
          accuracy     83.953     NaN       NaN      NaN
         macro avg     75.351  77.481    76.325    860.0
      weighted avg     84.480  83.953    84.163    860.0
avg f1 (class 1&2)        NaN     NaN    69.511      NaN


Epoch 8 - Batches:  82%|████████▏ | 340/415 [04:25<00:41,  1.79it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  82%|████████▏ | 341/415 [04:33<03:13,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5310
F1-macro: 0.7613
F1-pn: 0.6949
             label  precision  recall  f1-score  support
                 0     91.938  87.013    89.408    616.0
                 1     66.393  72.973    69.528    111.0
                 2     64.516  75.188    69.444    133.0
          accuracy     83.372     NaN       NaN      NaN
         macro avg     74.283  78.391    76.127    860.0
      weighted avg     84.400  83.372    83.755    860.0
avg f1 (class 1&2)        NaN     NaN    69.486      NaN


Epoch 8 - Batches: 100%|██████████| 415/415 [05:14<00:00,  1.32it/s]
INFO:root:Epoch 8, Loss: 0.14016121192779168, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0813
F1-macro: 0.9733
F1-pn: 0.9658
             label  precision  recall  f1-score  support
                 0     99.095  98.576    98.834   4774.0
                 1     95.402  96.963    96.176    856.0
                 2     96.464  97.517    96.988   1007.0
          accuracy     98.207     NaN       NaN      NaN
         macro avg     96.987  97.685    97.333   6637.0
      weighted avg     98.219  98.207    98.211   6637.0
avg f1 (class 1&2)        NaN     NaN    96.582      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5194
F1-macro: 0.7571
F1-pn: 0.6877
             label  precision  recall  f1-score  support
                 0     91.385  87.825    89.570    616.0
                 1     66.949  71.171    68.996    111.0
                 2     64.667  72.932    68.551    133.0
          accuracy     83.372     NaN       

Epoch 9 - Batches:  20%|██        | 85/415 [00:47<03:05,  1.78it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  21%|██        | 86/415 [00:55<14:18,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5779
F1-macro: 0.7468
F1-pn: 0.6758
             label  precision  recall  f1-score  support
                 0     91.566  86.364    88.889    616.0
                 1     61.069  72.072    66.116    111.0
                 2     65.541  72.932    69.039    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     72.725  77.123    74.681    860.0
      weighted avg     83.605  82.442    82.880    860.0
avg f1 (class 1&2)        NaN     NaN    67.577      NaN


Epoch 9 - Batches:  41%|████      | 170/415 [01:42<02:17,  1.79it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  41%|████      | 171/415 [01:49<10:35,  2.60s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5735
F1-macro: 0.7567
F1-pn: 0.6884
             label  precision  recall  f1-score  support
                 0     91.924  86.851    89.316    616.0
                 1     63.492  72.072    67.511    111.0
                 2     65.789  75.188    70.175    133.0
          accuracy     83.140     NaN       NaN      NaN
         macro avg     73.735  78.037    75.667    860.0
      weighted avg     84.213  83.140    83.541    860.0
avg f1 (class 1&2)        NaN     NaN    68.843      NaN


Epoch 9 - Batches:  50%|█████     | 209/415 [02:10<01:55,  1.78it/s]

evaluate, backprop: Validation


Epoch 9 - Batches:  61%|██████▏   | 255/415 [03:27<01:29,  1.79it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  62%|██████▏   | 256/415 [03:35<06:54,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5842
F1-macro: 0.7623
F1-pn: 0.6942
             label  precision  recall  f1-score  support
                 0     91.290  88.474    89.860    616.0
                 1     69.369  69.369    69.369    111.0
                 2     65.132  74.436    69.474    133.0
          accuracy     83.837     NaN       NaN      NaN
         macro avg     75.264  77.426    76.234    860.0
      weighted avg     84.415  83.837    84.062    860.0
avg f1 (class 1&2)        NaN     NaN    69.422      NaN


Epoch 9 - Batches:  82%|████████▏ | 340/415 [04:22<00:42,  1.78it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  82%|████████▏ | 341/415 [04:29<03:13,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5896
F1-macro: 0.7519
F1-pn: 0.6824
             label  precision  recall  f1-score  support
                 0     91.595  86.688    89.074    616.0
                 1     65.812  69.369    67.544    111.0
                 2     63.125  75.940    68.942    133.0
          accuracy     82.791     NaN       NaN      NaN
         macro avg     73.511  77.333    75.187    860.0
      weighted avg     83.864  82.791    83.182    860.0
avg f1 (class 1&2)        NaN     NaN    68.243      NaN


Epoch 9 - Batches: 100%|██████████| 415/415 [05:11<00:00,  1.33it/s]
INFO:root:Epoch 9, Loss: 0.07719121122149279, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0409
F1-macro: 0.9902
F1-pn: 0.9872
             label  precision  recall  f1-score  support
                 0     99.811  99.393    99.601   4774.0
                 1     98.266  99.299    98.780    856.0
                 2     98.134  99.206    98.667   1007.0
          accuracy     99.352     NaN       NaN      NaN
         macro avg     98.737  99.299    99.016   6637.0
      weighted avg     99.357  99.352    99.353   6637.0
avg f1 (class 1&2)        NaN     NaN    98.723      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5697
F1-macro: 0.7513
F1-pn: 0.6779
             label  precision  recall  f1-score  support
                 0     90.049  89.610    89.829    616.0
                 1     69.307  63.063    66.038    111.0
                 2     66.438  72.932    69.534    133.0
          accuracy     83.605     NaN       

Epoch 10 - Batches:  20%|██        | 85/415 [00:47<03:04,  1.79it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  21%|██        | 86/415 [00:55<14:17,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6271
F1-macro: 0.7564
F1-pn: 0.6881
             label  precision  recall  f1-score  support
                 0     91.924  86.851    89.316    616.0
                 1     63.710  71.171    67.234    111.0
                 2     65.584  75.940    70.383    133.0
          accuracy     83.140     NaN       NaN      NaN
         macro avg     73.739  77.987    75.644    860.0
      weighted avg     84.209  83.140    83.538    860.0
avg f1 (class 1&2)        NaN     NaN    68.809      NaN


Epoch 10 - Batches:  41%|████      | 170/415 [01:42<02:17,  1.78it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  41%|████      | 171/415 [01:49<10:36,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6291
F1-macro: 0.7543
F1-pn: 0.6848
             label  precision  recall  f1-score  support
                 0     91.201  87.500    89.312    616.0
                 1     65.254  69.369    67.249    111.0
                 2     65.563  74.436    69.718    133.0
          accuracy     83.140     NaN       NaN      NaN
         macro avg     74.006  77.102    75.427    860.0
      weighted avg     83.887  83.140    83.434    860.0
avg f1 (class 1&2)        NaN     NaN    68.484      NaN


Epoch 10 - Batches:  50%|█████     | 209/415 [02:10<01:55,  1.78it/s]

evaluate, backprop: Validation


Epoch 10 - Batches:  61%|██████▏   | 255/415 [03:28<01:29,  1.78it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  62%|██████▏   | 256/415 [03:35<06:54,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6287
F1-macro: 0.7591
F1-pn: 0.6888
             label  precision  recall  f1-score  support
                 0     90.344  89.610    89.976    616.0
                 1     69.811  66.667    68.203    111.0
                 2     67.133  72.180    69.565    133.0
          accuracy     83.953     NaN       NaN      NaN
         macro avg     75.763  76.153    75.915    860.0
      weighted avg     84.104  83.953    84.009    860.0
avg f1 (class 1&2)        NaN     NaN    68.884      NaN


Epoch 10 - Batches:  82%|████████▏ | 340/415 [04:22<00:42,  1.78it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  82%|████████▏ | 341/415 [04:30<03:13,  2.61s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6378
F1-macro: 0.7519
F1-pn: 0.6810
             label  precision  recall  f1-score  support
                 0     90.787  87.987    89.365    616.0
                 1     64.957  68.468    66.667    111.0
                 2     66.438  72.932    69.534    133.0
          accuracy     83.140     NaN       NaN      NaN
         macro avg     74.061  76.463    75.189    860.0
      weighted avg     83.688  83.140    83.369    860.0
avg f1 (class 1&2)        NaN     NaN    68.100      NaN


Epoch 10 - Batches: 100%|██████████| 415/415 [05:11<00:00,  1.33it/s]
INFO:root:Epoch 10, Loss: 0.03793388788969851, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0182
F1-macro: 0.9961
F1-pn: 0.9950
             label  precision  recall  f1-score  support
                 0     99.958  99.686    99.822   4774.0
                 1     99.303  99.883    99.592    856.0
                 2     99.015  99.801    99.407   1007.0
          accuracy     99.729     NaN       NaN      NaN
         macro avg     99.425  99.790    99.607   6637.0
      weighted avg     99.730  99.729    99.729   6637.0
avg f1 (class 1&2)        NaN     NaN    99.499      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6560
F1-macro: 0.7552
F1-pn: 0.6854
             label  precision  recall  f1-score  support
                 0     91.371  87.662    89.478    616.0
                 1     65.546  70.270    67.826    111.0
                 2     65.333  73.684    69.258    133.0
          accuracy     83.256     NaN       

In [11]:
import torch
from transformers import AutoTokenizer, AutoModel

# Загружаем токенизатор и модель
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Добавим специальные токены
special_tokens = {"additional_special_tokens": ["<en>", "</en>", " <|company|>"]}
tokenizer.add_special_tokens(special_tokens)
model.resize_token_embeddings(len(tokenizer))

# Пример текста
text = "This is <en> Apple  <|company|> </en>."

# Токенизация
encoding = tokenizer(text, return_tensors='pt')
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

# Пропускаем через модель
with torch.no_grad():
    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    sequence_output = outputs.last_hidden_state

# Получаем ID спецтокенов
en_token_id = tokenizer.convert_tokens_to_ids("<en>")
end_en_token_id = tokenizer.convert_tokens_to_ids("</en>")

# Извлекаем эмбеддинги сущности
input_id = input_ids[0]
output = sequence_output[0]

# Ищем индексы <en> и </en>
start = (input_id == en_token_id).nonzero(as_tuple=True)[0].item()
end = (input_id == end_en_token_id).nonzero(as_tuple=True)[0].item()

# Эмбеддинги между <en> и </en>
entity_tokens = output[start + 1:end]

print(f"Токены сущности: {tokenizer.convert_ids_to_tokens(input_id[start+1:end])}")
print(f"Форма эмбеддингов: {entity_tokens.shape}")


# Среднее по токенам сущности
entity_embedding = entity_tokens.mean(dim=0)
print(f"Размерность итогового эмбеддинга сущности: {entity_embedding.shape}")

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Токены сущности: ['apple', ' <|company|>']
Форма эмбеддингов: torch.Size([2, 768])
Размерность итогового эмбеддинга сущности: torch.Size([768])
