In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import argparse
import logging
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import f1_score, classification_report
from torch.utils.data import DataLoader, TensorDataset
import sys
from tqdm import tqdm, trange
import os
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CyclicLR


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import RobertaModel
import torch
import torch.nn as nn
from transformers.modeling_outputs import SequenceClassifierOutput

class RobertaWithMultiSampleDropout(nn.Module):
    def __init__(self, model_name, num_labels, dropout_rate=0.3, num_dropouts=5, use_multi_sample_dropout=True):
        super(RobertaWithMultiSampleDropout, self).__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.use_multi_sample_dropout = use_multi_sample_dropout

        if self.use_multi_sample_dropout:
            self.dropouts = nn.ModuleList([
                nn.Dropout(dropout_rate) for _ in range(num_dropouts)
            ])
        else:
            self.dropout = nn.Dropout(dropout_rate)

        self.classifier = nn.Linear(self.roberta.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        sequence_output = outputs[0]  # последний скрытый слой

        if self.use_multi_sample_dropout:
            logits_list = []
            for dropout in self.dropouts:
                dropped = dropout(sequence_output[:, 0, :])  # [CLS]-токен
                logits_list.append(self.classifier(dropped))
            avg_logits = torch.mean(torch.stack(logits_list), dim=0)
        else:
            dropped = self.dropout(sequence_output[:, 0, :])  # [CLS]-токен
            avg_logits = self.classifier(dropped)

        return SequenceClassifierOutput(logits=avg_logits)


In [3]:
def save_metrics_best(epoch, all_labels, all_preds, result_path, backprop, loss):
    import os
    import pandas as pd
    from sklearn.metrics import classification_report

    metrics_file = os.path.join(result_path, "metrics_best.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)

    # Создаем словарь с метриками
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop,
        "loss": loss
    }

    # Добавляем метрики по каждому классу
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]

    # Создаем DataFrame и сохраняем его в CSV (перезапись файла)
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='w', header=True, index=False)

In [4]:
def compute_ens_weights(train_labels, beta: float = 0.999):
    train_labels = np.array(train_labels)
    classes, counts = np.unique(train_labels, return_counts=True)
    effective_num = (1 - np.power(beta, counts)) / (1 - beta)
    weights = 1.0 / effective_num
    weights = weights / np.sum(weights) * len(classes)  # нормализация как в оригинальной статье
    return torch.tensor(weights, dtype=torch.float)

In [5]:
def get_entity_tags_from_files(*file_paths):
    all_tags = set()
    for path in file_paths:
        try:
            df = pd.read_csv(path, sep='\t')
            if "entity_tag" in df.columns:
                tags = df["entity_tag"].dropna().unique().tolist()
                all_tags.update(tags)
        except Exception as e:
            print(f"[Ошибка] Не удалось загрузить {path}: {e}")
    return sorted(list(all_tags))

In [6]:
def load_data(file_path, tokenizer, max_seq_len):
    try:
        df = pd.read_csv(file_path, sep='\t')  # Загружаем CSV (TSV)

        # Проверка нужных колонок
        required_columns = {
            "sentence", "entity", "label", "entity_tag",
            "entity_pos_start_rel", "entity_pos_end_rel"
        }
        if not required_columns.issubset(df.columns):
            raise ValueError(f"Ожидаемые колонки: {required_columns}, но в файле: {df.columns}")

        # Заменяем -1 на 2 (если есть)
        df["label"] = df["label"].replace(-1, 2)

        # Размечаем предложения с [ENTITY] токенами
        def mark_entity_inline(row):
            sent = row["sentence"]
            start = row["entity_pos_start_rel"]
            end = row["entity_pos_end_rel"]
            tag = row["entity_tag"]
            tag_token = f"<en>"
            tag_token_close = f"</en>"
            return (
                sent[:start] +
                f"{tag_token}" + sent[start:end] + f"{tag_token_close}" +
                sent[end:]
            )

        df["input_text"] = df.apply(mark_entity_inline, axis=1)
        texts = df["input_text"].tolist()
        labels = torch.tensor(df["label"].astype(int).tolist(), dtype=torch.long)

        # Токенизируем
        encodings = tokenizer(texts, padding=True, truncation=True,
                              max_length=max_seq_len, return_tensors='pt')
        dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'], labels)
        return dataset

    except FileNotFoundError:
        logging.error(f"Файл {file_path} не найден!")
        return None

In [7]:

def save_metrics(epoch, all_labels, all_preds, result_path, backprop, loss):
    metrics_file = os.path.join(result_path, "metrics_class_weights.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)
    
    # Создаем DataFrame для метрик
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop , # (Добавлено)
        "loss": loss
    }
    
    # Добавляем метрики для каждого класса
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):  # Пропускаем 'accuracy', так как это float
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]
    
    # Создаем DataFrame и сохраняем в CSV
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='a', header=not os.path.exists(metrics_file), index=False)

def save_confusion_matrix(epoch, y_true, y_pred, result_path,backprop):
    cm = confusion_matrix(y_true, y_pred)
    cm_df = pd.DataFrame(cm, columns=["Pred_0", "Pred_1", "Pred_2"], index=["True_0", "True_1", "True_2"])
    cm_file = os.path.join(result_path, f"confusion_matrix_class_weights_epoch_{epoch}_backprop_{backprop}.csv")
    cm_df.to_csv(cm_file)



In [8]:
from sklearn.metrics import classification_report
import pandas as pd

def castom_classification_report(all_labels, all_preds):
    report = classification_report(all_labels, all_preds, output_dict=True)
    data = []
    for label, metrics in report.items():
        if isinstance(metrics, dict):  # Для всех метрик, кроме accuracy
            row = {'label': label}
            for metric, value in metrics.items():
                row[metric] = round(value * 100, 3) if metric != 'support' else value
            data.append(row)
        else:  # Для accuracy
            data.append({'label': 'accuracy', 'precision': round(metrics * 100, 3), 'recall': None, 'f1-score': None, 'support': None})

    # Добавляем macro F1 для классов 1 и 2
    if '1' in report and '2' in report:
        f1_1 = report['1']['f1-score']
        f1_2 = report['2']['f1-score']
        f1_macro_1_2 = (f1_1 + f1_2) / 2
        data.append({
            'label': 'avg f1 (class 1&2)',
            'precision': None,
            'recall': None,
            'f1-score': round(f1_macro_1_2 * 100, 3),
            'support': None
        })
    
    # Создаем DataFrame
    df = pd.DataFrame(data)
    df.index = [''] * len(df)

    # Выводим таблицу
    print(df.to_string(index=False))

In [9]:
def add_token(flag = False):
    # Формируем список специальных токенов
    if not flag: return None
    special_tokens = {
        "additional_special_tokens": 
            ["<en>", "</en>"]
    }

    print("Добавленные специальные токены:")
    for token in special_tokens["additional_special_tokens"]:
        print(token)
    return special_tokens

def save_contrel_date(tokenizer, train_dataset):
    special_tokens = add_token(flag = True)
    if special_tokens != None: 
        tokenizer.add_special_tokens(special_tokens)
    tokenizer.save_pretrained(os.path.join(args.result, "tokenizer"))
    print(tokenizer.special_tokens_map)
    print(tokenizer.additional_special_tokens)

    
    file_control_text = os.path.join(args.result, "use_market_text.txt")
    input_ids, attention_mask, label = train_dataset[0]
    decoded_text_token = tokenizer.decode(input_ids, skip_special_tokens=False)
    decoded_text = tokenizer.decode(input_ids, skip_special_tokens=True)
    with open(file_control_text, "w") as f:
        f.write(f"Text token:\n{decoded_text_token}\n\n")
        f.write(f"Text:\n{decoded_text}\n\n")
    print(f"Text token:\n{decoded_text_token}\n\n")
    print(f"Text:\n{decoded_text}\n\n")

In [10]:

lr_m = 1e-6
# lr = 1e-6 началось обучение, уменьшение TrainError
# добавить волидацию по шагам в течение эпохи обучения, а не каджые N епох
# Добавить сохранение ошибки в файлы для построения графиков
# проверить методы передачи сущьности (параетром, +тип, или в тексте выделяя тегами).
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="./ruRoberta-large/")
    #parser.add_argument("--model_name", type=str, default="sberbank-ai/ruRoberta-large", help="Имя модели")
    parser.add_argument("--max_seq_len", type=int, default=512) #128
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--lr", type=float, default=lr_m)
    parser.add_argument("--init_checkpoint", type=str, default=None)
    parser.add_argument("--train_data", type=str, required=False, default="./data/train_data.csv")
    parser.add_argument("--validation_data", type=str, required=False, default="./data/validation.csv")
    parser.add_argument("--eval_data", type=str, required=False, default="./data/test.csv")
    parser.add_argument("--result", type=str, default=f"./result_lr_{lr_m}_ENS_Teg_32_multi_dropout_V3_Token/")
    # Игнорируем аргументы Jupyter
    args, unknown = parser.parse_known_args()
    os.makedirs(args.result, exist_ok=True)
    return args


if __name__ == "__main__":
    args = parse_args()
    print(args)  # Проверяем аргументы

    base_lr = args.lr
    min_lr = base_lr * (1/4)
    step_size_up = 400 #400

    best_avg_f1 = 0.0
    
    best_model_path = os.path.join(args.result, "best_model.pth")

    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    
    logging.basicConfig(level=logging.INFO)

    tokenizer = RobertaTokenizer.from_pretrained(args.model_name)
    #model = RobertaForSequenceClassification.from_pretrained(args.model_name, num_labels=3)
    model = RobertaWithMultiSampleDropout(model_name='./ruRoberta-large', num_labels=3, use_multi_sample_dropout=True )


    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)


    train_dataset = load_data(args.train_data, tokenizer, args.max_seq_len)
    validation_data = load_data(args.validation_data, tokenizer, args.max_seq_len)
    eval_dataset = load_data(args.eval_data, tokenizer, args.max_seq_len)

    
    if train_dataset is None or eval_dataset is None or validation_data is None:
        sys.exit(f"Ошибка загрузки данных: убедитесь, что файлы {args.train_data}, {args.validation_data} и {args.eval_data} существуют и содержат нужные колонки.")

    #сохранение текста и токенов, с дабовлением собственных
    save_contrel_date(tokenizer,train_dataset)
    model.roberta.resize_token_embeddings(len(tokenizer))
    # Получаем метки классов из тренировочного датасета
    train_labels = [label.item() for _, _, label in train_dataset]

    # Вычисляем веса классов
    class_weights = compute_ens_weights(train_labels, beta=0.999)

    # Вывод весов классов
    print(f"class_weights = {class_weights}")
    
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    validation_loader = DataLoader(validation_data, batch_size=args.batch_size)
    eval_loader = DataLoader(eval_dataset, batch_size=args.batch_size)
    
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    
    #scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr = base_lr, step_size_up = step_size_up, mode="triangular2",cycle_momentum=False )

    loss_fct = nn.CrossEntropyLoss(weight=class_weights.to(device))
    
    def train():
        model.train()
        batches_per_validation = (len(train_loader) // 2)+2
        batches_per_test = (len(train_loader) // 5)+2
        
        for epoch in range(args.epochs):
            print(f"Epoch: {epoch}/{args.epochs}")
            total_loss = 0
            for i,batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1} - Batches")):
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                optimizer.zero_grad()
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                loss.backward()
                optimizer.step()
                #scheduler.step()
                
                total_loss += loss.item()
                if(i % batches_per_validation == 0 and i >50):
                    evaluate(epoch, backprop="Validation", vall_train = True)
                if(i % batches_per_test == 0 and i >50):
                    evaluate(epoch, backprop="Test", flag_print = True)
                
            logging.info(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}, Step: {len(train_loader)}")
            # Валидация с обратным распространением ошибки каждые 2 эпохи
            evaluate(epoch , backprop="Train", flag_print = True)
            evaluate(epoch , backprop="Test", flag_print = True)
            evaluate(epoch , backprop="Validation", flag_print = True, vall_train = True)
            
            
    
    def evaluate(epoch = None, backprop = "None", flag_print = False, vall_train = False):
        global best_avg_f1
        model.eval()
        all_preds, all_labels = [], []
        total_loss = 0
        print(f"evaluate, backprop: {backprop}")
        loader = validation_loader
        if backprop == "Validation":
            loader = validation_loader
        elif backprop == "Test":
            loader = eval_loader
        elif backprop == "Train":
            loader = train_loader
        
        else: loader = eval_loader
        with torch.no_grad() if not backprop == "Validation" else torch.enable_grad():  # Включаем градиенты для обучения на валидации
            for batch in loader:
            #for batch in loader:
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                
                
                
                if backprop == "Validation" and vall_train:
                    #уменьшая тк обучающие данные удвоились
                    loss = loss
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    #scheduler.step()
                    loss = loss

                preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())
                total_loss += loss.item()
                
        avg_loss = total_loss / len(loader)
        f1 = f1_score(all_labels, all_preds, average='macro')
        f1_pn = f1_score(all_labels, all_preds, labels=[1, 2], average='macro')
        avg_f1 = (f1 + f1_pn) / 2

        if(flag_print):
            print()
            print("--"*20)
            print(f"Result evaluate in {backprop}")
            #logging.info(f"{backprop} Loss: {avg_loss:.4f}")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")
            #print(classification_report(all_labels, all_preds))
            castom_classification_report(all_labels, all_preds)

        # 💾 Сохраняем модель только при улучшении средней метрики
        if (avg_f1 > best_avg_f1) and backprop == "Test":
            best_avg_f1 = avg_f1
            torch.save(model.state_dict(), best_model_path)
            
            print(f"[Checkpoint] 🎯 Новый лучший средний F1: {best_avg_f1:.4f}")
            print(f"[Checkpoint] 💾 Модель сохранена: {best_model_path}")
            # 🔸 Сохраняем значение лучшего F1 в файл
            best_score_path = os.path.join(args.result, "best_score.txt")
            save_metrics_best(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
            save_confusion_matrix(epoch, all_labels, all_preds, args.result, backprop)
            with open(best_score_path, "w") as f:
                f.write(f"Epoch: {epoch}\n")
                f.write(f"Loss: {avg_loss:.4f}\n")
                f.write(f"F1-pn0: {f1:.4f}\n")
                f.write(f"F1-pn: {f1_pn:.4f}\n")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")


        if epoch is not None:
            save_metrics(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
        
    if args.init_checkpoint:
        model.load_state_dict(torch.load(args.init_checkpoint, map_location=device))

    train()

Namespace(model_name='./ruRoberta-large/', max_seq_len=512, batch_size=16, epochs=10, lr=1e-06, init_checkpoint=None, train_data='./data/train_data.csv', validation_data='./data/validation.csv', eval_data='./data/test.csv', result='./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/')


Some weights of RobertaModel were not initialized from the model checkpoint at ./ruRoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Добавленные специальные токены:
<en>
</en>
{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>', 'additional_special_tokens': ['<en>', '</en>']}
['<en>', '</en>']
Text token:
<s>Джеймс «Бадди» Макгирт (James (Buddy) McGirt, тренер Дадашева упрашивал дагестанского <en> спортсмена </en> остановить бой, но тот хотел продолжать.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

Epoch 1 - Batches:  20%|██        | 85/415 [00:44<02:52,  1.92it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.8095
F1-macro: 0.2911
F1-pn: 0.0210
             label  precision  recall  f1-score  support
                 0     71.731  98.864    83.140    616.0
                 1      0.000   0.000     0.000    111.0
                 2     30.000   2.256     4.196    133.0
          accuracy     71.163     NaN       NaN      NaN
         macro avg     33.910  33.706    29.112    860.0
      weighted avg     56.019  71.163    60.200    860.0
avg f1 (class 1&2)        NaN     NaN     2.098      NaN


Epoch 1 - Batches:  21%|██        | 86/415 [00:53<16:19,  2.98s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.1560
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.8095
F1-macro: 0.2911
F1-pn: 0.0210


Epoch 1 - Batches:  41%|████      | 170/415 [01:36<02:07,  1.92it/s]

evaluate, backprop: Test


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Epoch 1 - Batches:  41%|████      | 171/415 [01:43<09:58,  2.45s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7588
F1-macro: 0.2837
F1-pn: 0.0088
             label  precision  recall  f1-score  support
                 0     71.645  99.675    83.367    616.0
                 1     33.333   0.901     1.754    111.0
                 2      0.000   0.000     0.000    133.0
          accuracy     71.512     NaN       NaN      NaN
         macro avg     34.993  33.525    28.374    860.0
      weighted avg     55.620  71.512    59.941    860.0
avg f1 (class 1&2)        NaN     NaN     0.877      NaN


Epoch 1 - Batches:  50%|█████     | 209/415 [02:03<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 1 - Batches:  61%|██████▏   | 255/415 [03:13<01:22,  1.93it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7456
F1-macro: 0.3163
F1-pn: 0.0555
             label  precision  recall  f1-score  support
                 0     72.373  99.513    83.800    616.0
                 1     57.143   3.604     6.780    111.0
                 2     50.000   2.256     4.317    133.0
          accuracy     72.093     NaN       NaN      NaN
         macro avg     59.839  35.124    31.632    860.0
      weighted avg     66.947  72.093    61.567    860.0
avg f1 (class 1&2)        NaN     NaN     5.548      NaN


Epoch 1 - Batches:  62%|██████▏   | 256/415 [03:23<09:16,  3.50s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.1859
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.7456
F1-macro: 0.3163
F1-pn: 0.0555


Epoch 1 - Batches:  82%|████████▏ | 340/415 [04:07<00:38,  1.93it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7031
F1-macro: 0.3661
F1-pn: 0.1315
             label  precision  recall  f1-score  support
                 0     73.026  97.565    83.530    616.0
                 1     80.000   3.604     6.897    111.0
                 2     50.000  12.030    19.394    133.0
          accuracy     72.209     NaN       NaN      NaN
         macro avg     67.675  37.733    36.607    860.0
      weighted avg     70.365  72.209    63.720    860.0
avg f1 (class 1&2)        NaN     NaN    13.145      NaN


Epoch 1 - Batches:  82%|████████▏ | 341/415 [04:19<04:45,  3.86s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.2488
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.7031
F1-macro: 0.3661
F1-pn: 0.1315


Epoch 1 - Batches: 100%|██████████| 415/415 [04:57<00:00,  1.39it/s]
INFO:root:Epoch 1, Loss: 0.8879664482840572, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.7598
F1-macro: 0.4211
F1-pn: 0.2098
             label  precision  recall  f1-score  support
                 0     74.729  96.816    84.351   4774.0
                 1     68.750   5.140     9.565    856.0
                 2     58.247  22.443    32.401   1007.0
          accuracy     73.708     NaN       NaN      NaN
         macro avg     67.242  41.466    42.106   6637.0
      weighted avg     71.457  73.708    66.823   6637.0
avg f1 (class 1&2)        NaN     NaN    20.983      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6799
F1-macro: 0.4016
F1-pn: 0.1848
             label  precision  recall  f1-score  support
                 0     73.756  96.266    83.521    616.0
                 1     83.333   4.505     8.547    111.0
                 2     52.000  19.549    28.415    133.0
          accuracy     72.558     NaN       

Epoch 2 - Batches:  20%|██        | 85/415 [00:44<02:51,  1.92it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6488
F1-macro: 0.4850
F1-pn: 0.3067
             label  precision  recall  f1-score  support
                 0     75.880  94.481    84.165    616.0
                 1     64.000  14.414    23.529    111.0
                 2     55.882  28.571    37.811    133.0
          accuracy     73.953     NaN       NaN      NaN
         macro avg     65.254  45.822    48.502    860.0
      weighted avg     71.254  73.953    69.170    860.0
avg f1 (class 1&2)        NaN     NaN    30.670      NaN


Epoch 2 - Batches:  21%|██        | 86/415 [00:54<19:05,  3.48s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.3959
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.6488
F1-macro: 0.4850
F1-pn: 0.3067


Epoch 2 - Batches:  41%|████      | 170/415 [01:38<02:07,  1.92it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6379
F1-macro: 0.5887
F1-pn: 0.4585
             label  precision  recall  f1-score  support
                 0     80.406  89.935    84.904    616.0
                 1     56.452  31.532    40.462    111.0
                 2     56.881  46.617    51.240    133.0
          accuracy     75.698     NaN       NaN      NaN
         macro avg     64.580  56.028    58.869    860.0
      weighted avg     73.676  75.698    73.962    860.0
avg f1 (class 1&2)        NaN     NaN    45.851      NaN


Epoch 2 - Batches:  41%|████      | 171/415 [01:48<14:11,  3.49s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.5236
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.6379
F1-macro: 0.5887
F1-pn: 0.4585


Epoch 2 - Batches:  50%|█████     | 209/415 [02:08<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 2 - Batches:  61%|██████▏   | 255/415 [03:18<01:22,  1.93it/s]

evaluate, backprop: Test


Epoch 2 - Batches:  62%|██████▏   | 256/415 [03:25<06:30,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5966
F1-macro: 0.5381
F1-pn: 0.3846
             label  precision  recall  f1-score  support
                 0     78.099  92.045    84.501    616.0
                 1     66.667  16.216    26.087    111.0
                 2     57.009  45.865    50.833    133.0
          accuracy     75.116     NaN       NaN      NaN
         macro avg     67.258  51.375    53.807    860.0
      weighted avg     73.362  75.116    71.755    860.0
avg f1 (class 1&2)        NaN     NaN    38.460      NaN


Epoch 2 - Batches:  82%|████████▏ | 340/415 [04:09<00:38,  1.93it/s]

evaluate, backprop: Test


Epoch 2 - Batches:  82%|████████▏ | 341/415 [04:16<03:01,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5712
F1-macro: 0.5801
F1-pn: 0.4442
             label  precision  recall  f1-score  support
                 0     79.745  91.396    85.174    616.0
                 1     66.667  25.225    36.601    111.0
                 2     57.143  48.120    52.245    133.0
          accuracy     76.163     NaN       NaN      NaN
         macro avg     67.852  54.914    58.007    860.0
      weighted avg     74.562  76.163    73.812    860.0
avg f1 (class 1&2)        NaN     NaN    44.423      NaN


Epoch 2 - Batches: 100%|██████████| 415/415 [04:54<00:00,  1.41it/s]
INFO:root:Epoch 2, Loss: 0.7206249496304845, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.5738
F1-macro: 0.6740
F1-pn: 0.5728
             label  precision  recall  f1-score  support
                 0     85.425  89.987    87.647   4774.0
                 1     70.507  35.748    47.442    856.0
                 2     62.351  72.691    67.125   1007.0
          accuracy     80.368     NaN       NaN      NaN
         macro avg     72.761  66.142    67.405   6637.0
      weighted avg     80.000  80.368    79.348   6637.0
avg f1 (class 1&2)        NaN     NaN    57.284      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5647
F1-macro: 0.6073
F1-pn: 0.4838
             label  precision  recall  f1-score  support
                 0     83.002  87.987    85.422    616.0
                 1     66.667  27.027    38.462    111.0
                 2     53.086  64.662    58.305    133.0
          accuracy     76.512     NaN       

Epoch 3 - Batches:  20%|██        | 85/415 [00:44<02:52,  1.91it/s]

evaluate, backprop: Test


Epoch 3 - Batches:  21%|██        | 86/415 [00:51<13:29,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5433
F1-macro: 0.6034
F1-pn: 0.4743
             label  precision  recall  f1-score  support
                 0     81.871  90.909    86.154    616.0
                 1     60.784  27.928    38.272    111.0
                 2     58.400  54.887    56.589    133.0
          accuracy     77.209     NaN       NaN      NaN
         macro avg     67.019  57.908    60.338    860.0
      weighted avg     75.520  77.209    75.401    860.0
avg f1 (class 1&2)        NaN     NaN    47.430      NaN


Epoch 3 - Batches:  41%|████      | 170/415 [01:34<02:07,  1.92it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5271
F1-macro: 0.6311
F1-pn: 0.5135
             label  precision  recall  f1-score  support
                 0     83.661  89.773    86.609    616.0
                 1     53.165  37.838    44.211    111.0
                 2     61.667  55.639    58.498    133.0
          accuracy     77.791     NaN       NaN      NaN
         macro avg     66.164  61.083    63.106    860.0
      weighted avg     76.323  77.791    76.789    860.0
avg f1 (class 1&2)        NaN     NaN    51.354      NaN


Epoch 3 - Batches:  41%|████      | 171/415 [01:45<14:07,  3.47s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.5723
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.5271
F1-macro: 0.6311
F1-pn: 0.5135


Epoch 3 - Batches:  50%|█████     | 209/415 [02:05<01:46,  1.93it/s]

evaluate, backprop: Validation


Epoch 3 - Batches:  61%|██████▏   | 255/415 [03:15<01:23,  1.92it/s]

evaluate, backprop: Test


Epoch 3 - Batches:  62%|██████▏   | 256/415 [03:22<06:31,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5265
F1-macro: 0.6158
F1-pn: 0.4919
             label  precision  recall  f1-score  support
                 0     81.349  92.045    86.367    616.0
                 1     56.944  36.937    44.809    111.0
                 2     65.934  45.113    53.571    133.0
          accuracy     77.674     NaN       NaN      NaN
         macro avg     68.076  58.032    61.582    860.0
      weighted avg     75.815  77.674    75.931    860.0
avg f1 (class 1&2)        NaN     NaN    49.190      NaN


Epoch 3 - Batches:  82%|████████▏ | 340/415 [04:05<00:38,  1.93it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5018
F1-macro: 0.6476
F1-pn: 0.5373
             label  precision  recall  f1-score  support
                 0     83.939  89.935    86.834    616.0
                 1     63.492  36.036    45.977    111.0
                 2     60.584  62.406    61.481    133.0
          accuracy     78.721     NaN       NaN      NaN
         macro avg     69.338  62.792    64.764    860.0
      weighted avg     77.688  78.721    77.640    860.0
avg f1 (class 1&2)        NaN     NaN    53.729      NaN


Epoch 3 - Batches:  82%|████████▏ | 341/415 [04:16<04:16,  3.46s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.5925
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.5018
F1-macro: 0.6476
F1-pn: 0.5373


Epoch 3 - Batches: 100%|██████████| 415/415 [04:54<00:00,  1.41it/s]
INFO:root:Epoch 3, Loss: 0.5672602651169502, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.4338
F1-macro: 0.7751
F1-pn: 0.7102
             label  precision  recall  f1-score  support
                 0     90.731  90.218    90.474   4774.0
                 1     70.121  60.864    65.166    856.0
                 2     72.188  82.224    76.880   1007.0
          accuracy     85.219     NaN       NaN      NaN
         macro avg     77.680  77.769    77.507   6637.0
      weighted avg     85.259  85.219    85.147   6637.0
avg f1 (class 1&2)        NaN     NaN    71.023      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5006
F1-macro: 0.6796
F1-pn: 0.5841
             label  precision  recall  f1-score  support
                 0     87.914  86.201    87.049    616.0
                 1     56.989  47.748    51.961    111.0
                 2     58.896  72.180    64.865    133.0
          accuracy     79.070     NaN       

Epoch 4 - Batches:  20%|██        | 85/415 [00:44<02:51,  1.93it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5258
F1-macro: 0.6825
F1-pn: 0.5952
             label  precision  recall  f1-score  support
                 0     90.000  81.818    85.714    616.0
                 1     52.756  60.360    56.303    111.0
                 2     55.491  72.180    62.745    133.0
          accuracy     77.558     NaN       NaN      NaN
         macro avg     66.082  71.453    68.254    860.0
      weighted avg     79.856  77.558    78.366    860.0
avg f1 (class 1&2)        NaN     NaN    59.524      NaN


Epoch 4 - Batches:  21%|██        | 86/415 [00:54<18:58,  3.46s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6389
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.5258
F1-macro: 0.6825
F1-pn: 0.5952


Epoch 4 - Batches:  41%|████      | 170/415 [01:38<02:07,  1.92it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4922
F1-macro: 0.7050
F1-pn: 0.6170
             label  precision  recall  f1-score  support
                 0     87.884  88.312    88.097    616.0
                 1     61.856  54.054    57.692    111.0
                 2     63.194  68.421    65.704    133.0
          accuracy     80.814     NaN       NaN      NaN
         macro avg     70.978  70.262    70.498    860.0
      weighted avg     80.706  80.814    80.710    860.0
avg f1 (class 1&2)        NaN     NaN    61.698      NaN


Epoch 4 - Batches:  41%|████      | 171/415 [01:48<14:14,  3.50s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6610
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.4922
F1-macro: 0.7050
F1-pn: 0.6170


Epoch 4 - Batches:  50%|█████     | 209/415 [02:08<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 4 - Batches:  61%|██████▏   | 255/415 [03:18<01:22,  1.93it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  62%|██████▏   | 256/415 [03:25<06:31,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4886
F1-macro: 0.6658
F1-pn: 0.5619
             label  precision  recall  f1-score  support
                 0     85.647  89.123    87.351    616.0
                 1     54.023  42.342    47.475    111.0
                 2     65.152  64.662    64.906    133.0
          accuracy     79.302     NaN       NaN      NaN
         macro avg     68.274  65.376    66.577    860.0
      weighted avg     78.396  79.302    78.733    860.0
avg f1 (class 1&2)        NaN     NaN    56.190      NaN


Epoch 4 - Batches:  82%|████████▏ | 340/415 [04:09<00:38,  1.92it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4829
F1-macro: 0.7059
F1-pn: 0.6176
             label  precision  recall  f1-score  support
                 0     89.351  87.175    88.250    616.0
                 1     57.407  55.856    56.621    111.0
                 2     62.914  71.429    66.901    133.0
          accuracy     80.698     NaN       NaN      NaN
         macro avg     69.891  71.487    70.591    860.0
      weighted avg     81.140  80.698    80.866    860.0
avg f1 (class 1&2)        NaN     NaN    61.761      NaN


Epoch 4 - Batches:  82%|████████▏ | 341/415 [04:19<04:17,  3.48s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6618
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.4829
F1-macro: 0.7059
F1-pn: 0.6176


Epoch 4 - Batches: 100%|██████████| 415/415 [04:58<00:00,  1.39it/s]
INFO:root:Epoch 4, Loss: 0.44242986939757706, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.3272
F1-macro: 0.8377
F1-pn: 0.7921
             label  precision  recall  f1-score  support
                 0     93.844  91.956    92.890   4774.0
                 1     77.160  73.014    75.030    856.0
                 2     78.242  89.275    83.395   1007.0
          accuracy     89.107     NaN       NaN      NaN
         macro avg     83.082  84.749    83.772   6637.0
      weighted avg     89.325  89.107    89.146   6637.0
avg f1 (class 1&2)        NaN     NaN    79.213      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4858
F1-macro: 0.6965
F1-pn: 0.6076
             label  precision  recall  f1-score  support
                 0     89.189  85.714    87.417    616.0
                 1     56.190  53.153    54.630    111.0
                 2     60.736  74.436    66.892    133.0
          accuracy     79.767     NaN       

Epoch 5 - Batches:  20%|██        | 85/415 [00:44<02:51,  1.93it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5211
F1-macro: 0.7073
F1-pn: 0.6258
             label  precision  recall  f1-score  support
                 0     91.727  82.792    87.031    616.0
                 1     57.143  61.261    59.130    111.0
                 2     56.757  78.947    66.038    133.0
          accuracy     79.419     NaN       NaN      NaN
         macro avg     68.542  74.334    70.733    860.0
      weighted avg     81.855  79.419    80.183    860.0
avg f1 (class 1&2)        NaN     NaN    62.584      NaN


Epoch 5 - Batches:  21%|██        | 86/415 [00:54<19:01,  3.47s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6666
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.5211
F1-macro: 0.7073
F1-pn: 0.6258


Epoch 5 - Batches:  41%|████      | 170/415 [01:38<02:07,  1.93it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4918
F1-macro: 0.7279
F1-pn: 0.6484
             label  precision  recall  f1-score  support
                 0     90.252  87.175    88.687    616.0
                 1     61.062  62.162    61.607    111.0
                 2     63.816  72.932    68.070    133.0
          accuracy     81.744     NaN       NaN      NaN
         macro avg     71.710  74.090    72.788    860.0
      weighted avg     82.396  81.744    82.003    860.0
avg f1 (class 1&2)        NaN     NaN    64.839      NaN


Epoch 5 - Batches:  41%|████      | 171/415 [01:48<13:57,  3.43s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6881
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.4918
F1-macro: 0.7279
F1-pn: 0.6484


Epoch 5 - Batches:  50%|█████     | 209/415 [02:08<01:46,  1.93it/s]

evaluate, backprop: Validation


Epoch 5 - Batches:  61%|██████▏   | 255/415 [03:18<01:22,  1.93it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  62%|██████▏   | 256/415 [03:25<06:30,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5023
F1-macro: 0.7071
F1-pn: 0.6163
             label  precision  recall  f1-score  support
                 0     86.595  91.234    88.854    616.0
                 1     60.606  54.054    57.143    111.0
                 2     72.321  60.902    66.122    133.0
          accuracy     81.744     NaN       NaN      NaN
         macro avg     73.174  68.730    70.706    860.0
      weighted avg     81.033  81.744    81.245    860.0
avg f1 (class 1&2)        NaN     NaN    61.633      NaN


Epoch 5 - Batches:  82%|████████▏ | 340/415 [04:08<00:39,  1.92it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  82%|████████▏ | 341/415 [04:15<03:01,  2.45s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4900
F1-macro: 0.7090
F1-pn: 0.6236
             label  precision  recall  f1-score  support
                 0     87.987  87.987    87.987    616.0
                 1     58.559  58.559    58.559    111.0
                 2     66.165  66.165    66.165    133.0
          accuracy     80.814     NaN       NaN      NaN
         macro avg     70.904  70.904    70.904    860.0
      weighted avg     80.814  80.814    80.814    860.0
avg f1 (class 1&2)        NaN     NaN    62.362      NaN


Epoch 5 - Batches: 100%|██████████| 415/415 [04:54<00:00,  1.41it/s]
INFO:root:Epoch 5, Loss: 0.3257912185626576, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.2213
F1-macro: 0.9023
F1-pn: 0.8748
             label  precision  recall  f1-score  support
                 0     96.869  94.617    95.730   4774.0
                 1     81.216  87.383    84.187    856.0
                 2     88.794  92.850    90.777   1007.0
          accuracy     93.416     NaN       NaN      NaN
         macro avg     88.960  91.617    90.231   6637.0
      weighted avg     93.625  93.416    93.489   6637.0
avg f1 (class 1&2)        NaN     NaN    87.482      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4907
F1-macro: 0.7241
F1-pn: 0.6439
             label  precision  recall  f1-score  support
                 0     89.933  87.013    88.449    616.0
                 1     58.400  65.766    61.864    111.0
                 2     65.468  68.421    66.912    133.0
          accuracy     81.395     NaN       

Epoch 6 - Batches:  20%|██        | 85/415 [00:44<02:52,  1.91it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  21%|██        | 86/415 [00:51<13:31,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5237
F1-macro: 0.7216
F1-pn: 0.6410
             label  precision  recall  f1-score  support
                 0     90.460  86.201    88.279    616.0
                 1     55.072  68.468    61.044    111.0
                 2     66.667  67.669    67.164    133.0
          accuracy     81.047     NaN       NaN      NaN
         macro avg     70.733  74.113    72.163    860.0
      weighted avg     82.213  81.047    81.499    860.0
avg f1 (class 1&2)        NaN     NaN    64.104      NaN


Epoch 6 - Batches:  41%|████      | 170/415 [01:35<02:07,  1.93it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5075
F1-macro: 0.7346
F1-pn: 0.6600
             label  precision  recall  f1-score  support
                 0     89.782  87.013    88.376    616.0
                 1     62.069  64.865    63.436    111.0
                 2     65.306  72.180    68.571    133.0
          accuracy     81.860     NaN       NaN      NaN
         macro avg     72.386  74.686    73.461    860.0
      weighted avg     82.420  81.860    82.094    860.0
avg f1 (class 1&2)        NaN     NaN    66.004      NaN


Epoch 6 - Batches:  41%|████      | 171/415 [01:45<14:08,  3.48s/it]

[Checkpoint] 🎯 Новый лучший средний F1: 0.6973
[Checkpoint] 💾 Модель сохранена: ./result_lr_1e-06_ENS_Teg_32_multi_dropout_V3_Token/best_model.pth
Loss: 0.5075
F1-macro: 0.7346
F1-pn: 0.6600


Epoch 6 - Batches:  50%|█████     | 209/415 [02:05<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 6 - Batches:  61%|██████▏   | 255/415 [03:15<01:22,  1.93it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  62%|██████▏   | 256/415 [03:22<06:31,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5165
F1-macro: 0.7261
F1-pn: 0.6458
             label  precision  recall  f1-score  support
                 0     88.387  88.961    88.673    616.0
                 1     63.208  60.360    61.751    111.0
                 2     67.164  67.669    67.416    133.0
          accuracy     81.977     NaN       NaN      NaN
         macro avg     72.920  72.330    72.613    860.0
      weighted avg     81.855  81.977    81.911    860.0
avg f1 (class 1&2)        NaN     NaN    64.583      NaN


Epoch 6 - Batches:  82%|████████▏ | 340/415 [04:06<00:38,  1.93it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  82%|████████▏ | 341/415 [04:13<03:02,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5087
F1-macro: 0.7291
F1-pn: 0.6495
             label  precision  recall  f1-score  support
                 0     89.735  87.987    88.852    616.0
                 1     61.947  63.063    62.500    111.0
                 2     65.035  69.925    67.391    133.0
          accuracy     81.977     NaN       NaN      NaN
         macro avg     72.239  73.658    72.915    860.0
      weighted avg     82.329  81.977    82.132    860.0
avg f1 (class 1&2)        NaN     NaN    64.946      NaN


Epoch 6 - Batches: 100%|██████████| 415/415 [04:51<00:00,  1.42it/s]
INFO:root:Epoch 6, Loss: 0.2172641000429909, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.1291
F1-macro: 0.9541
F1-pn: 0.9411
             label  precision  recall  f1-score  support
                 0     97.950  98.094    98.022   4774.0
                 1     92.781  91.589    92.181    856.0
                 2     95.846  96.226    96.036   1007.0
          accuracy     96.972     NaN       NaN      NaN
         macro avg     95.526  95.303    95.413   6637.0
      weighted avg     96.964  96.972    96.967   6637.0
avg f1 (class 1&2)        NaN     NaN    94.108      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5056
F1-macro: 0.7241
F1-pn: 0.6435
             label  precision  recall  f1-score  support
                 0     87.480  89.610    88.532    616.0
                 1     63.107  58.559    60.748    111.0
                 2     69.841  66.165    67.954    133.0
          accuracy     81.977     NaN       

Epoch 7 - Batches:  20%|██        | 85/415 [00:44<02:51,  1.92it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  21%|██        | 86/415 [00:51<13:31,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5684
F1-macro: 0.7205
F1-pn: 0.6387
             label  precision  recall  f1-score  support
                 0     87.821  88.961    88.387    616.0
                 1     62.264  59.459    60.829    111.0
                 2     67.692  66.165    66.920    133.0
          accuracy     81.628     NaN       NaN      NaN
         macro avg     72.592  71.529    72.046    860.0
      weighted avg     81.409  81.628    81.510    860.0
avg f1 (class 1&2)        NaN     NaN    63.875      NaN


Epoch 7 - Batches:  41%|████      | 170/415 [01:35<02:07,  1.92it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  41%|████      | 171/415 [01:42<10:01,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5603
F1-macro: 0.7250
F1-pn: 0.6426
             label  precision  recall  f1-score  support
                 0     88.710  89.286    88.997    616.0
                 1     62.963  61.261    62.100    111.0
                 2     66.667  66.165    66.415    133.0
          accuracy     82.093     NaN       NaN      NaN
         macro avg     72.780  72.237    72.504    860.0
      weighted avg     81.978  82.093    82.033    860.0
avg f1 (class 1&2)        NaN     NaN    64.258      NaN


Epoch 7 - Batches:  50%|█████     | 209/415 [02:01<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 7 - Batches:  61%|██████▏   | 255/415 [03:12<01:23,  1.92it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  62%|██████▏   | 256/415 [03:19<06:31,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5802
F1-macro: 0.7265
F1-pn: 0.6443
             label  precision  recall  f1-score  support
                 0     88.728  89.448    89.086    616.0
                 1     60.345  63.063    61.674    111.0
                 2     69.919  64.662    67.188    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     72.997  72.391    72.649    860.0
      weighted avg     82.156  82.209    82.162    860.0
avg f1 (class 1&2)        NaN     NaN    64.431      NaN


Epoch 7 - Batches:  82%|████████▏ | 340/415 [04:02<00:39,  1.92it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  82%|████████▏ | 341/415 [04:09<03:02,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5677
F1-macro: 0.7296
F1-pn: 0.6518
             label  precision  recall  f1-score  support
                 0     89.535  87.500    88.506    616.0
                 1     57.812  66.667    61.925    111.0
                 2     69.231  67.669    68.441    133.0
          accuracy     81.744     NaN       NaN      NaN
         macro avg     72.193  73.945    72.957    860.0
      weighted avg     82.300  81.744    81.972    860.0
avg f1 (class 1&2)        NaN     NaN    65.183      NaN


Epoch 7 - Batches: 100%|██████████| 415/415 [04:48<00:00,  1.44it/s]
INFO:root:Epoch 7, Loss: 0.11927447977464602, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0586
F1-macro: 0.9825
F1-pn: 0.9772
             label  precision  recall  f1-score  support
                 0     99.683  98.932    99.306   4774.0
                 1     95.588  98.715    97.126    856.0
                 2     97.931  98.709    98.318   1007.0
          accuracy     98.870     NaN       NaN      NaN
         macro avg     97.734  98.785    98.250   6637.0
      weighted avg     98.889  98.870    98.875   6637.0
avg f1 (class 1&2)        NaN     NaN    97.722      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5758
F1-macro: 0.7286
F1-pn: 0.6485
             label  precision  recall  f1-score  support
                 0     88.943  88.799    88.871    616.0
                 1     60.169  63.964    62.009    111.0
                 2     69.291  66.165    67.692    133.0
          accuracy     82.093     NaN       

Epoch 8 - Batches:  20%|██        | 85/415 [00:44<02:52,  1.91it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  21%|██        | 86/415 [00:51<13:32,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6321
F1-macro: 0.7135
F1-pn: 0.6291
             label  precision  recall  f1-score  support
                 0     89.482  87.013    88.230    616.0
                 1     54.412  66.667    59.919    111.0
                 2     68.000  63.910    65.891    133.0
          accuracy     80.814     NaN       NaN      NaN
         macro avg     70.631  72.530    71.347    860.0
      weighted avg     81.634  80.814    81.122    860.0
avg f1 (class 1&2)        NaN     NaN    62.905      NaN


Epoch 8 - Batches:  41%|████      | 170/415 [01:35<02:07,  1.92it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  41%|████      | 171/415 [01:42<10:01,  2.46s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6314
F1-macro: 0.7212
F1-pn: 0.6377
             label  precision  recall  f1-score  support
                 0     88.548  89.123    88.835    616.0
                 1     60.360  60.360    60.360    111.0
                 2     68.217  66.165    67.176    133.0
          accuracy     81.860     NaN       NaN      NaN
         macro avg     72.375  71.883    72.124    860.0
      weighted avg     81.766  81.860    81.810    860.0
avg f1 (class 1&2)        NaN     NaN    63.768      NaN


Epoch 8 - Batches:  50%|█████     | 209/415 [02:01<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 8 - Batches:  61%|██████▏   | 255/415 [03:11<01:23,  1.92it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  62%|██████▏   | 256/415 [03:18<06:32,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6687
F1-macro: 0.7191
F1-pn: 0.6343
             label  precision  recall  f1-score  support
                 0     88.179  89.610    88.889    616.0
                 1     60.177  61.261    60.714    111.0
                 2     69.421  63.158    66.142    133.0
          accuracy     81.860     NaN       NaN      NaN
         macro avg     72.592  71.343    71.915    860.0
      weighted avg     81.664  81.860    81.735    860.0
avg f1 (class 1&2)        NaN     NaN    63.428      NaN


Epoch 8 - Batches:  82%|████████▏ | 340/415 [04:02<00:39,  1.92it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  82%|████████▏ | 341/415 [04:09<03:02,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6595
F1-macro: 0.7149
F1-pn: 0.6324
             label  precision  recall  f1-score  support
                 0     89.167  86.851    87.993    616.0
                 1     56.452  63.063    59.574    111.0
                 2     66.176  67.669    66.914    133.0
          accuracy     80.814     NaN       NaN      NaN
         macro avg     70.598  72.528    71.494    860.0
      weighted avg     81.389  80.814    81.066    860.0
avg f1 (class 1&2)        NaN     NaN    63.244      NaN


Epoch 8 - Batches: 100%|██████████| 415/415 [04:48<00:00,  1.44it/s]
INFO:root:Epoch 8, Loss: 0.05299506319956068, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0244
F1-macro: 0.9947
F1-pn: 0.9928
             label  precision  recall  f1-score  support
                 0     99.874  99.811    99.843   4774.0
                 1     99.415  99.182    99.298    856.0
                 2     99.012  99.503    99.257   1007.0
          accuracy     99.684     NaN       NaN      NaN
         macro avg     99.434  99.499    99.466   6637.0
      weighted avg     99.684  99.684    99.684   6637.0
avg f1 (class 1&2)        NaN     NaN    99.278      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6724
F1-macro: 0.7168
F1-pn: 0.6308
             label  precision  recall  f1-score  support
                 0     86.957  90.909    88.889    616.0
                 1     64.211  54.955    59.223    111.0
                 2     70.248  63.910    66.929    133.0
          accuracy     82.093     NaN       

Epoch 9 - Batches:  20%|██        | 85/415 [00:44<02:52,  1.91it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  21%|██        | 86/415 [00:51<13:31,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7239
F1-macro: 0.7116
F1-pn: 0.6273
             label  precision  recall  f1-score  support
                 0     88.380  87.662    88.020    616.0
                 1     56.911  63.063    59.829    111.0
                 2     67.460  63.910    65.637    133.0
          accuracy     80.814     NaN       NaN      NaN
         macro avg     70.917  71.545    71.162    860.0
      weighted avg     81.083  80.814    80.920    860.0
avg f1 (class 1&2)        NaN     NaN    62.733      NaN


Epoch 9 - Batches:  41%|████      | 170/415 [01:35<02:07,  1.92it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  41%|████      | 171/415 [01:42<10:03,  2.48s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7243
F1-macro: 0.7146
F1-pn: 0.6294
             label  precision  recall  f1-score  support
                 0     88.350  88.636    88.493    616.0
                 1     59.813  57.658    58.716    111.0
                 2     66.667  67.669    67.164    133.0
          accuracy     81.395     NaN       NaN      NaN
         macro avg     71.610  71.321    71.457    860.0
      weighted avg     81.313  81.395    81.351    860.0
avg f1 (class 1&2)        NaN     NaN    62.940      NaN


Epoch 9 - Batches:  50%|█████     | 209/415 [02:01<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 9 - Batches:  61%|██████▏   | 255/415 [03:12<01:23,  1.91it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  62%|██████▏   | 256/415 [03:19<06:33,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7450
F1-macro: 0.7171
F1-pn: 0.6326
             label  precision  recall  f1-score  support
                 0     88.889  88.312    88.599    616.0
                 1     58.824  63.063    60.870    111.0
                 2     66.667  64.662    65.649    133.0
          accuracy     81.395     NaN       NaN      NaN
         macro avg     71.460  72.012    71.706    860.0
      weighted avg     81.572  81.395    81.471    860.0
avg f1 (class 1&2)        NaN     NaN    63.259      NaN


Epoch 9 - Batches:  82%|████████▏ | 340/415 [04:02<00:39,  1.92it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  82%|████████▏ | 341/415 [04:09<03:02,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7166
F1-macro: 0.7266
F1-pn: 0.6448
             label  precision  recall  f1-score  support
                 0     88.585  89.448    89.015    616.0
                 1     63.810  60.360    62.037    111.0
                 2     66.917  66.917    66.917    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     73.104  72.242    72.656    860.0
      weighted avg     82.036  82.209    82.115    860.0
avg f1 (class 1&2)        NaN     NaN    64.477      NaN


Epoch 9 - Batches: 100%|██████████| 415/415 [04:48<00:00,  1.44it/s]
INFO:root:Epoch 9, Loss: 0.02304945271209735, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0120
F1-macro: 0.9983
F1-pn: 0.9978
             label  precision  recall  f1-score  support
                 0     99.979  99.895    99.937   4774.0
                 1     99.534  99.883    99.708    856.0
                 2     99.802  99.901    99.851   1007.0
          accuracy     99.895     NaN       NaN      NaN
         macro avg     99.772  99.893    99.832   6637.0
      weighted avg     99.895  99.895    99.895   6637.0
avg f1 (class 1&2)        NaN     NaN    99.780      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7454
F1-macro: 0.7227
F1-pn: 0.6393
             label  precision  recall  f1-score  support
                 0     89.088  88.799    88.943    616.0
                 1     60.870  63.063    61.947    111.0
                 2     66.412  65.414    65.909    133.0
          accuracy     81.860     NaN       

Epoch 10 - Batches:  20%|██        | 85/415 [00:44<02:51,  1.92it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  21%|██        | 86/415 [00:51<13:33,  2.47s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7893
F1-macro: 0.7266
F1-pn: 0.6441
             label  precision  recall  f1-score  support
                 0     88.997  89.286    89.141    616.0
                 1     62.162  62.162    62.162    111.0
                 2     67.176  66.165    66.667    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     72.778  72.538    72.657    860.0
      weighted avg     82.159  82.209    82.183    860.0
avg f1 (class 1&2)        NaN     NaN    64.414      NaN


Epoch 10 - Batches:  41%|████      | 170/415 [01:35<02:07,  1.92it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  41%|████      | 171/415 [01:42<10:04,  2.48s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.8172
F1-macro: 0.7216
F1-pn: 0.6371
             label  precision  recall  f1-score  support
                 0     88.217  89.935    89.068    616.0
                 1     61.818  61.261    61.538    111.0
                 2     68.852  63.158    65.882    133.0
          accuracy     82.093     NaN       NaN      NaN
         macro avg     72.962  71.451    72.163    860.0
      weighted avg     81.815  82.093    81.929    860.0
avg f1 (class 1&2)        NaN     NaN    63.710      NaN


Epoch 10 - Batches:  50%|█████     | 209/415 [02:01<01:47,  1.92it/s]

evaluate, backprop: Validation


Epoch 10 - Batches:  61%|██████▏   | 255/415 [03:12<01:23,  1.91it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  62%|██████▏   | 256/415 [03:19<06:34,  2.48s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.8424
F1-macro: 0.7113
F1-pn: 0.6264
             label  precision  recall  f1-score  support
                 0     88.525  87.662    88.091    616.0
                 1     56.800  63.964    60.169    111.0
                 2     67.200  63.158    65.116    133.0
          accuracy     80.814     NaN       NaN      NaN
         macro avg     70.842  71.595    71.126    860.0
      weighted avg     81.132  80.814    80.934    860.0
avg f1 (class 1&2)        NaN     NaN    62.643      NaN


Epoch 10 - Batches:  82%|████████▏ | 340/415 [04:03<00:39,  1.91it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  82%|████████▏ | 341/415 [04:10<03:03,  2.48s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.8347
F1-macro: 0.7263
F1-pn: 0.6425
             label  precision  recall  f1-score  support
                 0     88.658  90.097    89.372    616.0
                 1     62.963  61.261    62.100    111.0
                 2     68.254  64.662    66.409    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     73.292  72.007    72.627    860.0
      weighted avg     82.186  82.442    82.301    860.0
avg f1 (class 1&2)        NaN     NaN    64.255      NaN


Epoch 10 - Batches: 100%|██████████| 415/415 [04:48<00:00,  1.44it/s]
INFO:root:Epoch 10, Loss: 0.010223582985738, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0049
F1-macro: 0.9995
F1-pn: 0.9994
             label  precision  recall  f1-score  support
                 0    100.000  99.958    99.979   4774.0
                 1     99.767 100.000    99.883    856.0
                 2    100.000 100.000   100.000   1007.0
          accuracy     99.970     NaN       NaN      NaN
         macro avg     99.922  99.986    99.954   6637.0
      weighted avg     99.970  99.970    99.970   6637.0
avg f1 (class 1&2)        NaN     NaN    99.942      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.8500
F1-macro: 0.7165
F1-pn: 0.6303
             label  precision  recall  f1-score  support
                 0     88.301  89.448    88.871    616.0
                 1     61.321  58.559    59.908    111.0
                 2     66.923  65.414    66.160    133.0
          accuracy     81.744     NaN       

In [11]:
import pandas as pd

def load_and_display_metrics(metrics_file):
    # Загружаем CSV-файл
    df = pd.read_csv(metrics_file)
    df = df[df["backprop"] == "Test"]
    # Определяем количество эпох
    epochs = df["epoch"].unique()
    print(epochs)
    
    for epoch in epochs:
        # Фильтруем данные по эпохе
        epoch_df = df[df["epoch"] == epoch]
        
        for idx, row in epoch_df.iterrows():
            loss = row["loss"]
            backprop_value = row["backprop"]
            print(f"\nEpoch {epoch} (Backprop: {backprop_value}) (Loss: {loss})\n" + "-"*30)
            # Формируем таблицу в стиле classification_report
            table_data = {}
            class_labels = sorted(
                set(col.split("_")[1] for col in df.columns 
                    if col.startswith("class_") and "precision" in col)
            )
            
            # Фильтруем только числовые метки классов
            class_labels = [label for label in class_labels if label.isdigit()]
            
            for label in class_labels:
                table_data[int(label)] = {
                    "precision": row[f"class_{label}_precision"],
                    "recall": row[f"class_{label}_recall"],
                    "f1-score": row[f"class_{label}_f1"],
                }
            
            # Добавляем средние значения
            table_data["accuracy"] = {"precision": "", "recall": "", "f1-score": row["accuracy"] }
            table_data["macro avg"] = {
                "precision": row["macro_precision"],
                "recall": row["macro_recall"],
                "f1-score": row["macro_f1"],
            }
            table_data["weighted avg"] = {
                "precision": row["weighted_precision"],
                "recall": row["weighted_recall"],
                "f1-score": row["weighted_f1"],
            }
            
            # Выводим таблицу
            df_table = pd.DataFrame.from_dict(table_data, orient="index")
            print(df_table.to_string())

# Пример использования
metrics_file = "./result_lr_1e-06_test/metrics_class_weights.csv"
load_and_display_metrics(metrics_file)

FileNotFoundError: [Errno 2] No such file or directory: './result_lr_1e-06_test/metrics_class_weights.csv'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Загрузка CSV-файла
df = pd.read_csv("metrics_class_weights.csv")  # укажи путь к файлу, если он отличается

# Построение графика loss
plt.figure(figsize=(10, 6))

for stage in ['Train', 'Validation', 'Test']:
    stage_data = df[df['backprop'] == stage]
    plt.plot(stage_data['epoch'], stage_data['loss'], label=stage)

plt.title('Значение Loss по эпохам')
plt.xlabel('Эпоха')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
