In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import argparse
import logging
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import f1_score, classification_report
from torch.utils.data import DataLoader, TensorDataset
import sys
from tqdm import tqdm, trange
import os
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CyclicLR


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import RobertaModel
import torch
import torch.nn as nn
from transformers.modeling_outputs import SequenceClassifierOutput

class RobertaWithMultiSampleDropoutTarget(nn.Module):
    def __init__(self, model_name, num_labels, dropout_rate=0.3, num_dropouts=5, use_multi_sample_dropout=True):
        super(RobertaWithMultiSampleDropoutTarget, self).__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.use_multi_sample_dropout = use_multi_sample_dropout
        self.hidden_size = self.roberta.config.hidden_size

        if self.use_multi_sample_dropout:
            self.dropouts = nn.ModuleList([
                nn.Dropout(dropout_rate) for _ in range(num_dropouts)
            ])
        else:
            self.dropout = nn.Dropout(dropout_rate)

        self.classifier = nn.Linear(self.hidden_size, num_labels)

    def extract_entity_embeddings(self, input_ids, sequence_output, en_token_id, end_en_token_id):
        batch_size = input_ids.size(0)
        entity_representations = []

        for i in range(batch_size):
            input_id = input_ids[i]
            output = sequence_output[i]

            try:
                start = (input_id == en_token_id).nonzero(as_tuple=True)[0].item()
                end = (input_id == end_en_token_id).nonzero(as_tuple=True)[0].item()
            except IndexError:
                # –µ—Å–ª–∏ –≤–¥—Ä—É–≥ —Ç–æ–∫–µ–Ω—ã –Ω–µ –Ω–∞–π–¥–µ–Ω—ã ‚Äî fallback –Ω–∞ [CLS]
                entity_representations.append(output[0])
                continue

            # –≤—ã—Ä–µ–∑–∞–µ–º —ç–º–±–µ–¥–¥–∏–Ω–≥–∏ –º–µ–∂–¥—É <en> –∏ </en>
            entity_tokens = output[start + 1:end]
            if entity_tokens.shape[0] == 0:
                entity_representations.append(output[0])  # fallback –Ω–∞ [CLS]
            else:
                entity_representations.append(entity_tokens.mean(dim=0))  # mean pooling

        return torch.stack(entity_representations)

    def forward(self, input_ids, attention_mask=None, labels=None):
        outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        sequence_output = outputs[0]  # [batch_size, seq_len, hidden_size]

        # --- 1. –ù–∞—Ö–æ–¥–∏–º –ø–æ–∑–∏—Ü–∏–∏ <en> –∏ </en> ---
        start_token_id = tokenizer.convert_tokens_to_ids("<en>")
        end_token_id = tokenizer.convert_tokens_to_ids("</en>")
    
        start_positions = (input_ids == start_token_id).nonzero(as_tuple=False)
        end_positions = (input_ids == end_token_id).nonzero(as_tuple=False)

        # --- 2. –î–ª—è –∫–∞–∂–¥–æ–≥–æ –ø—Ä–∏–º–µ—Ä–∞ –∞–≥—Ä–µ–≥–∏—Ä—É–µ–º hidden states –º–µ–∂–¥—É <en> –∏ </en> ---
        pooled_output = []
        for batch_idx in range(input_ids.size(0)):
            start_pos = start_positions[start_positions[:, 0] == batch_idx][:, 1]
            end_pos = end_positions[end_positions[:, 0] == batch_idx][:, 1]
            if len(start_pos) > 0 and len(end_pos) > 0:
                s, e = start_pos[0].item() + 1, end_pos[0].item()  # –º–µ–∂–¥—É —Ç–µ–≥–∞–º–∏
                if e > s:
                    token_embeds = sequence_output[batch_idx, s:e, :]  # [num_entity_tokens, hidden]
                    pooled = torch.mean(token_embeds, dim=0)  # Mean-pooling
                else:
                    pooled = sequence_output[batch_idx, 0, :]  # fallback to [CLS]
            else:
                pooled = sequence_output[batch_idx, 0, :]  # fallback to [CLS]

            pooled_output.append(pooled)

        pooled_output = torch.stack(pooled_output)  # [batch_size, hidden_size]

        if self.use_multi_sample_dropout:
            logits_list = [self.classifier(dropout(pooled_output)) for dropout in self.dropouts]
            avg_logits = torch.mean(torch.stack(logits_list), dim=0)
        else:
            avg_logits = self.classifier(self.dropout(pooled_output))

        return SequenceClassifierOutput(logits=avg_logits)


In [3]:
def save_metrics_best(epoch, all_labels, all_preds, result_path, backprop, loss):
    import os
    import pandas as pd
    from sklearn.metrics import classification_report

    metrics_file = os.path.join(result_path, "metrics_best.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)

    # –°–æ–∑–¥–∞–µ–º —Å–ª–æ–≤–∞—Ä—å —Å –º–µ—Ç—Ä–∏–∫–∞–º–∏
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop,
        "loss": loss
    }

    # –î–æ–±–∞–≤–ª—è–µ–º –º–µ—Ç—Ä–∏–∫–∏ –ø–æ –∫–∞–∂–¥–æ–º—É –∫–ª–∞—Å—Å—É
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]

    # –°–æ–∑–¥–∞–µ–º DataFrame –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º –µ–≥–æ –≤ CSV (–ø–µ—Ä–µ–∑–∞–ø–∏—Å—å —Ñ–∞–π–ª–∞)
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='w', header=True, index=False)

In [4]:
def compute_ens_weights(train_labels, beta: float = 0.999):
    train_labels = np.array(train_labels)
    classes, counts = np.unique(train_labels, return_counts=True)
    effective_num = (1 - np.power(beta, counts)) / (1 - beta)
    weights = 1.0 / effective_num
    weights = weights / np.sum(weights) * len(classes)  # –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è –∫–∞–∫ –≤ –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π —Å—Ç–∞—Ç—å–µ
    return torch.tensor(weights, dtype=torch.float)

In [5]:
def get_entity_tags_from_files(*file_paths):
    all_tags = set()
    for path in file_paths:
        try:
            df = pd.read_csv(path, sep='\t')
            if "entity_tag" in df.columns:
                tags = df["entity_tag"].dropna().unique().tolist()
                all_tags.update(tags)
        except Exception as e:
            print(f"[–û—à–∏–±–∫–∞] –ù–µ —É–¥–∞–ª–æ—Å—å –∑–∞–≥—Ä—É–∑–∏—Ç—å {path}: {e}")
    return sorted(list(all_tags))

In [6]:
def load_data(file_path, tokenizer, max_seq_len):
    try:
        df = pd.read_csv(file_path, sep='\t')  # –ó–∞–≥—Ä—É–∂–∞–µ–º CSV (TSV)

        # –ü—Ä–æ–≤–µ—Ä–∫–∞ –Ω—É–∂–Ω—ã—Ö –∫–æ–ª–æ–Ω–æ–∫
        required_columns = {
            "sentence", "entity", "label", "entity_tag",
            "entity_pos_start_rel", "entity_pos_end_rel"
        }
        if not required_columns.issubset(df.columns):
            raise ValueError(f"–û–∂–∏–¥–∞–µ–º—ã–µ –∫–æ–ª–æ–Ω–∫–∏: {required_columns}, –Ω–æ –≤ —Ñ–∞–π–ª–µ: {df.columns}")

        # –ó–∞–º–µ–Ω—è–µ–º -1 –Ω–∞ 2 (–µ—Å–ª–∏ –µ—Å—Ç—å)
        df["label"] = df["label"].replace(-1, 2)

        # –†–∞–∑–º–µ—á–∞–µ–º –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è —Å [ENTITY] —Ç–æ–∫–µ–Ω–∞–º–∏
        def mark_entity_inline(row):
            sent = row["sentence"]
            start = row["entity_pos_start_rel"]
            end = row["entity_pos_end_rel"]
            tag = row["entity_tag"]
            tag_token = f"<en>"
            tag_token_close = f"</en>"
            return (
                sent[:start] +
                f"{tag_token} " + sent[start:end] + f" {tag_token_close}" +
                sent[end:]
            )

        df["input_text"] = df.apply(mark_entity_inline, axis=1)
        texts = df["input_text"].tolist()
        labels = torch.tensor(df["label"].astype(int).tolist(), dtype=torch.long)

        # –¢–æ–∫–µ–Ω–∏–∑–∏—Ä—É–µ–º
        encodings = tokenizer(texts, padding=True, truncation=True,
                              max_length=max_seq_len, return_tensors='pt')
        dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'], labels)
        return dataset

    except FileNotFoundError:
        logging.error(f"–§–∞–π–ª {file_path} –Ω–µ –Ω–∞–π–¥–µ–Ω!")
        return None

In [7]:

def save_metrics(epoch, all_labels, all_preds, result_path, backprop, loss):
    metrics_file = os.path.join(result_path, "metrics_class_weights.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)
    
    # –°–æ–∑–¥–∞–µ–º DataFrame –¥–ª—è –º–µ—Ç—Ä–∏–∫
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop , # (–î–æ–±–∞–≤–ª–µ–Ω–æ)
        "loss": loss
    }
    
    # –î–æ–±–∞–≤–ª—è–µ–º –º–µ—Ç—Ä–∏–∫–∏ –¥–ª—è –∫–∞–∂–¥–æ–≥–æ –∫–ª–∞—Å—Å–∞
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):  # –ü—Ä–æ–ø—É—Å–∫–∞–µ–º 'accuracy', —Ç–∞–∫ –∫–∞–∫ —ç—Ç–æ float
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]
    
    # –°–æ–∑–¥–∞–µ–º DataFrame –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º –≤ CSV
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='a', header=not os.path.exists(metrics_file), index=False)

def save_confusion_matrix(epoch, y_true, y_pred, result_path,backprop):
    cm = confusion_matrix(y_true, y_pred)
    cm_df = pd.DataFrame(cm, columns=["Pred_0", "Pred_1", "Pred_2"], index=["True_0", "True_1", "True_2"])
    cm_file = os.path.join(result_path, f"confusion_matrix_class_weights_epoch_{epoch}_backprop_{backprop}.csv")
    cm_df.to_csv(cm_file)



In [8]:
from sklearn.metrics import classification_report
import pandas as pd

def castom_classification_report(all_labels, all_preds):
    report = classification_report(all_labels, all_preds, output_dict=True)
    data = []
    for label, metrics in report.items():
        if isinstance(metrics, dict):  # –î–ª—è –≤—Å–µ—Ö –º–µ—Ç—Ä–∏–∫, –∫—Ä–æ–º–µ accuracy
            row = {'label': label}
            for metric, value in metrics.items():
                row[metric] = round(value * 100, 3) if metric != 'support' else value
            data.append(row)
        else:  # –î–ª—è accuracy
            data.append({'label': 'accuracy', 'precision': round(metrics * 100, 3), 'recall': None, 'f1-score': None, 'support': None})

    # –î–æ–±–∞–≤–ª—è–µ–º macro F1 –¥–ª—è –∫–ª–∞—Å—Å–æ–≤ 1 –∏ 2
    if '1' in report and '2' in report:
        f1_1 = report['1']['f1-score']
        f1_2 = report['2']['f1-score']
        f1_macro_1_2 = (f1_1 + f1_2) / 2
        data.append({
            'label': 'avg f1 (class 1&2)',
            'precision': None,
            'recall': None,
            'f1-score': round(f1_macro_1_2 * 100, 3),
            'support': None
        })
    
    # –°–æ–∑–¥–∞–µ–º DataFrame
    df = pd.DataFrame(data)
    df.index = [''] * len(df)

    # –í—ã–≤–æ–¥–∏–º —Ç–∞–±–ª–∏—Ü—É
    print(df.to_string(index=False))

In [9]:
def add_token(flag = False):
    # –§–æ—Ä–º–∏—Ä—É–µ–º —Å–ø–∏—Å–æ–∫ —Å–ø–µ—Ü–∏–∞–ª—å–Ω—ã—Ö —Ç–æ–∫–µ–Ω–æ–≤
    if not flag: return None
    special_tokens = {
        "additional_special_tokens": 
            ["<en>", "</en>"]
    }

    print("–î–æ–±–∞–≤–ª–µ–Ω–Ω—ã–µ —Å–ø–µ—Ü–∏–∞–ª—å–Ω—ã–µ —Ç–æ–∫–µ–Ω—ã:")
    for token in special_tokens["additional_special_tokens"]:
        print(token)
    return special_tokens

def save_contrel_date(tokenizer, train_dataset):
    special_tokens = add_token(flag = True)
    if special_tokens != None: 
        tokenizer.add_special_tokens(special_tokens)
    tokenizer.save_pretrained(os.path.join(args.result, "tokenizer"))
    print(tokenizer.special_tokens_map)
    print(tokenizer.additional_special_tokens)

    
    file_control_text = os.path.join(args.result, "use_market_text.txt")
    input_ids, attention_mask, label = train_dataset[0]
    decoded_text_token = tokenizer.decode(input_ids, skip_special_tokens=False)
    decoded_text = tokenizer.decode(input_ids, skip_special_tokens=True)
    with open(file_control_text, "w") as f:
        f.write(f"Text token:\n{decoded_text_token}\n\n")
        f.write(f"Text:\n{decoded_text}\n\n")
    print(f"Text token:\n{decoded_text_token}\n\n")
    print(f"Text:\n{decoded_text}\n\n")

In [10]:

lr_m = 1e-6
# lr = 1e-6 –Ω–∞—á–∞–ª–æ—Å—å –æ–±—É—á–µ–Ω–∏–µ, —É–º–µ–Ω—å—à–µ–Ω–∏–µ TrainError
# –¥–æ–±–∞–≤–∏—Ç—å –≤–æ–ª–∏–¥–∞—Ü–∏—é –ø–æ —à–∞–≥–∞–º –≤ —Ç–µ—á–µ–Ω–∏–µ —ç–ø–æ—Ö–∏ –æ–±—É—á–µ–Ω–∏—è, –∞ –Ω–µ –∫–∞–¥–∂—ã–µ N –µ–ø–æ—Ö
# –î–æ–±–∞–≤–∏—Ç—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ –æ—à–∏–±–∫–∏ –≤ —Ñ–∞–π–ª—ã –¥–ª—è –ø–æ—Å—Ç—Ä–æ–µ–Ω–∏—è –≥—Ä–∞—Ñ–∏–∫–æ–≤
# –ø—Ä–æ–≤–µ—Ä–∏—Ç—å –º–µ—Ç–æ–¥—ã –ø–µ—Ä–µ–¥–∞—á–∏ —Å—É—â—å–Ω–æ—Å—Ç–∏ (–ø–∞—Ä–∞–µ—Ç—Ä–æ–º, +—Ç–∏–ø, –∏–ª–∏ –≤ —Ç–µ–∫—Å—Ç–µ –≤—ã–¥–µ–ª—è—è —Ç–µ–≥–∞–º–∏).
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="./../ruRoberta-large/")
    #parser.add_argument("--model_name", type=str, default="sberbank-ai/ruRoberta-large", help="–ò–º—è –º–æ–¥–µ–ª–∏")
    parser.add_argument("--max_seq_len", type=int, default=512) #128
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--lr", type=float, default=lr_m)
    parser.add_argument("--init_checkpoint", type=str, default=None)
    parser.add_argument("--train_data", type=str, required=False, default="./../data/train_data.csv")
    parser.add_argument("--validation_data", type=str, required=False, default="./../data/validation.csv")
    parser.add_argument("--eval_data", type=str, required=False, default="./../data/test.csv")
    parser.add_argument("--result", type=str, default=f"./result_lr_{lr_m}_ENS_Teg_multi_dropout_V3_Token_Target/")
    # –ò–≥–Ω–æ—Ä–∏—Ä—É–µ–º –∞—Ä–≥—É–º–µ–Ω—Ç—ã Jupyter
    args, unknown = parser.parse_known_args()
    os.makedirs(args.result, exist_ok=True)
    return args


if __name__ == "__main__":
    args = parse_args()
    print(args)  # –ü—Ä–æ–≤–µ—Ä—è–µ–º –∞—Ä–≥—É–º–µ–Ω—Ç—ã

    base_lr = args.lr
    min_lr = base_lr * (1/4)
    step_size_up = 400 #400
    best_avg_f1 = 0.0
    
    best_model_path = os.path.join(args.result, "best_model.pth")

    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    
    logging.basicConfig(level=logging.INFO)

    tokenizer = RobertaTokenizer.from_pretrained(args.model_name)
    #model = RobertaForSequenceClassification.from_pretrained(args.model_name, num_labels=3)
    model = RobertaWithMultiSampleDropoutTarget(model_name='./../ruRoberta-large', num_labels=3, use_multi_sample_dropout=True )
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_dataset = load_data(args.train_data, tokenizer, args.max_seq_len)
    validation_data = load_data(args.validation_data, tokenizer, args.max_seq_len)
    eval_dataset = load_data(args.eval_data, tokenizer, args.max_seq_len)

    if train_dataset is None or eval_dataset is None or validation_data is None:
        sys.exit(f"–û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏ –¥–∞–Ω–Ω—ã—Ö: —É–±–µ–¥–∏—Ç–µ—Å—å, —á—Ç–æ —Ñ–∞–π–ª—ã {args.train_data}, {args.validation_data} –∏ {args.eval_data} —Å—É—â–µ—Å—Ç–≤—É—é—Ç –∏ —Å–æ–¥–µ—Ä–∂–∞—Ç –Ω—É–∂–Ω—ã–µ –∫–æ–ª–æ–Ω–∫–∏.")

    #—Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ç–µ–∫—Å—Ç–∞ –∏ —Ç–æ–∫–µ–Ω–æ–≤, —Å –¥–∞–±–æ–≤–ª–µ–Ω–∏–µ–º —Å–æ–±—Å—Ç–≤–µ–Ω–Ω—ã—Ö
    save_contrel_date(tokenizer,train_dataset)
    model.roberta.resize_token_embeddings(len(tokenizer))
    
    embedding_layer = model.roberta.embeddings.word_embeddings
    print(embedding_layer.weight[tokenizer.convert_tokens_to_ids("<en>")])
    
    # –ü–æ–ª—É—á–∞–µ–º –º–µ—Ç–∫–∏ –∫–ª–∞—Å—Å–æ–≤ –∏–∑ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω–æ–≥–æ –¥–∞—Ç–∞—Å–µ—Ç–∞
    train_labels = [label.item() for _, _, label in train_dataset]

    # –í—ã—á–∏—Å–ª—è–µ–º –≤–µ—Å–∞ –∫–ª–∞—Å—Å–æ–≤
    class_weights = compute_ens_weights(train_labels, beta=0.999)

    # –í—ã–≤–æ–¥ –≤–µ—Å–æ–≤ –∫–ª–∞—Å—Å–æ–≤
    print(f"class_weights = {class_weights}")
    
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    validation_loader = DataLoader(validation_data, batch_size=args.batch_size)
    eval_loader = DataLoader(eval_dataset, batch_size=args.batch_size)
    
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    
    #scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr = base_lr, step_size_up = step_size_up, mode="triangular2",cycle_momentum=False )

    loss_fct = nn.CrossEntropyLoss(weight=class_weights.to(device))
    
    def train():
        model.train()
        batches_per_validation = (len(train_loader) // 2)+2
        batches_per_test = (len(train_loader) // 5)+2
        
        for epoch in range(args.epochs):
            print(f"Epoch: {epoch}/{args.epochs}")
            total_loss = 0
            for i,batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1} - Batches")):
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                optimizer.zero_grad()
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                loss.backward()
                optimizer.step()
                #scheduler.step()
                
                total_loss += loss.item()
                if(i % batches_per_validation == 0 and i >50):
                    evaluate(epoch, backprop="Validation", vall_train = True)
                if(i % batches_per_test == 0 and i >50):
                    evaluate(epoch, backprop="Test", flag_print = True)
                
            logging.info(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}, Step: {len(train_loader)}")
            # –í–∞–ª–∏–¥–∞—Ü–∏—è —Å –æ–±—Ä–∞—Ç–Ω—ã–º —Ä–∞—Å–ø—Ä–æ—Å—Ç—Ä–∞–Ω–µ–Ω–∏–µ–º –æ—à–∏–±–∫–∏ –∫–∞–∂–¥—ã–µ 2 —ç–ø–æ—Ö–∏
            evaluate(epoch , backprop="Train", flag_print = True)
            evaluate(epoch , backprop="Test", flag_print = True)
            evaluate(epoch , backprop="Validation", flag_print = True, vall_train = True)
            
            
    
    def evaluate(epoch = None, backprop = "None", flag_print = False, vall_train = False):
        global best_avg_f1
        model.eval()
        all_preds, all_labels = [], []
        total_loss = 0
        print(f"evaluate, backprop: {backprop}")
        loader = validation_loader
        if backprop == "Validation":
            loader = validation_loader
        elif backprop == "Test":
            loader = eval_loader
        elif backprop == "Train":
            loader = train_loader
        
        else: loader = eval_loader
        with torch.no_grad() if not backprop == "Validation" else torch.enable_grad():  # –í–∫–ª—é—á–∞–µ–º –≥—Ä–∞–¥–∏–µ–Ω—Ç—ã –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –≤–∞–ª–∏–¥–∞—Ü–∏–∏
            for batch in loader:
            #for batch in loader:
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                
                
                
                if backprop == "Validation" and vall_train:
                    #—É–º–µ–Ω—å—à–∞—è —Ç–∫ –æ–±—É—á–∞—é—â–∏–µ –¥–∞–Ω–Ω—ã–µ —É–¥–≤–æ–∏–ª–∏—Å—å
                    loss = loss
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    #scheduler.step()
                    loss = loss

                preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())
                total_loss += loss.item()
                
        avg_loss = total_loss / len(loader)
        f1 = f1_score(all_labels, all_preds, average='macro')
        f1_pn = f1_score(all_labels, all_preds, labels=[1, 2], average='macro')
        avg_f1 = (f1 + f1_pn) / 2

        if(flag_print):
            print()
            print("--"*20)
            print(f"Result evaluate in {backprop}")
            #logging.info(f"{backprop} Loss: {avg_loss:.4f}")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")
            #print(classification_report(all_labels, all_preds))
            castom_classification_report(all_labels, all_preds)

        # üíæ –°–æ—Ö—Ä–∞–Ω—è–µ–º –º–æ–¥–µ–ª—å —Ç–æ–ª—å–∫–æ –ø—Ä–∏ —É–ª—É—á—à–µ–Ω–∏–∏ —Å—Ä–µ–¥–Ω–µ–π –º–µ—Ç—Ä–∏–∫–∏
        if (avg_f1 > best_avg_f1) and backprop == "Test":
            best_avg_f1 = avg_f1
            torch.save(model.state_dict(), best_model_path)
            
            print(f"[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: {best_avg_f1:.4f}")
            print(f"[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: {best_model_path}")
            # üî∏ –°–æ—Ö—Ä–∞–Ω—è–µ–º –∑–Ω–∞—á–µ–Ω–∏–µ –ª—É—á—à–µ–≥–æ F1 –≤ —Ñ–∞–π–ª
            best_score_path = os.path.join(args.result, "best_score.txt")
            save_metrics_best(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
            save_confusion_matrix(epoch, all_labels, all_preds, args.result, backprop)
            with open(best_score_path, "w") as f:
                f.write(f"Epoch: {epoch}\n")
                f.write(f"Loss: {avg_loss:.4f}\n")
                f.write(f"F1-pn0: {f1:.4f}\n")
                f.write(f"F1-pn: {f1_pn:.4f}\n")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")


        if epoch is not None:
            save_metrics(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
        
    if args.init_checkpoint:
        model.load_state_dict(torch.load(args.init_checkpoint, map_location=device))

    train()

Namespace(model_name='./../ruRoberta-large/', max_seq_len=512, batch_size=16, epochs=10, lr=1e-06, init_checkpoint=None, train_data='./../data/train_data.csv', validation_data='./../data/validation.csv', eval_data='./../data/test.csv', result='./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/')


Some weights of RobertaModel were not initialized from the model checkpoint at ./../ruRoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


–î–æ–±–∞–≤–ª–µ–Ω–Ω—ã–µ —Å–ø–µ—Ü–∏–∞–ª—å–Ω—ã–µ —Ç–æ–∫–µ–Ω—ã:
<en>
</en>
{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>', 'additional_special_tokens': ['<en>', '</en>']}
['<en>', '</en>']
Text token:
<s>–î–∂–µ–π–º—Å ¬´–ë–∞–¥–¥–∏¬ª –ú–∞–∫–≥–∏—Ä—Ç (James (Buddy) McGirt, —Ç—Ä–µ–Ω–µ—Ä –î–∞–¥–∞—à–µ–≤–∞ —É–ø—Ä–∞—à–∏–≤–∞–ª –¥–∞–≥–µ—Å—Ç–∞–Ω—Å–∫–æ–≥–æ <en> —Å–ø–æ—Ä—Ç—Å–º–µ–Ω–∞ </en> –æ—Å—Ç–∞–Ω–æ–≤–∏—Ç—å –±–æ–π, –Ω–æ —Ç–æ—Ç —Ö–æ—Ç–µ–ª –ø—Ä–æ–¥–æ–ª–∂–∞—Ç—å.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><

Epoch 1 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:45<02:54,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.8128
F1-macro: 0.2924
F1-pn: 0.0229
             label  precision  recall  f1-score  support
                 0     71.731  98.864    83.140    616.0
                 1     50.000   0.901     1.770    111.0
                 2     22.222   1.504     2.817    133.0
          accuracy     71.163     NaN       NaN      NaN
         macro avg     47.985  33.756    29.242    860.0
      weighted avg     61.270  71.163    60.215    860.0
avg f1 (class 1&2)        NaN     NaN     2.293      NaN


Epoch 1 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:53<16:29,  3.01s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.1577
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.8128
F1-macro: 0.2924
F1-pn: 0.0229


Epoch 1 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:37<02:08,  1.90it/s]

evaluate, backprop: Test


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Epoch 1 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:44<10:07,  2.49s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.7599
F1-macro: 0.2841
F1-pn: 0.0088
             label  precision  recall  f1-score  support
                 0     71.678  99.838    83.446    616.0
                 1     50.000   0.901     1.770    111.0
                 2      0.000   0.000     0.000    133.0
          accuracy     71.628     NaN       NaN      NaN
         macro avg     40.559  33.580    28.405    860.0
      weighted avg     57.795  71.628    59.999    860.0
avg f1 (class 1&2)        NaN     NaN     0.885      NaN


Epoch 1 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 207/415 [02:03<01:49,  1.90it/s]

evaluate, backprop: Validation


Epoch 1 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:16<01:24,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7547
F1-macro: 0.3246
F1-pn: 0.0693
             label  precision  recall  f1-score  support
                 0     72.381  98.701    83.516    616.0
                 1     50.000   4.505     8.264    111.0
                 2     40.000   3.008     5.594    133.0
          accuracy     71.744     NaN       NaN      NaN
         macro avg     54.127  35.404    32.458    860.0
      weighted avg     64.484  71.744    61.753    860.0
avg f1 (class 1&2)        NaN     NaN     6.929      NaN


Epoch 1 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:26<09:18,  3.51s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.1969
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.7547
F1-macro: 0.3246
F1-pn: 0.0693


Epoch 1 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:10<00:39,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7125
F1-macro: 0.3332
F1-pn: 0.0825
             label  precision  recall  f1-score  support
                 0     72.488  98.377    83.471    616.0
                 1     66.667   1.802     3.509    111.0
                 2     47.619   7.519    12.987    133.0
          accuracy     71.860     NaN       NaN      NaN
         macro avg     62.258  35.899    33.322    860.0
      weighted avg     67.891  71.860    62.250    860.0
avg f1 (class 1&2)        NaN     NaN     8.248      NaN


Epoch 1 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:21<04:20,  3.52s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.2079
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.7125
F1-macro: 0.3332
F1-pn: 0.0825


Epoch 1 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:00<00:00,  1.38it/s]
INFO:root:Epoch 1, Loss: 0.8886837448700364, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.7759
F1-macro: 0.4048
F1-pn: 0.1858
             label  precision  recall  f1-score  support
                 0     74.316  97.340    84.284   4774.0
                 1     68.519   4.322     8.132    856.0
                 2     58.788  19.265    29.020   1007.0
          accuracy     73.497     NaN       NaN      NaN
         macro avg     67.208  40.309    40.479   6637.0
      weighted avg     71.213  73.497    66.077   6637.0
avg f1 (class 1&2)        NaN     NaN    18.576      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6929
F1-macro: 0.3769
F1-pn: 0.1474
             label  precision  recall  f1-score  support
                 0     73.374  97.078    83.578    616.0
                 1     83.333   4.505     8.547    111.0
                 2     46.154  13.534    20.930    133.0
          accuracy     72.209     NaN       

Epoch 2 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:44<02:54,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6643
F1-macro: 0.4590
F1-pn: 0.2688
             label  precision  recall  f1-score  support
                 0     75.128  95.130    83.954    616.0
                 1     70.833  15.315    25.185    111.0
                 2     48.214  20.301    28.571    133.0
          accuracy     73.256     NaN       NaN      NaN
         macro avg     64.725  43.582    45.904    860.0
      weighted avg     70.412  73.256    67.804    860.0
avg f1 (class 1&2)        NaN     NaN    26.878      NaN


Epoch 2 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:55<19:29,  3.55s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.3639
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.6643
F1-macro: 0.4590
F1-pn: 0.2688


Epoch 2 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:39<02:09,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6583
F1-macro: 0.5470
F1-pn: 0.4000
             label  precision  recall  f1-score  support
                 0     78.481  90.584    84.099    616.0
                 1     58.000  26.126    36.025    111.0
                 2     51.515  38.346    43.966    133.0
          accuracy     74.186     NaN       NaN      NaN
         macro avg     62.665  51.685    54.697    860.0
      weighted avg     71.667  74.186    71.688    860.0
avg f1 (class 1&2)        NaN     NaN    39.995      NaN


Epoch 2 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:50<14:32,  3.57s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.4735
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.6583
F1-macro: 0.5470
F1-pn: 0.4000


Epoch 2 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:10<01:48,  1.90it/s]

evaluate, backprop: Validation


Epoch 2 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:22<01:24,  1.90it/s]

evaluate, backprop: Test


Epoch 2 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:29<06:38,  2.51s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6163
F1-macro: 0.5283
F1-pn: 0.3709
             label  precision  recall  f1-score  support
                 0     77.778  92.045    84.312    616.0
                 1     69.231  16.216    26.277    111.0
                 2     54.286  42.857    47.899    133.0
          accuracy     74.651     NaN       NaN      NaN
         macro avg     67.098  50.373    52.830    860.0
      weighted avg     73.042  74.651    71.190    860.0
avg f1 (class 1&2)        NaN     NaN    37.088      NaN


Epoch 2 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:13<00:39,  1.90it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6001
F1-macro: 0.5543
F1-pn: 0.4095
             label  precision  recall  f1-score  support
                 0     78.762  90.909    84.401    616.0
                 1     64.103  22.523    33.333    111.0
                 2     53.636  44.361    48.560    133.0
          accuracy     74.884     NaN       NaN      NaN
         macro avg     65.500  52.598    55.431    860.0
      weighted avg     72.984  74.884    72.267    860.0
avg f1 (class 1&2)        NaN     NaN    40.947      NaN


Epoch 2 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:24<04:21,  3.54s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.4819
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.6001
F1-macro: 0.5543
F1-pn: 0.4095


Epoch 2 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:03<00:00,  1.37it/s]
INFO:root:Epoch 2, Loss: 0.7459811371493053, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.6267
F1-macro: 0.6291
F1-pn: 0.5134
             label  precision  recall  f1-score  support
                 0     82.887  89.485    86.060   4774.0
                 1     68.217  30.841    42.478    856.0
                 2     57.755  62.860    60.200   1007.0
          accuracy     77.882     NaN       NaN      NaN
         macro avg     69.620  61.062    62.912   6637.0
      weighted avg     77.182  77.882    76.515   6637.0
avg f1 (class 1&2)        NaN     NaN    51.339      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5950
F1-macro: 0.5758
F1-pn: 0.4423
             label  precision  recall  f1-score  support
                 0     80.896  87.987    84.292    616.0
                 1     63.636  25.225    36.129    111.0
                 2     50.000  54.887    52.330    133.0
          accuracy     74.767     NaN       

Epoch 3 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:44<02:54,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5697
F1-macro: 0.5905
F1-pn: 0.4580
             label  precision  recall  f1-score  support
                 0     80.516  91.234    85.540    616.0
                 1     63.265  27.928    38.750    111.0
                 2     57.522  48.872    52.846    133.0
          accuracy     76.512     NaN       NaN      NaN
         macro avg     67.101  56.011    59.045    860.0
      weighted avg     74.733  76.512    74.445    860.0
avg f1 (class 1&2)        NaN     NaN    45.798      NaN


Epoch 3 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:56<21:44,  3.96s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.5242
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5697
F1-macro: 0.5905
F1-pn: 0.4580


Epoch 3 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:41<02:09,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5522
F1-macro: 0.6104
F1-pn: 0.4883
             label  precision  recall  f1-score  support
                 0     81.967  89.286    85.470    616.0
                 1     58.462  34.234    43.182    111.0
                 2     56.452  52.632    54.475    133.0
          accuracy     76.512     NaN       NaN      NaN
         macro avg     65.627  58.717    61.042    860.0
      weighted avg     74.987  76.512    75.218    860.0
avg f1 (class 1&2)        NaN     NaN    48.828      NaN


Epoch 3 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:51<14:20,  3.53s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.5494
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5522
F1-macro: 0.6104
F1-pn: 0.4883


Epoch 3 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:11<01:48,  1.89it/s]

evaluate, backprop: Validation


Epoch 3 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:23<01:24,  1.89it/s]

evaluate, backprop: Test


Epoch 3 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:30<06:38,  2.51s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5492
F1-macro: 0.5985
F1-pn: 0.4709
             label  precision  recall  f1-score  support
                 0     80.753  90.584    85.386    616.0
                 1     60.000  32.432    42.105    111.0
                 2     57.798  47.368    52.066    133.0
          accuracy     76.395     NaN       NaN      NaN
         macro avg     66.184  56.795    59.853    860.0
      weighted avg     74.524  76.395    74.647    860.0
avg f1 (class 1&2)        NaN     NaN    47.086      NaN


Epoch 3 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:14<00:39,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5251
F1-macro: 0.6125
F1-pn: 0.4877
             label  precision  recall  f1-score  support
                 0     81.991  90.909    86.220    616.0
                 1     68.085  28.829    40.506    111.0
                 2     57.692  56.391    57.034    133.0
          accuracy     77.558     NaN       NaN      NaN
         macro avg     69.256  58.710    61.254    860.0
      weighted avg     76.439  77.558    75.806    860.0
avg f1 (class 1&2)        NaN     NaN    48.770      NaN


Epoch 3 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:25<04:21,  3.53s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.5501
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5251
F1-macro: 0.6125
F1-pn: 0.4877


Epoch 3 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:04<00:00,  1.36it/s]
INFO:root:Epoch 3, Loss: 0.6200981029185904, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.5107
F1-macro: 0.7168
F1-pn: 0.6337
             label  precision  recall  f1-score  support
                 0     87.819  88.793    88.303   4774.0
                 1     64.380  51.519    57.236    856.0
                 2     65.867  73.585    69.512   1007.0
          accuracy     81.678     NaN       NaN      NaN
         macro avg     72.688  71.299    71.684   6637.0
      weighted avg     81.465  81.678    81.445   6637.0
avg f1 (class 1&2)        NaN     NaN    63.374      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5269
F1-macro: 0.6682
F1-pn: 0.5703
             label  precision  recall  f1-score  support
                 0     86.129  86.688    86.408    616.0
                 1     60.494  44.144    51.042    111.0
                 2     57.862  69.173    63.014    133.0
          accuracy     78.488     NaN       

Epoch 4 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:44<02:54,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5482
F1-macro: 0.6745
F1-pn: 0.5837
             label  precision  recall  f1-score  support
                 0     88.831  82.630    85.618    616.0
                 1     52.632  54.054    53.333    111.0
                 2     56.069  72.932    63.399    133.0
          accuracy     77.442     NaN       NaN      NaN
         macro avg     65.844  69.872    67.450    860.0
      weighted avg     79.092  77.442    78.015    860.0
avg f1 (class 1&2)        NaN     NaN    58.366      NaN


Epoch 4 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:55<19:19,  3.53s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6291
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5482
F1-macro: 0.6745
F1-pn: 0.5837


Epoch 4 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:39<02:09,  1.89it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:46<10:11,  2.51s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5041
F1-macro: 0.6720
F1-pn: 0.5746
             label  precision  recall  f1-score  support
                 0     86.196  87.175    86.683    616.0
                 1     59.524  45.045    51.282    111.0
                 2     59.477  68.421    63.636    133.0
          accuracy     78.837     NaN       NaN      NaN
         macro avg     68.399  66.880    67.200    860.0
      weighted avg     78.621  78.837    78.549    860.0
avg f1 (class 1&2)        NaN     NaN    57.459      NaN


Epoch 4 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:06<01:48,  1.89it/s]

evaluate, backprop: Validation


Epoch 4 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:18<01:24,  1.89it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:25<06:39,  2.51s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5015
F1-macro: 0.6579
F1-pn: 0.5542
             label  precision  recall  f1-score  support
                 0     85.086  87.987    86.512    616.0
                 1     59.494  42.342    49.474    111.0
                 2     59.028  63.910    61.372    133.0
          accuracy     78.372     NaN       NaN      NaN
         macro avg     67.869  64.746    65.786    860.0
      weighted avg     77.753  78.372    77.844    860.0
avg f1 (class 1&2)        NaN     NaN    55.423      NaN


Epoch 4 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:09<00:39,  1.89it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:17<03:05,  2.51s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4947
F1-macro: 0.6721
F1-pn: 0.5738
             label  precision  recall  f1-score  support
                 0     87.234  86.526    86.879    616.0
                 1     57.778  46.847    51.741    111.0
                 2     57.862  69.173    63.014    133.0
          accuracy     78.721     NaN       NaN      NaN
         macro avg     67.624  67.515    67.211    860.0
      weighted avg     78.890  78.721    78.653    860.0
avg f1 (class 1&2)        NaN     NaN    57.377      NaN


Epoch 4 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [04:56<00:00,  1.40it/s]
INFO:root:Epoch 4, Loss: 0.5244151362453598, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.4321
F1-macro: 0.7649
F1-pn: 0.6978
             label  precision  recall  f1-score  support
                 0     90.355  89.485    89.918   4774.0
                 1     71.248  59.346    64.755    856.0
                 2     68.896  81.827    74.807   1007.0
          accuracy     84.436     NaN       NaN      NaN
         macro avg     76.833  76.886    76.493   6637.0
      weighted avg     84.635  84.436    84.380   6637.0
avg f1 (class 1&2)        NaN     NaN    69.781      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4924
F1-macro: 0.6763
F1-pn: 0.5829
             label  precision  recall  f1-score  support
                 0     87.752  84.903    86.304    616.0
                 1     58.889  47.748    52.736    111.0
                 2     56.322  73.684    63.844    133.0
          accuracy     78.372     NaN       

Epoch 5 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:44<02:54,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5181
F1-macro: 0.7055
F1-pn: 0.6267
             label  precision  recall  f1-score  support
                 0     91.927  81.331    86.305    616.0
                 1     56.667  61.261    58.874    111.0
                 2     55.897  81.955    66.463    133.0
          accuracy     78.837     NaN       NaN      NaN
         macro avg     68.164  74.849    70.548    860.0
      weighted avg     81.804  78.837    79.696    860.0
avg f1 (class 1&2)        NaN     NaN    62.669      NaN


Epoch 5 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:55<19:21,  3.53s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6661
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5181
F1-macro: 0.7055
F1-pn: 0.6267


Epoch 5 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:39<02:09,  1.89it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:46<10:12,  2.51s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4800
F1-macro: 0.6989
F1-pn: 0.6152
             label  precision  recall  f1-score  support
                 0     89.310  84.091    86.622    616.0
                 1     56.667  61.261    58.874    111.0
                 2     58.750  70.677    64.164    133.0
          accuracy     79.070     NaN       NaN      NaN
         macro avg     68.242  72.010    69.887    860.0
      weighted avg     80.371  79.070    79.568    860.0
avg f1 (class 1&2)        NaN     NaN    61.519      NaN


Epoch 5 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:07<01:48,  1.90it/s]

evaluate, backprop: Validation


Epoch 5 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:18<01:24,  1.89it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:25<06:39,  2.51s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4733
F1-macro: 0.6843
F1-pn: 0.5899
             label  precision  recall  f1-score  support
                 0     85.871  88.799    87.310    616.0
                 1     58.947  50.450    54.369    111.0
                 2     64.844  62.406    63.602    133.0
          accuracy     79.767     NaN       NaN      NaN
         macro avg     69.887  67.218    68.427    860.0
      weighted avg     79.144  79.767    79.392    860.0
avg f1 (class 1&2)        NaN     NaN    58.985      NaN


Epoch 5 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:10<00:39,  1.89it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:17<03:06,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4719
F1-macro: 0.6995
F1-pn: 0.6149
             label  precision  recall  f1-score  support
                 0     88.403  85.390    86.870    616.0
                 1     57.273  56.757    57.014    111.0
                 2     61.290  71.429    65.972    133.0
          accuracy     79.535     NaN       NaN      NaN
         macro avg     68.989  71.192    69.952    860.0
      weighted avg     80.192  79.535    79.785    860.0
avg f1 (class 1&2)        NaN     NaN    61.493      NaN


Epoch 5 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [04:56<00:00,  1.40it/s]
INFO:root:Epoch 5, Loss: 0.4391646843717759, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.3435
F1-macro: 0.8288
F1-pn: 0.7810
             label  precision  recall  f1-score  support
                 0     93.265  91.663    92.457   4774.0
                 1     72.062  75.935    73.948    856.0
                 2     80.825  83.714    82.244   1007.0
          accuracy     88.429     NaN       NaN      NaN
         macro avg     82.051  83.771    82.883   6637.0
      weighted avg     88.643  88.429    88.520   6637.0
avg f1 (class 1&2)        NaN     NaN    78.096      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4732
F1-macro: 0.7100
F1-pn: 0.6298
             label  precision  recall  f1-score  support
                 0     89.116  85.065    87.043    616.0
                 1     56.250  64.865    60.251    111.0
                 2     63.194  68.421    65.704    133.0
          accuracy     79.884     NaN       

Epoch 6 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:44<02:54,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4859
F1-macro: 0.7179
F1-pn: 0.6401
             label  precision  recall  f1-score  support
                 0     90.295  84.578    87.343    616.0
                 1     55.396  69.369    61.600    111.0
                 2     63.889  69.173    66.426    133.0
          accuracy     80.233     NaN       NaN      NaN
         macro avg     69.860  74.373    71.790    860.0
      weighted avg     81.707  80.233    80.785    860.0
avg f1 (class 1&2)        NaN     NaN    64.013      NaN


Epoch 6 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:55<19:26,  3.54s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6790
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4859
F1-macro: 0.7179
F1-pn: 0.6401


Epoch 6 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:39<02:09,  1.90it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4735
F1-macro: 0.7289
F1-pn: 0.6533
             label  precision  recall  f1-score  support
                 0     90.273  85.877    88.020    616.0
                 1     60.504  64.865    62.609    111.0
                 2     63.226  73.684    68.056    133.0
          accuracy     81.279     NaN       NaN      NaN
         macro avg     71.334  74.809    72.895    860.0
      weighted avg     82.248  81.279    81.653    860.0
avg f1 (class 1&2)        NaN     NaN    65.332      NaN


Epoch 6 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:50<14:25,  3.55s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6911
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4735
F1-macro: 0.7289
F1-pn: 0.6533


Epoch 6 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:10<01:49,  1.88it/s]

evaluate, backprop: Validation


Epoch 6 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:22<01:25,  1.88it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:29<06:42,  2.53s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4726
F1-macro: 0.7168
F1-pn: 0.6337
             label  precision  recall  f1-score  support
                 0     89.091  87.500    88.288    616.0
                 1     61.682  59.459    60.550    111.0
                 2     62.838  69.925    66.192    133.0
          accuracy     81.163     NaN       NaN      NaN
         macro avg     71.204  72.295    71.677    860.0
      weighted avg     81.493  81.163    81.291    860.0
avg f1 (class 1&2)        NaN     NaN    63.371      NaN


Epoch 6 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:14<00:39,  1.89it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:21<03:07,  2.53s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4676
F1-macro: 0.7234
F1-pn: 0.6449
             label  precision  recall  f1-score  support
                 0     90.273  85.877    88.020    616.0
                 1     59.829  63.063    61.404    111.0
                 2     62.420  73.684    67.586    133.0
          accuracy     81.047     NaN       NaN      NaN
         macro avg     70.841  74.208    72.337    860.0
      weighted avg     82.036  81.047    81.424    860.0
avg f1 (class 1&2)        NaN     NaN    64.495      NaN


Epoch 6 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:00<00:00,  1.38it/s]
INFO:root:Epoch 6, Loss: 0.3508994158132967, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.2578
F1-macro: 0.8792
F1-pn: 0.8449
             label  precision  recall  f1-score  support
                 0     94.520  95.015    94.767   4774.0
                 1     84.489  78.271    81.261    856.0
                 2     86.124  89.374    87.719   1007.0
          accuracy     91.999     NaN       NaN      NaN
         macro avg     88.378  87.553    87.916   6637.0
      weighted avg     91.952  91.999    91.955   6637.0
avg f1 (class 1&2)        NaN     NaN    84.490      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4532
F1-macro: 0.7045
F1-pn: 0.6180
             label  precision  recall  f1-score  support
                 0     87.561  87.987    87.773    616.0
                 1     62.245  54.955    58.373    111.0
                 2     62.937  67.669    65.217    133.0
          accuracy     80.581     NaN       

Epoch 7 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:45<02:54,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4748
F1-macro: 0.7343
F1-pn: 0.6567
             label  precision  recall  f1-score  support
                 0     89.752  88.149    88.943    616.0
                 1     63.248  66.667    64.912    111.0
                 2     65.217  67.669    66.421    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     72.739  74.162    73.425    860.0
      weighted avg     82.537  82.209    82.359    860.0
avg f1 (class 1&2)        NaN     NaN    65.666      NaN


Epoch 7 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:55<19:26,  3.54s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6955
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4748
F1-macro: 0.7343
F1-pn: 0.6567


Epoch 7 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:40<02:09,  1.89it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:47<10:14,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4711
F1-macro: 0.7067
F1-pn: 0.6204
             label  precision  recall  f1-score  support
                 0     89.018  86.851    87.921    616.0
                 1     61.224  54.054    57.416    111.0
                 2     60.870  73.684    66.667    133.0
          accuracy     80.581     NaN       NaN      NaN
         macro avg     70.371  71.530    70.668    860.0
      weighted avg     81.078  80.581    80.697    860.0
avg f1 (class 1&2)        NaN     NaN    62.041      NaN


Epoch 7 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:07<01:49,  1.89it/s]

evaluate, backprop: Validation


Epoch 7 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:18<01:24,  1.88it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:25<06:41,  2.53s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4751
F1-macro: 0.7155
F1-pn: 0.6340
             label  precision  recall  f1-score  support
                 0     88.742  87.013    87.869    616.0
                 1     58.824  63.063    60.870    111.0
                 2     64.964  66.917    65.926    133.0
          accuracy     80.814     NaN       NaN      NaN
         macro avg     70.843  72.331    71.555    860.0
      weighted avg     81.203  80.814    80.991    860.0
avg f1 (class 1&2)        NaN     NaN    63.398      NaN


Epoch 7 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:10<00:39,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4728
F1-macro: 0.7352
F1-pn: 0.6631
             label  precision  recall  f1-score  support
                 0     90.690  85.390    87.960    616.0
                 1     59.398  71.171    64.754    111.0
                 2     64.626  71.429    67.857    133.0
          accuracy     81.395     NaN       NaN      NaN
         macro avg     71.571  75.996    73.524    860.0
      weighted avg     82.620  81.395    81.856    860.0
avg f1 (class 1&2)        NaN     NaN    66.306      NaN


Epoch 7 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:20<04:22,  3.55s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6991
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.4728
F1-macro: 0.7352
F1-pn: 0.6631


Epoch 7 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:00<00:00,  1.38it/s]
INFO:root:Epoch 7, Loss: 0.2633613132434078, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.1720
F1-macro: 0.9325
F1-pn: 0.9139
             label  precision  recall  f1-score  support
                 0     98.010  95.936    96.962   4774.0
                 1     86.300  93.458    89.736    856.0
                 2     91.707  94.439    93.053   1007.0
          accuracy     95.389     NaN       NaN      NaN
         macro avg     92.006  94.611    93.250   6637.0
      weighted avg     95.543  95.389    95.437   6637.0
avg f1 (class 1&2)        NaN     NaN    91.395      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4711
F1-macro: 0.7413
F1-pn: 0.6679
             label  precision  recall  f1-score  support
                 0     91.267  86.526    88.833    616.0
                 1     60.465  70.270    65.000    111.0
                 2     65.306  72.180    68.571    133.0
          accuracy     82.209     NaN       

Epoch 8 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:45<02:55,  1.88it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:52<13:51,  2.53s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4976
F1-macro: 0.7343
F1-pn: 0.6600
             label  precision  recall  f1-score  support
                 0     91.769  85.065    88.290    616.0
                 1     57.241  74.775    64.844    111.0
                 2     64.583  69.925    67.148    133.0
          accuracy     81.395     NaN       NaN      NaN
         macro avg     71.198  76.588    73.427    860.0
      weighted avg     83.108  81.395    81.994    860.0
avg f1 (class 1&2)        NaN     NaN    65.996      NaN


Epoch 8 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:36<02:09,  1.89it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:43<10:14,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4836
F1-macro: 0.7354
F1-pn: 0.6574
             label  precision  recall  f1-score  support
                 0     89.655  88.636    89.143    616.0
                 1     64.286  64.865    64.574    111.0
                 2     65.468  68.421    66.912    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     73.136  73.974    73.543    860.0
      weighted avg     82.640  82.442    82.534    860.0
avg f1 (class 1&2)        NaN     NaN    65.743      NaN


Epoch 8 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:04<01:49,  1.89it/s]

evaluate, backprop: Validation


Epoch 8 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:15<01:24,  1.89it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5013
F1-macro: 0.7444
F1-pn: 0.6681
             label  precision  recall  f1-score  support
                 0     89.627  89.773    89.700    616.0
                 1     66.055  64.865    65.455    111.0
                 2     67.910  68.421    68.165    133.0
          accuracy     83.256     NaN       NaN      NaN
         macro avg     74.531  74.353    74.440    860.0
      weighted avg     83.226  83.256    83.240    860.0
avg f1 (class 1&2)        NaN     NaN    66.810      NaN


Epoch 8 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:26<09:23,  3.55s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.7062
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_multi_dropout_V3_Token_Target/best_model.pth
Loss: 0.5013
F1-macro: 0.7444
F1-pn: 0.6681


Epoch 8 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:10<00:39,  1.89it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:17<03:06,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5114
F1-macro: 0.7366
F1-pn: 0.6616
             label  precision  recall  f1-score  support
                 0     91.237  86.201    88.648    616.0
                 1     59.542  70.270    64.463    111.0
                 2     64.626  71.429    67.857    133.0
          accuracy     81.860     NaN       NaN      NaN
         macro avg     71.802  75.967    73.656    860.0
      weighted avg     83.031  81.860    82.311    860.0
avg f1 (class 1&2)        NaN     NaN    66.160      NaN


Epoch 8 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [04:57<00:00,  1.40it/s]
INFO:root:Epoch 8, Loss: 0.1732703997315951, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.1006
F1-macro: 0.9677
F1-pn: 0.9586
             label  precision  recall  f1-score  support
                 0     99.007  98.178    98.591   4774.0
                 1     94.381  96.145    95.255    856.0
                 2     95.344  97.617    96.467   1007.0
          accuracy     97.830     NaN       NaN      NaN
         macro avg     96.244  97.313    96.771   6637.0
      weighted avg     97.855  97.830    97.838   6637.0
avg f1 (class 1&2)        NaN     NaN    95.861      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5018
F1-macro: 0.7491
F1-pn: 0.6751
             label  precision  recall  f1-score  support
                 0     90.849  88.636    89.729    616.0
                 1     65.789  67.568    66.667    111.0
                 2     65.517  71.429    68.345    133.0
          accuracy     83.256     NaN       

Epoch 9 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:45<02:54,  1.89it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:52<13:50,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5455
F1-macro: 0.7368
F1-pn: 0.6613
             label  precision  recall  f1-score  support
                 0     90.404  87.175    88.760    616.0
                 1     60.317  68.468    64.135    111.0
                 2     66.429  69.925    68.132    133.0
          accuracy     82.093     NaN       NaN      NaN
         macro avg     72.383  75.190    73.676    860.0
      weighted avg     82.813  82.093    82.392    860.0
avg f1 (class 1&2)        NaN     NaN    66.133      NaN


Epoch 9 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:36<02:10,  1.88it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:43<10:17,  2.53s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5360
F1-macro: 0.7436
F1-pn: 0.6685
             label  precision  recall  f1-score  support
                 0     91.356  87.500    89.386    616.0
                 1     61.905  70.270    65.823    111.0
                 2     65.278  70.677    67.870    133.0
          accuracy     82.674     NaN       NaN      NaN
         macro avg     72.846  76.149    74.360    860.0
      weighted avg     83.522  82.674    83.018    860.0
avg f1 (class 1&2)        NaN     NaN    66.846      NaN


Epoch 9 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:04<01:48,  1.89it/s]

evaluate, backprop: Validation


Epoch 9 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:15<01:24,  1.89it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:22<06:40,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5430
F1-macro: 0.7393
F1-pn: 0.6585
             label  precision  recall  f1-score  support
                 0     89.567  90.584    90.073    616.0
                 1     67.677  60.360    63.810    111.0
                 2     66.667  69.173    67.897    133.0
          accuracy     83.372     NaN       NaN      NaN
         macro avg     74.637  73.373    73.926    860.0
      weighted avg     83.200  83.372    83.253    860.0
avg f1 (class 1&2)        NaN     NaN    65.853      NaN


Epoch 9 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:07<00:39,  1.90it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:14<03:06,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5453
F1-macro: 0.7317
F1-pn: 0.6538
             label  precision  recall  f1-score  support
                 0     90.541  87.013    88.742    616.0
                 1     62.832  63.964    63.393    111.0
                 2     62.581  72.932    67.361    133.0
          accuracy     81.860     NaN       NaN      NaN
         macro avg     71.984  74.636    73.165    860.0
      weighted avg     82.640  81.860    82.163    860.0
avg f1 (class 1&2)        NaN     NaN    65.377      NaN


Epoch 9 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [04:53<00:00,  1.41it/s]
INFO:root:Epoch 9, Loss: 0.0964221142872001, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0503
F1-macro: 0.9891
F1-pn: 0.9858
             label  precision  recall  f1-score  support
                 0     99.790  99.351    99.570   4774.0
                 1     97.924  99.182    98.549    856.0
                 2     98.132  99.106    98.617   1007.0
          accuracy     99.292     NaN       NaN      NaN
         macro avg     98.615  99.213    98.912   6637.0
      weighted avg     99.297  99.292    99.293   6637.0
avg f1 (class 1&2)        NaN     NaN    98.583      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5352
F1-macro: 0.7405
F1-pn: 0.6615
             label  precision  recall  f1-score  support
                 0     89.013  90.747    89.871    616.0
                 1     66.667  61.261    63.850    111.0
                 2     69.231  67.669    68.441    133.0
          accuracy     83.372     NaN       

Epoch 10 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:45<02:54,  1.89it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:52<13:48,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5886
F1-macro: 0.7359
F1-pn: 0.6579
             label  precision  recall  f1-score  support
                 0     90.620  87.825    89.200    616.0
                 1     63.248  66.667    64.912    111.0
                 2     63.699  69.925    66.667    133.0
          accuracy     82.326     NaN       NaN      NaN
         macro avg     72.522  74.805    73.593    860.0
      weighted avg     82.923  82.326    82.581    860.0
avg f1 (class 1&2)        NaN     NaN    65.789      NaN


Epoch 10 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:36<02:09,  1.89it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:43<10:15,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5943
F1-macro: 0.7303
F1-pn: 0.6502
             label  precision  recall  f1-score  support
                 0     89.769  88.312    89.034    616.0
                 1     63.478  65.766    64.602    111.0
                 2     64.029  66.917    65.441    133.0
          accuracy     82.093     NaN       NaN      NaN
         macro avg     72.425  73.665    73.026    860.0
      weighted avg     82.395  82.093    82.232    860.0
avg f1 (class 1&2)        NaN     NaN    65.021      NaN


Epoch 10 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:03<01:48,  1.90it/s]

evaluate, backprop: Validation


Epoch 10 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:15<01:24,  1.90it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:22<06:40,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6200
F1-macro: 0.7237
F1-pn: 0.6432
             label  precision  recall  f1-score  support
                 0     89.799  87.175    88.468    616.0
                 1     62.832  63.964    63.393    111.0
                 2     61.745  69.173    65.248    133.0
          accuracy     81.395     NaN       NaN      NaN
         macro avg     71.459  73.437    72.370    860.0
      weighted avg     81.980  81.395    81.641    860.0
avg f1 (class 1&2)        NaN     NaN    64.321      NaN


Epoch 10 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:07<00:39,  1.89it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:14<03:06,  2.52s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6194
F1-macro: 0.7301
F1-pn: 0.6481
             label  precision  recall  f1-score  support
                 0     89.050  89.773    89.410    616.0
                 1     64.762  61.261    62.963    111.0
                 2     66.418  66.917    66.667    133.0
          accuracy     82.558     NaN       NaN      NaN
         macro avg     73.410  72.650    73.013    860.0
      weighted avg     82.415  82.558    82.479    860.0
avg f1 (class 1&2)        NaN     NaN    64.815      NaN


Epoch 10 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [04:53<00:00,  1.41it/s]
INFO:root:Epoch 10, Loss: 0.04621558942809611, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0200
F1-macro: 0.9970
F1-pn: 0.9962
             label  precision  recall  f1-score  support
                 0     99.958  99.791    99.874   4774.0
                 1     99.188  99.883    99.534    856.0
                 2     99.604  99.801    99.702   1007.0
          accuracy     99.804     NaN       NaN      NaN
         macro avg     99.583  99.825    99.704   6637.0
      weighted avg     99.805  99.804    99.804   6637.0
avg f1 (class 1&2)        NaN     NaN    99.618      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6315
F1-macro: 0.7294
F1-pn: 0.6495
             label  precision  recall  f1-score  support
                 0     89.884  87.987    88.925    616.0
                 1     62.609  64.865    63.717    111.0
                 2     64.085  68.421    66.182    133.0
          accuracy     81.977     NaN       

In [11]:
import pandas as pd

def load_and_display_metrics(metrics_file):
    # –ó–∞–≥—Ä—É–∂–∞–µ–º CSV-—Ñ–∞–π–ª
    df = pd.read_csv(metrics_file)
    df = df[df["backprop"] == "Test"]
    # –û–ø—Ä–µ–¥–µ–ª—è–µ–º –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ —ç–ø–æ—Ö
    epochs = df["epoch"].unique()
    print(epochs)
    
    for epoch in epochs:
        # –§–∏–ª—å—Ç—Ä—É–µ–º –¥–∞–Ω–Ω—ã–µ –ø–æ —ç–ø–æ—Ö–µ
        epoch_df = df[df["epoch"] == epoch]
        
        for idx, row in epoch_df.iterrows():
            loss = row["loss"]
            backprop_value = row["backprop"]
            print(f"\nEpoch {epoch} (Backprop: {backprop_value}) (Loss: {loss})\n" + "-"*30)
            # –§–æ—Ä–º–∏—Ä—É–µ–º —Ç–∞–±–ª–∏—Ü—É –≤ —Å—Ç–∏–ª–µ classification_report
            table_data = {}
            class_labels = sorted(
                set(col.split("_")[1] for col in df.columns 
                    if col.startswith("class_") and "precision" in col)
            )
            
            # –§–∏–ª—å—Ç—Ä—É–µ–º —Ç–æ–ª—å–∫–æ —á–∏—Å–ª–æ–≤—ã–µ –º–µ—Ç–∫–∏ –∫–ª–∞—Å—Å–æ–≤
            class_labels = [label for label in class_labels if label.isdigit()]
            
            for label in class_labels:
                table_data[int(label)] = {
                    "precision": row[f"class_{label}_precision"],
                    "recall": row[f"class_{label}_recall"],
                    "f1-score": row[f"class_{label}_f1"],
                }
            
            # –î–æ–±–∞–≤–ª—è–µ–º —Å—Ä–µ–¥–Ω–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è
            table_data["accuracy"] = {"precision": "", "recall": "", "f1-score": row["accuracy"] }
            table_data["macro avg"] = {
                "precision": row["macro_precision"],
                "recall": row["macro_recall"],
                "f1-score": row["macro_f1"],
            }
            table_data["weighted avg"] = {
                "precision": row["weighted_precision"],
                "recall": row["weighted_recall"],
                "f1-score": row["weighted_f1"],
            }
            
            # –í—ã–≤–æ–¥–∏–º —Ç–∞–±–ª–∏—Ü—É
            df_table = pd.DataFrame.from_dict(table_data, orient="index")
            print(df_table.to_string())

# –ü—Ä–∏–º–µ—Ä –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è
metrics_file = "./result_lr_1e-06_test/metrics_class_weights.csv"
load_and_display_metrics(metrics_file)

FileNotFoundError: [Errno 2] No such file or directory: './result_lr_1e-06_test/metrics_class_weights.csv'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# –ó–∞–≥—Ä—É–∑–∫–∞ CSV-—Ñ–∞–π–ª–∞
df = pd.read_csv("metrics_class_weights.csv")  # —É–∫–∞–∂–∏ –ø—É—Ç—å –∫ —Ñ–∞–π–ª—É, –µ—Å–ª–∏ –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è

# –ü–æ—Å—Ç—Ä–æ–µ–Ω–∏–µ –≥—Ä–∞—Ñ–∏–∫–∞ loss
plt.figure(figsize=(10, 6))

for stage in ['Train', 'Validation', 'Test']:
    stage_data = df[df['backprop'] == stage]
    plt.plot(stage_data['epoch'], stage_data['loss'], label=stage)

plt.title('–ó–Ω–∞—á–µ–Ω–∏–µ Loss –ø–æ —ç–ø–æ—Ö–∞–º')
plt.xlabel('–≠–ø–æ—Ö–∞')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
