In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import argparse
import logging
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import f1_score, classification_report
from torch.utils.data import DataLoader, TensorDataset
import sys
from tqdm import tqdm, trange
import os
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CyclicLR


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import RobertaModel
import torch
import torch.nn as nn
from transformers.modeling_outputs import SequenceClassifierOutput

class RobertaWithMultiSampleDropout(nn.Module):
    def __init__(self, model_name, num_labels, dropout_rate=0.3, num_dropouts=5, use_multi_sample_dropout=True):
        super(RobertaWithMultiSampleDropout, self).__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.use_multi_sample_dropout = use_multi_sample_dropout

        if self.use_multi_sample_dropout:
            self.dropouts = nn.ModuleList([
                nn.Dropout(dropout_rate) for _ in range(num_dropouts)
            ])
        else:
            self.dropout = nn.Dropout(dropout_rate)

        self.classifier = nn.Linear(self.roberta.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        sequence_output = outputs[0]  # –ø–æ—Å–ª–µ–¥–Ω–∏–π —Å–∫—Ä—ã—Ç—ã–π —Å–ª–æ–π

        if self.use_multi_sample_dropout:
            logits_list = []
            for dropout in self.dropouts:
                dropped = dropout(sequence_output[:, 0, :])  # [CLS]-—Ç–æ–∫–µ–Ω
                logits_list.append(self.classifier(dropped))
            avg_logits = torch.mean(torch.stack(logits_list), dim=0)
        else:
            dropped = self.dropout(sequence_output[:, 0, :])  # [CLS]-—Ç–æ–∫–µ–Ω
            avg_logits = self.classifier(dropped)

        return SequenceClassifierOutput(logits=avg_logits)


In [3]:
def save_metrics_best(epoch, all_labels, all_preds, result_path, backprop, loss):
    import os
    import pandas as pd
    from sklearn.metrics import classification_report

    metrics_file = os.path.join(result_path, "metrics_best.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)

    # –°–æ–∑–¥–∞–µ–º —Å–ª–æ–≤–∞—Ä—å —Å –º–µ—Ç—Ä–∏–∫–∞–º–∏
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop,
        "loss": loss
    }

    # –î–æ–±–∞–≤–ª—è–µ–º –º–µ—Ç—Ä–∏–∫–∏ –ø–æ –∫–∞–∂–¥–æ–º—É –∫–ª–∞—Å—Å—É
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]

    # –°–æ–∑–¥–∞–µ–º DataFrame –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º –µ–≥–æ –≤ CSV (–ø–µ—Ä–µ–∑–∞–ø–∏—Å—å —Ñ–∞–π–ª–∞)
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='w', header=True, index=False)

In [4]:
def compute_ens_weights(train_labels, beta: float = 0.999):
    train_labels = np.array(train_labels)
    classes, counts = np.unique(train_labels, return_counts=True)
    effective_num = (1 - np.power(beta, counts)) / (1 - beta)
    weights = 1.0 / effective_num
    weights = weights / np.sum(weights) * len(classes)  # –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è –∫–∞–∫ –≤ –æ—Ä–∏–≥–∏–Ω–∞–ª—å–Ω–æ–π —Å—Ç–∞—Ç—å–µ
    return torch.tensor(weights, dtype=torch.float)

In [5]:
def get_entity_tags_from_files(*file_paths):
    all_tags = set()
    for path in file_paths:
        try:
            df = pd.read_csv(path, sep='\t')
            if "entity_tag" in df.columns:
                tags = df["entity_tag"].dropna().unique().tolist()
                all_tags.update(tags)
        except Exception as e:
            print(f"[–û—à–∏–±–∫–∞] –ù–µ —É–¥–∞–ª–æ—Å—å –∑–∞–≥—Ä—É–∑–∏—Ç—å {path}: {e}")
    return sorted(list(all_tags))

In [6]:
def load_data(file_path, tokenizer, max_seq_len):
    try:
        df = pd.read_csv(file_path, sep='\t')  # –ó–∞–≥—Ä—É–∂–∞–µ–º CSV (TSV)

        # –ü—Ä–æ–≤–µ—Ä–∫–∞ –Ω—É–∂–Ω—ã—Ö –∫–æ–ª–æ–Ω–æ–∫
        required_columns = {
            "sentence", "entity", "label", "entity_tag",
            "entity_pos_start_rel", "entity_pos_end_rel"
        }
        if not required_columns.issubset(df.columns):
            raise ValueError(f"–û–∂–∏–¥–∞–µ–º—ã–µ –∫–æ–ª–æ–Ω–∫–∏: {required_columns}, –Ω–æ –≤ —Ñ–∞–π–ª–µ: {df.columns}")

        # –ó–∞–º–µ–Ω—è–µ–º -1 –Ω–∞ 2 (–µ—Å–ª–∏ –µ—Å—Ç—å)
        df["label"] = df["label"].replace(-1, 2)

        # –†–∞–∑–º–µ—á–∞–µ–º –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è —Å [ENTITY] —Ç–æ–∫–µ–Ω–∞–º–∏
        def mark_entity_inline(row):
            sent = row["sentence"]
            start = row["entity_pos_start_rel"]
            end = row["entity_pos_end_rel"]
            tag = row["entity_tag"]
            tag_token = f"[ENTITY:{tag}]"
            tag_token_close = f"[/ENTITY:{tag}]"
            return (
                sent[:start] +
                f"{tag_token} " + sent[start:end] + f" {tag_token_close}" +
                sent[end:]
            )

        df["input_text"] = df.apply(mark_entity_inline, axis=1)
        texts = df["input_text"].tolist()
        labels = torch.tensor(df["label"].astype(int).tolist(), dtype=torch.long)

        # –¢–æ–∫–µ–Ω–∏–∑–∏—Ä—É–µ–º
        encodings = tokenizer(texts, padding=True, truncation=True,
                              max_length=max_seq_len, return_tensors='pt')
        dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'], labels)
        return dataset

    except FileNotFoundError:
        logging.error(f"–§–∞–π–ª {file_path} –Ω–µ –Ω–∞–π–¥–µ–Ω!")
        return None

In [7]:

def save_metrics(epoch, all_labels, all_preds, result_path, backprop, loss):
    metrics_file = os.path.join(result_path, "metrics_class_weights.csv")
    report = classification_report(all_labels, all_preds, output_dict=True)
    
    # –°–æ–∑–¥–∞–µ–º DataFrame –¥–ª—è –º–µ—Ç—Ä–∏–∫
    metrics_data = {
        "epoch": epoch,
        "accuracy": report["accuracy"],
        "macro_precision": report["macro avg"]["precision"],
        "macro_recall": report["macro avg"]["recall"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_precision": report["weighted avg"]["precision"],
        "weighted_recall": report["weighted avg"]["recall"],
        "weighted_f1": report["weighted avg"]["f1-score"],
        "backprop": backprop , # (–î–æ–±–∞–≤–ª–µ–Ω–æ)
        "loss": loss
    }
    
    # –î–æ–±–∞–≤–ª—è–µ–º –º–µ—Ç—Ä–∏–∫–∏ –¥–ª—è –∫–∞–∂–¥–æ–≥–æ –∫–ª–∞—Å—Å–∞
    for label in sorted(report.keys()):
        if isinstance(report[label], dict):  # –ü—Ä–æ–ø—É—Å–∫–∞–µ–º 'accuracy', —Ç–∞–∫ –∫–∞–∫ —ç—Ç–æ float
            metrics_data[f"class_{label}_precision"] = report[label]["precision"]
            metrics_data[f"class_{label}_recall"] = report[label]["recall"]
            metrics_data[f"class_{label}_f1"] = report[label]["f1-score"]
    
    # –°–æ–∑–¥–∞–µ–º DataFrame –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º –≤ CSV
    metrics_df = pd.DataFrame([metrics_data])
    metrics_df.to_csv(metrics_file, mode='a', header=not os.path.exists(metrics_file), index=False)

def save_confusion_matrix(epoch, y_true, y_pred, result_path,backprop):
    cm = confusion_matrix(y_true, y_pred)
    cm_df = pd.DataFrame(cm, columns=["Pred_0", "Pred_1", "Pred_2"], index=["True_0", "True_1", "True_2"])
    cm_file = os.path.join(result_path, f"confusion_matrix_class_weights_epoch_{epoch}_backprop_{backprop}.csv")
    cm_df.to_csv(cm_file)



In [8]:
from sklearn.metrics import classification_report
import pandas as pd

def castom_classification_report(all_labels, all_preds):
    report = classification_report(all_labels, all_preds, output_dict=True)
    data = []
    for label, metrics in report.items():
        if isinstance(metrics, dict):  # –î–ª—è –≤—Å–µ—Ö –º–µ—Ç—Ä–∏–∫, –∫—Ä–æ–º–µ accuracy
            row = {'label': label}
            for metric, value in metrics.items():
                row[metric] = round(value * 100, 3) if metric != 'support' else value
            data.append(row)
        else:  # –î–ª—è accuracy
            data.append({'label': 'accuracy', 'precision': round(metrics * 100, 3), 'recall': None, 'f1-score': None, 'support': None})

    # –î–æ–±–∞–≤–ª—è–µ–º macro F1 –¥–ª—è –∫–ª–∞—Å—Å–æ–≤ 1 –∏ 2
    if '1' in report and '2' in report:
        f1_1 = report['1']['f1-score']
        f1_2 = report['2']['f1-score']
        f1_macro_1_2 = (f1_1 + f1_2) / 2
        data.append({
            'label': 'avg f1 (class 1&2)',
            'precision': None,
            'recall': None,
            'f1-score': round(f1_macro_1_2 * 100, 3),
            'support': None
        })
    
    # –°–æ–∑–¥–∞–µ–º DataFrame
    df = pd.DataFrame(data)
    df.index = [''] * len(df)

    # –í—ã–≤–æ–¥–∏–º —Ç–∞–±–ª–∏—Ü—É
    print(df.to_string(index=False))

In [9]:
def save_contrel_date():
    pass

def add_token():
    pass


def use_CyclicLR():
    pass

In [10]:

lr_m = 1e-6
# lr = 1e-6 –Ω–∞—á–∞–ª–æ—Å—å –æ–±—É—á–µ–Ω–∏–µ, —É–º–µ–Ω—å—à–µ–Ω–∏–µ TrainError
# –¥–æ–±–∞–≤–∏—Ç—å –≤–æ–ª–∏–¥–∞—Ü–∏—é –ø–æ —à–∞–≥–∞–º –≤ —Ç–µ—á–µ–Ω–∏–µ —ç–ø–æ—Ö–∏ –æ–±—É—á–µ–Ω–∏—è, –∞ –Ω–µ –∫–∞–¥–∂—ã–µ N –µ–ø–æ—Ö
# –î–æ–±–∞–≤–∏—Ç—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ –æ—à–∏–±–∫–∏ –≤ —Ñ–∞–π–ª—ã –¥–ª—è –ø–æ—Å—Ç—Ä–æ–µ–Ω–∏—è –≥—Ä–∞—Ñ–∏–∫–æ–≤
# –ø—Ä–æ–≤–µ—Ä–∏—Ç—å –º–µ—Ç–æ–¥—ã –ø–µ—Ä–µ–¥–∞—á–∏ —Å—É—â—å–Ω–æ—Å—Ç–∏ (–ø–∞—Ä–∞–µ—Ç—Ä–æ–º, +—Ç–∏–ø, –∏–ª–∏ –≤ —Ç–µ–∫—Å—Ç–µ –≤—ã–¥–µ–ª—è—è —Ç–µ–≥–∞–º–∏).
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="./ruRoberta-large/")
    #parser.add_argument("--model_name", type=str, default="sberbank-ai/ruRoberta-large", help="–ò–º—è –º–æ–¥–µ–ª–∏")
    parser.add_argument("--max_seq_len", type=int, default=512) #128
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--epochs", type=int, default=15)
    parser.add_argument("--lr", type=float, default=lr_m)
    parser.add_argument("--init_checkpoint", type=str, default=None)
    parser.add_argument("--train_data", type=str, required=False, default="./data/train_data.csv")
    parser.add_argument("--validation_data", type=str, required=False, default="./data/validation.csv")
    parser.add_argument("--eval_data", type=str, required=False, default="./data/test.csv")
    parser.add_argument("--result", type=str, default=f"./result_lr_{lr_m}_ENS_Teg_21_multi_dropout_V2_T_NotToken/")
    # –ò–≥–Ω–æ—Ä–∏—Ä—É–µ–º –∞—Ä–≥—É–º–µ–Ω—Ç—ã Jupyter
    args, unknown = parser.parse_known_args()
    os.makedirs(args.result, exist_ok=True)
    return args


if __name__ == "__main__":
    args = parse_args()
    print(args)  # –ü—Ä–æ–≤–µ—Ä—è–µ–º –∞—Ä–≥—É–º–µ–Ω—Ç—ã

    base_lr = args.lr
    min_lr = base_lr * (1/4)
    step_size_up = 400 #400

    best_avg_f1 = 0.0
    best_model_path = os.path.join(args.result, "best_model.pth")

    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    
    logging.basicConfig(level=logging.INFO)

    tokenizer = RobertaTokenizer.from_pretrained(args.model_name)
    #model = RobertaForSequenceClassification.from_pretrained(args.model_name, num_labels=3)
    model = RobertaWithMultiSampleDropout(model_name='./ruRoberta-large', num_labels=3, use_multi_sample_dropout=True )


    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    

    train_dataset = load_data(args.train_data, tokenizer, args.max_seq_len)
    validation_data = load_data(args.validation_data, tokenizer, args.max_seq_len)
    eval_dataset = load_data(args.eval_data, tokenizer, args.max_seq_len)
    # –ò–∑–≤–ª–µ–∫–∞–µ–º –ø–µ—Ä–≤—ã–µ 10 –ø—Ä–∏–º–µ—Ä–æ–≤ –∏–∑ –¥–∞—Ç–∞—Å–µ—Ç–∞
    for i in range(2):
        input_ids, attention_mask, label = train_dataset[i]
        decoded_text = tokenizer.decode(input_ids, skip_special_tokens=False)
        print(f"Text: {decoded_text}")
        print(f"Label: {label.item()}")
        print("-" * 50)
    
    if train_dataset is None or eval_dataset is None or validation_data is None:
        sys.exit(f"–û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏ –¥–∞–Ω–Ω—ã—Ö: —É–±–µ–¥–∏—Ç–µ—Å—å, —á—Ç–æ —Ñ–∞–π–ª—ã {args.train_data}, {args.validation_data} –∏ {args.eval_data} —Å—É—â–µ—Å—Ç–≤—É—é—Ç –∏ —Å–æ–¥–µ—Ä–∂–∞—Ç –Ω—É–∂–Ω—ã–µ –∫–æ–ª–æ–Ω–∫–∏.")

    # –ü–æ–ª—É—á–∞–µ–º –º–µ—Ç–∫–∏ –∫–ª–∞—Å—Å–æ–≤ –∏–∑ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω–æ–≥–æ –¥–∞—Ç–∞—Å–µ—Ç–∞
    train_labels = [label.item() for _, _, label in train_dataset]

    # –í—ã—á–∏—Å–ª—è–µ–º –≤–µ—Å–∞ –∫–ª–∞—Å—Å–æ–≤
    class_weights = compute_ens_weights(train_labels, beta=0.999)

    # –í—ã–≤–æ–¥ –≤–µ—Å–æ–≤ –∫–ª–∞—Å—Å–æ–≤
    print(f"class_weights = {class_weights}")
    
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    validation_loader = DataLoader(validation_data, batch_size=args.batch_size)
    eval_loader = DataLoader(eval_dataset, batch_size=args.batch_size)
    
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    #scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr = base_lr, step_size_up = step_size_up, mode="triangular2",cycle_momentum=False )

    loss_fct = nn.CrossEntropyLoss(weight=class_weights.to(device))
    
    def train():
        model.train()
        batches_per_validation = (len(train_loader) // 2)+2
        batches_per_test = (len(train_loader) // 5)+2
        
        for epoch in range(args.epochs):
            print(f"Epoch: {epoch}/{args.epochs}")
            total_loss = 0
            for i,batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1} - Batches")):
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                optimizer.zero_grad()
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                loss.backward()
                optimizer.step()
                #scheduler.step()
                
                total_loss += loss.item()
                if(i % batches_per_validation == 0 and i >50):
                    evaluate(epoch, backprop="Validation", vall_train = True)
                if(i % batches_per_test == 0 and i >50):
                    evaluate(epoch, backprop="Test", flag_print = True)
                
            logging.info(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}, Step: {len(train_loader)}")
            # –í–∞–ª–∏–¥–∞—Ü–∏—è —Å –æ–±—Ä–∞—Ç–Ω—ã–º —Ä–∞—Å–ø—Ä–æ—Å—Ç—Ä–∞–Ω–µ–Ω–∏–µ–º –æ—à–∏–±–∫–∏ –∫–∞–∂–¥—ã–µ 2 —ç–ø–æ—Ö–∏
            evaluate(epoch , backprop="Train", flag_print = True)
            evaluate(epoch , backprop="Test", flag_print = True)
            evaluate(epoch , backprop="Validation", flag_print = True, vall_train = True)
            
            
    
    def evaluate(epoch = None, backprop = "None", flag_print = False, vall_train = False):
        global best_avg_f1
        model.eval()
        all_preds, all_labels = [], []
        total_loss = 0
        print(f"evaluate, backprop: {backprop}")
        loader = validation_loader
        if backprop == "Validation":
            loader = validation_loader
        elif backprop == "Test":
            loader = eval_loader
        elif backprop == "Train":
            loader = train_loader
        
        else: loader = eval_loader
        with torch.no_grad() if not backprop == "Validation" else torch.enable_grad():  # –í–∫–ª—é—á–∞–µ–º –≥—Ä–∞–¥–∏–µ–Ω—Ç—ã –¥–ª—è –æ–±—É—á–µ–Ω–∏—è –Ω–∞ –≤–∞–ª–∏–¥–∞—Ü–∏–∏
            for batch in loader:
            #for batch in loader:
                input_ids, attention_mask, labels = [x.to(device) for x in batch]
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = loss_fct(outputs.logits, labels)
                
                
                
                if backprop == "Validation" and vall_train:
                    #—É–º–µ–Ω—å—à–∞—è —Ç–∫ –æ–±—É—á–∞—é—â–∏–µ –¥–∞–Ω–Ω—ã–µ —É–¥–≤–æ–∏–ª–∏—Å—å
                    loss = loss
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    #scheduler.step()
                    loss = loss

                preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())
                total_loss += loss.item()
                
        avg_loss = total_loss / len(loader)
        f1 = f1_score(all_labels, all_preds, average='macro')
        f1_pn = f1_score(all_labels, all_preds, labels=[1, 2], average='macro')
        avg_f1 = (f1 + f1_pn) / 2

        if(flag_print):
            print()
            print("--"*20)
            print(f"Result evaluate in {backprop}")
            #logging.info(f"{backprop} Loss: {avg_loss:.4f}")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")
            #print(classification_report(all_labels, all_preds))
            castom_classification_report(all_labels, all_preds)

        # üíæ –°–æ—Ö—Ä–∞–Ω—è–µ–º –º–æ–¥–µ–ª—å —Ç–æ–ª—å–∫–æ –ø—Ä–∏ —É–ª—É—á—à–µ–Ω–∏–∏ —Å—Ä–µ–¥–Ω–µ–π –º–µ—Ç—Ä–∏–∫–∏
        if (avg_f1 > best_avg_f1) and backprop == "Test":
            best_avg_f1 = avg_f1
            torch.save(model.state_dict(), best_model_path)
            tokenizer.save_pretrained(os.path.join(args.result, "tokenizer"))
            
            print(f"[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: {best_avg_f1:.4f}")
            print(f"[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: {best_model_path}")
            # üî∏ –°–æ—Ö—Ä–∞–Ω—è–µ–º –∑–Ω–∞—á–µ–Ω–∏–µ –ª—É—á—à–µ–≥–æ F1 –≤ —Ñ–∞–π–ª
            best_score_path = os.path.join(args.result, "best_score.txt")
            save_metrics_best(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
            save_confusion_matrix(epoch, all_labels, all_preds, args.result, backprop)
            with open(best_score_path, "w") as f:
                f.write(f"Epoch: {epoch}\n")
                f.write(f"Loss: {avg_loss:.4f}\n")
                f.write(f"F1-pn0: {f1:.4f}\n")
                f.write(f"F1-pn: {f1_pn:.4f}\n")
            print(f"Loss: {avg_loss:.4f}")
            print(f"F1-macro: {f1:.4f}")
            print(f"F1-pn: {f1_pn:.4f}")


        if epoch is not None:
            save_metrics(epoch, all_labels, all_preds, args.result, backprop, avg_loss)
            
        
    if args.init_checkpoint:
        model.load_state_dict(torch.load(args.init_checkpoint, map_location=device))

    train()

Namespace(model_name='./ruRoberta-large/', max_seq_len=512, batch_size=16, epochs=15, lr=1e-06, init_checkpoint=None, train_data='./data/train_data.csv', validation_data='./data/validation.csv', eval_data='./data/test.csv', result='./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/')


Some weights of RobertaModel were not initialized from the model checkpoint at ./ruRoberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Text: <s>–î–∂–µ–π–º—Å ¬´–ë–∞–¥–¥–∏¬ª –ú–∞–∫–≥–∏—Ä—Ç (James (Buddy) McGirt, —Ç—Ä–µ–Ω–µ—Ä –î–∞–¥–∞—à–µ–≤–∞ —É–ø—Ä–∞—à–∏–≤–∞–ª –¥–∞–≥–µ—Å—Ç–∞–Ω—Å–∫–æ–≥–æ [ENTITY:PROFESSION] —Å–ø–æ—Ä—Ç—Å–º–µ–Ω–∞ [/ENTITY:PROFESSION] –æ—Å—Ç–∞–Ω–æ–≤–∏—Ç—å –±–æ–π, –Ω–æ —Ç–æ—Ç —Ö–æ—Ç–µ–ª –ø—Ä–æ–¥–æ–ª–∂–∞—Ç—å.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Label: 0
--------------------

Epoch 1 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:50<03:15,  1.69it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7907
F1-macro: 0.2782
F1-pn: 0.0000
             label  precision  recall  f1-score  support
                 0     71.628 100.000    83.469    616.0
                 1      0.000   0.000     0.000    111.0
                 2      0.000   0.000     0.000    133.0
          accuracy     71.628     NaN       NaN      NaN
         macro avg     23.876  33.333    27.823    860.0
      weighted avg     51.306  71.628    59.787    860.0
avg f1 (class 1&2)        NaN     NaN     0.000      NaN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Epoch 1 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:59<17:31,  3.20s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.1391
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.7907
F1-macro: 0.2782
F1-pn: 0.0000


Epoch 1 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:49<02:23,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7354
F1-macro: 0.2901
F1-pn: 0.0174
             label  precision  recall  f1-score  support
                 0     71.846  99.838    83.560    616.0
                 1     50.000   1.802     3.478    111.0
                 2      0.000   0.000     0.000    133.0
          accuracy     71.744     NaN       NaN      NaN
         macro avg     40.615  33.880    29.013    860.0
      weighted avg     57.915  71.744    60.301    860.0
avg f1 (class 1&2)        NaN     NaN     1.739      NaN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Epoch 1 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:59<14:53,  3.66s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.1538
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.7354
F1-macro: 0.2901
F1-pn: 0.0174


Epoch 1 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:22<02:00,  1.70it/s]

evaluate, backprop: Validation


Epoch 1 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:42<01:33,  1.71it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7537
F1-macro: 0.3003
F1-pn: 0.0331
             label  precision  recall  f1-score  support
                 0     71.864  99.513    83.458    616.0
                 1     50.000   2.703     5.128    111.0
                 2    100.000   0.752     1.493    133.0
          accuracy     71.744     NaN       NaN      NaN
         macro avg     73.955  34.323    30.026    860.0
      weighted avg     73.393  71.744    60.672    860.0
avg f1 (class 1&2)        NaN     NaN     3.310      NaN


Epoch 1 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:53<09:42,  3.67s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.1667
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.7537
F1-macro: 0.3003
F1-pn: 0.0331


Epoch 1 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:42<00:44,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.7015
F1-macro: 0.3489
F1-pn: 0.1043
             label  precision  recall  f1-score  support
                 0     73.012  98.377    83.817    616.0
                 1      0.000   0.000     0.000    111.0
                 2     56.667  12.782    20.859    133.0
          accuracy     72.442     NaN       NaN      NaN
         macro avg     43.226  37.053    34.892    860.0
      weighted avg     61.061  72.442    63.263    860.0
avg f1 (class 1&2)        NaN     NaN    10.429      NaN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Epoch 1 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:53<04:33,  3.70s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.2266
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.7015
F1-macro: 0.3489
F1-pn: 0.1043


Epoch 1 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:37<00:00,  1.23it/s]
INFO:root:Epoch 1, Loss: 0.8886691056102155, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.7634
F1-macro: 0.4279
F1-pn: 0.2190
             label  precision  recall  f1-score  support
                 0     75.315  96.376    84.554   4774.0
                 1     69.767   3.505     6.674    856.0
                 2     57.113  27.507    37.131   1007.0
          accuracy     73.949     NaN       NaN      NaN
         macro avg     67.399  42.463    42.786   6637.0
      weighted avg     71.838  73.949    67.314   6637.0
avg f1 (class 1&2)        NaN     NaN    21.903      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6707
F1-macro: 0.4341
F1-pn: 0.2285
             label  precision  recall  f1-score  support
                 0     75.349  96.266    84.533    616.0
                 1     80.000   3.604     6.897    111.0
                 2     57.353  29.323    38.806    133.0
          accuracy     73.953     NaN       

Epoch 2 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:50<03:14,  1.69it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6535
F1-macro: 0.4757
F1-pn: 0.2923
             label  precision  recall  f1-score  support
                 0     75.911  94.643    84.249    616.0
                 1     66.667  12.613    21.212    111.0
                 2     53.521  28.571    37.255    133.0
          accuracy     73.837     NaN       NaN      NaN
         macro avg     65.366  45.276    47.572    860.0
      weighted avg     71.256  73.837    68.845    860.0
avg f1 (class 1&2)        NaN     NaN    29.234      NaN


Epoch 2 - Batches:  21%|‚ñà‚ñà        | 86/415 [01:00<19:54,  3.63s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.3840
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.6535
F1-macro: 0.4757
F1-pn: 0.2923


Epoch 2 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:50<02:24,  1.69it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6403
F1-macro: 0.5837
F1-pn: 0.4509
             label  precision  recall  f1-score  support
                 0     80.612  89.773    84.946    616.0
                 1     59.259  28.829    38.788    111.0
                 2     54.167  48.872    51.383    133.0
          accuracy     75.581     NaN       NaN      NaN
         macro avg     64.679  55.825    58.373    860.0
      weighted avg     73.766  75.581    73.798    860.0
avg f1 (class 1&2)        NaN     NaN    45.086      NaN


Epoch 2 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [02:01<15:01,  3.69s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.5173
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.6403
F1-macro: 0.5837
F1-pn: 0.4509


Epoch 2 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:23<02:00,  1.70it/s]

evaluate, backprop: Validation


Epoch 2 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:44<01:34,  1.70it/s]

evaluate, backprop: Test


Epoch 2 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:51<07:08,  2.70s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6007
F1-macro: 0.5540
F1-pn: 0.4072
             label  precision  recall  f1-score  support
                 0     79.155  91.234    84.766    616.0
                 1     70.370  17.117    27.536    111.0
                 2     56.098  51.880    53.906    133.0
          accuracy     75.581     NaN       NaN      NaN
         macro avg     68.541  53.410    55.403    860.0
      weighted avg     74.455  75.581    72.607    860.0
avg f1 (class 1&2)        NaN     NaN    40.721      NaN


Epoch 2 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:41<00:44,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5758
F1-macro: 0.5864
F1-pn: 0.4552
             label  precision  recall  f1-score  support
                 0     80.495  89.773    84.881    616.0
                 1     67.568  22.523    33.784    111.0
                 2     56.618  57.895    57.249    133.0
          accuracy     76.163     NaN       NaN      NaN
         macro avg     68.227  56.730    58.638    860.0
      weighted avg     75.134  76.163    74.013    860.0
avg f1 (class 1&2)        NaN     NaN    45.516      NaN


Epoch 2 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:51<04:31,  3.66s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.5208
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.5758
F1-macro: 0.5864
F1-pn: 0.4552


Epoch 2 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:35<00:00,  1.24it/s]
INFO:root:Epoch 2, Loss: 0.7446353566933828, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.6149
F1-macro: 0.6414
F1-pn: 0.5307
             label  precision  recall  f1-score  support
                 0     84.705  87.935    86.290   4774.0
                 1     69.600  30.491    42.405    856.0
                 2     56.432  73.188    63.727   1007.0
          accuracy     78.288     NaN       NaN      NaN
         macro avg     70.246  63.871    64.140   6637.0
      weighted avg     78.467  78.288    77.206   6637.0
avg f1 (class 1&2)        NaN     NaN    53.066      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5817
F1-macro: 0.6226
F1-pn: 0.5075
             label  precision  recall  f1-score  support
                 0     84.530  86.039    85.278    616.0
                 1     69.767  27.027    38.961    111.0
                 2     53.158  75.940    62.539    133.0
          accuracy     76.860     NaN       

Epoch 3 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:13,  1.70it/s]

evaluate, backprop: Test


Epoch 3 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:57<14:45,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5531
F1-macro: 0.6177
F1-pn: 0.4989
             label  precision  recall  f1-score  support
                 0     82.186  89.123    85.514    616.0
                 1     70.455  27.928    40.000    111.0
                 2     56.757  63.158    59.786    133.0
          accuracy     77.209     NaN       NaN      NaN
         macro avg     69.799  60.070    61.767    860.0
      weighted avg     76.739  77.209    75.661    860.0
avg f1 (class 1&2)        NaN     NaN    49.893      NaN


Epoch 3 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:47<02:23,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5332
F1-macro: 0.6355
F1-pn: 0.5243
             label  precision  recall  f1-score  support
                 0     83.436  88.312    85.804    616.0
                 1     62.687  37.838    47.191    111.0
                 2     56.028  59.398    57.664    133.0
          accuracy     77.326     NaN       NaN      NaN
         macro avg     67.384  61.849    63.553    860.0
      weighted avg     76.519  77.326    76.469    860.0
avg f1 (class 1&2)        NaN     NaN    52.428      NaN


Epoch 3 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:57<14:59,  3.69s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.5799
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.5332
F1-macro: 0.6355
F1-pn: 0.5243


Epoch 3 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:20<02:00,  1.70it/s]

evaluate, backprop: Validation


Epoch 3 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:40<01:34,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5276
F1-macro: 0.6424
F1-pn: 0.5327
             label  precision  recall  f1-score  support
                 0     83.107  89.448    86.161    616.0
                 1     63.636  37.838    47.458    111.0
                 2     59.542  58.647    59.091    133.0
          accuracy     78.023     NaN       NaN      NaN
         macro avg     68.762  61.978    64.237    860.0
      weighted avg     76.950  78.023    76.979    860.0
avg f1 (class 1&2)        NaN     NaN    53.274      NaN


Epoch 3 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:51<09:45,  3.68s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.5876
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.5276
F1-macro: 0.6424
F1-pn: 0.5327


Epoch 3 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:40<00:44,  1.70it/s]

evaluate, backprop: Test


Epoch 3 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:48<03:19,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5064
F1-macro: 0.6427
F1-pn: 0.5320
             label  precision  recall  f1-score  support
                 0     83.714  89.286    86.410    616.0
                 1     78.571  29.730    43.137    111.0
                 2     57.764  69.925    63.265    133.0
          accuracy     78.605     NaN       NaN      NaN
         macro avg     73.350  62.980    64.271    860.0
      weighted avg     79.037  78.605    77.245    860.0
avg f1 (class 1&2)        NaN     NaN    53.201      NaN


Epoch 3 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:31<00:00,  1.25it/s]
INFO:root:Epoch 3, Loss: 0.616292806400592, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.4999
F1-macro: 0.7328
F1-pn: 0.6542
             label  precision  recall  f1-score  support
                 0     88.672  89.359    89.014   4774.0
                 1     68.462  51.986    59.097    856.0
                 2     66.582  77.756    71.736   1007.0
          accuracy     82.778     NaN       NaN      NaN
         macro avg     74.572  73.034    73.282   6637.0
      weighted avg     82.714  82.778    82.534   6637.0
avg f1 (class 1&2)        NaN     NaN    65.417      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5090
F1-macro: 0.7023
F1-pn: 0.6178
             label  precision  recall  f1-score  support
                 0     88.591  85.714    87.129    616.0
                 1     65.432  47.748    55.208    111.0
                 2     59.016  81.203    68.354    133.0
          accuracy     80.116     NaN       

Epoch 4 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:14,  1.70it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:57<14:45,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5236
F1-macro: 0.7004
F1-pn: 0.6181
             label  precision  recall  f1-score  support
                 0     89.565  83.604    86.482    616.0
                 1     58.654  54.955    56.744    111.0
                 2     58.011  78.947    66.879    133.0
          accuracy     79.186     NaN       NaN      NaN
         macro avg     68.743  72.502    70.035    860.0
      weighted avg     80.696  79.186    79.612    860.0
avg f1 (class 1&2)        NaN     NaN    61.812      NaN


Epoch 4 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:46<02:23,  1.70it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:54<10:57,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4857
F1-macro: 0.6929
F1-pn: 0.6027
             label  precision  recall  f1-score  support
                 0     86.838  87.825    87.328    616.0
                 1     67.105  45.946    54.545    111.0
                 2     60.248  72.932    65.986    133.0
          accuracy     80.116     NaN       NaN      NaN
         macro avg     71.397  68.901    69.287    860.0
      weighted avg     80.179  80.116    79.797    860.0
avg f1 (class 1&2)        NaN     NaN    60.266      NaN


Epoch 4 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:16<02:01,  1.70it/s]

evaluate, backprop: Validation


Epoch 4 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:37<01:34,  1.70it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:44<07:08,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4832
F1-macro: 0.6792
F1-pn: 0.5831
             label  precision  recall  f1-score  support
                 0     85.714  88.636    87.151    616.0
                 1     66.667  41.441    51.111    111.0
                 2     61.039  70.677    65.505    133.0
          accuracy     79.767     NaN       NaN      NaN
         macro avg     71.140  66.918    67.922    860.0
      weighted avg     79.440  79.767    79.152    860.0
avg f1 (class 1&2)        NaN     NaN    58.308      NaN


Epoch 4 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:34<00:44,  1.70it/s]

evaluate, backprop: Test


Epoch 4 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:41<03:19,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4775
F1-macro: 0.6942
F1-pn: 0.6045
             label  precision  recall  f1-score  support
                 0     87.725  87.013    87.368    616.0
                 1     62.963  45.946    53.125    111.0
                 2     60.714  76.692    67.774    133.0
          accuracy     80.116     NaN       NaN      NaN
         macro avg     70.467  69.884    69.422    860.0
      weighted avg     80.352  80.116    79.918    860.0
avg f1 (class 1&2)        NaN     NaN    60.450      NaN


Epoch 4 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:25<00:00,  1.28it/s]
INFO:root:Epoch 4, Loss: 0.513326581881707, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.4083
F1-macro: 0.7897
F1-pn: 0.7294
             label  precision  recall  f1-score  support
                 0     91.422  90.637    91.028   4774.0
                 1     74.825  62.500    68.109    856.0
                 2     71.825  84.806    77.778   1007.0
          accuracy     86.123     NaN       NaN      NaN
         macro avg     79.357  79.314    78.972   6637.0
      weighted avg     86.308  86.123    86.061   6637.0
avg f1 (class 1&2)        NaN     NaN    72.944      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4730
F1-macro: 0.7093
F1-pn: 0.6266
             label  precision  recall  f1-score  support
                 0     88.796  86.201    87.479    616.0
                 1     65.169  52.252    58.000    111.0
                 2     59.538  77.444    67.320    133.0
          accuracy     80.465     NaN       

Epoch 5 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:13,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5017
F1-macro: 0.7153
F1-pn: 0.6400
             label  precision  recall  f1-score  support
                 0     91.351  82.305    86.593    616.0
                 1     61.818  61.261    61.538    111.0
                 2     55.897  81.955    66.463    133.0
          accuracy     79.535     NaN       NaN      NaN
         macro avg     69.689  75.174    71.532    860.0
      weighted avg     82.057  79.535    80.246    860.0
avg f1 (class 1&2)        NaN     NaN    64.001      NaN


Epoch 5 - Batches:  21%|‚ñà‚ñà        | 86/415 [01:00<20:21,  3.71s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6777
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.5017
F1-macro: 0.7153
F1-pn: 0.6400


Epoch 5 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:50<02:23,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4597
F1-macro: 0.7265
F1-pn: 0.6502
             label  precision  recall  f1-score  support
                 0     89.149  86.688    87.901    616.0
                 1     67.347  59.459    63.158    111.0
                 2     60.736  74.436    66.892    133.0
          accuracy     81.279     NaN       NaN      NaN
         macro avg     72.411  73.528    72.650    860.0
      weighted avg     81.941  81.279    81.458    860.0
avg f1 (class 1&2)        NaN     NaN    65.025      NaN


Epoch 5 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [02:01<15:00,  3.69s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6884
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.4597
F1-macro: 0.7265
F1-pn: 0.6502


Epoch 5 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:23<02:00,  1.71it/s]

evaluate, backprop: Validation


Epoch 5 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:43<01:34,  1.70it/s]

evaluate, backprop: Test


Epoch 5 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:51<07:11,  2.71s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4583
F1-macro: 0.7058
F1-pn: 0.6160
             label  precision  recall  f1-score  support
                 0     86.287  90.909    88.538    616.0
                 1     67.105  45.946    54.545    111.0
                 2     68.148  69.173    68.657    133.0
          accuracy     81.744     NaN       NaN      NaN
         macro avg     73.847  68.676    70.580    860.0
      weighted avg     81.006  81.744    81.076    860.0
avg f1 (class 1&2)        NaN     NaN    61.601      NaN


Epoch 5 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:41<00:44,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4565
F1-macro: 0.7325
F1-pn: 0.6561
             label  precision  recall  f1-score  support
                 0     89.404  87.662    88.525    616.0
                 1     69.149  58.559    63.415    111.0
                 2     61.728  75.188    67.797    133.0
          accuracy     81.977     NaN       NaN      NaN
         macro avg     73.427  73.803    73.245    860.0
      weighted avg     82.510  81.977    82.078    860.0
avg f1 (class 1&2)        NaN     NaN    65.606      NaN


Epoch 5 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:53<05:07,  4.16s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6943
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.4565
F1-macro: 0.7325
F1-pn: 0.6561


Epoch 5 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:37<00:00,  1.23it/s]
INFO:root:Epoch 5, Loss: 0.4172841107091272, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.3115
F1-macro: 0.8562
F1-pn: 0.8165
             label  precision  recall  f1-score  support
                 0     94.951  92.187    93.549   4774.0
                 1     74.231  81.776    77.821    856.0
                 2     83.381  87.686    85.479   1007.0
          accuracy     90.161     NaN       NaN      NaN
         macro avg     84.188  87.216    85.616   6637.0
      weighted avg     90.523  90.161    90.296   6637.0
avg f1 (class 1&2)        NaN     NaN    81.650      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4632
F1-macro: 0.7339
F1-pn: 0.6615
             label  precision  recall  f1-score  support
                 0     89.966  85.877    87.874    616.0
                 1     62.500  67.568    64.935    111.0
                 2     63.158  72.180    67.368    133.0
          accuracy     81.395     NaN       

Epoch 6 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:13,  1.71it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4696
F1-macro: 0.7349
F1-pn: 0.6629
             label  precision  recall  f1-score  support
                 0     91.115  84.903    87.899    616.0
                 1     57.971  72.072    64.257    111.0
                 2     64.865  72.180    68.327    133.0
          accuracy     81.279     NaN       NaN      NaN
         macro avg     71.317  76.385    73.495    860.0
      weighted avg     82.777  81.279    81.821    860.0
avg f1 (class 1&2)        NaN     NaN    66.292      NaN


Epoch 6 - Batches:  21%|‚ñà‚ñà        | 86/415 [01:00<20:11,  3.68s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.6989
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.4696
F1-macro: 0.7349
F1-pn: 0.6629


Epoch 6 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:50<02:23,  1.71it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:57<10:56,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4616
F1-macro: 0.7254
F1-pn: 0.6483
             label  precision  recall  f1-score  support
                 0     90.119  85.877    87.947    616.0
                 1     65.714  62.162    63.889    111.0
                 2     58.929  74.436    65.781    133.0
          accuracy     81.047     NaN       NaN      NaN
         macro avg     71.587  74.158    72.539    860.0
      weighted avg     82.146  81.047    81.414    860.0
avg f1 (class 1&2)        NaN     NaN    64.835      NaN


Epoch 6 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:19<02:00,  1.70it/s]

evaluate, backprop: Validation


Epoch 6 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:40<01:33,  1.70it/s]

evaluate, backprop: Test


Epoch 6 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:47<07:08,  2.70s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4632
F1-macro: 0.7331
F1-pn: 0.6561
             label  precision  recall  f1-score  support
                 0     88.780  88.636    88.708    616.0
                 1     69.474  59.459    64.078    111.0
                 2     63.333  71.429    67.138    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     73.863  73.175    73.308    860.0
      weighted avg     82.353  82.209    82.193    860.0
avg f1 (class 1&2)        NaN     NaN    65.608      NaN


Epoch 6 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:37<00:44,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4551
F1-macro: 0.7379
F1-pn: 0.6632
             label  precision  recall  f1-score  support
                 0     89.309  88.149    88.725    616.0
                 1     70.526  60.360    65.049    111.0
                 2     62.420  73.684    67.586    133.0
          accuracy     82.326     NaN       NaN      NaN
         macro avg     74.085  74.065    73.787    860.0
      weighted avg     82.727  82.326    82.400    860.0
avg f1 (class 1&2)        NaN     NaN    66.317      NaN


Epoch 6 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:48<04:31,  3.67s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.7005
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.4551
F1-macro: 0.7379
F1-pn: 0.6632


Epoch 6 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:31<00:00,  1.25it/s]
INFO:root:Epoch 6, Loss: 0.31694520773837365, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.2147
F1-macro: 0.9093
F1-pn: 0.8830
             label  precision  recall  f1-score  support
                 0     96.090  96.271    96.181   4774.0
                 1     87.653  83.762    85.663    856.0
                 2     89.672  92.254    90.945   1007.0
          accuracy     94.049     NaN       NaN      NaN
         macro avg     91.138  90.762    90.930   6637.0
      weighted avg     94.028  94.049    94.030   6637.0
avg f1 (class 1&2)        NaN     NaN    88.304      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4475
F1-macro: 0.7318
F1-pn: 0.6548
             label  precision  recall  f1-score  support
                 0     89.016  88.149    88.581    616.0
                 1     68.085  57.658    62.439    111.0
                 2     63.462  74.436    68.512    133.0
          accuracy     82.093     NaN       

Epoch 7 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:13,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4689
F1-macro: 0.7558
F1-pn: 0.6858
             label  precision  recall  f1-score  support
                 0     90.547  88.636    89.582    616.0
                 1     68.182  67.568    67.873    111.0
                 2     65.986  72.932    69.286    133.0
          accuracy     83.488     NaN       NaN      NaN
         macro avg     74.905  76.379    75.580    860.0
      weighted avg     83.862  83.488    83.641    860.0
avg f1 (class 1&2)        NaN     NaN    68.580      NaN


Epoch 7 - Batches:  21%|‚ñà‚ñà        | 86/415 [01:00<20:16,  3.70s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.7208
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.4689
F1-macro: 0.7558
F1-pn: 0.6858


Epoch 7 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:50<02:23,  1.70it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:57<10:55,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4751
F1-macro: 0.7393
F1-pn: 0.6653
             label  precision  recall  f1-score  support
                 0     89.850  87.662    88.743    616.0
                 1     70.526  60.360    65.049    111.0
                 2     61.585  75.940    68.013    133.0
          accuracy     82.326     NaN       NaN      NaN
         macro avg     73.987  74.654    73.935    860.0
      weighted avg     82.985  82.326    82.479    860.0
avg f1 (class 1&2)        NaN     NaN    66.531      NaN


Epoch 7 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:20<02:00,  1.70it/s]

evaluate, backprop: Validation


Epoch 7 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:40<01:34,  1.70it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:48<07:09,  2.70s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4733
F1-macro: 0.7398
F1-pn: 0.6657
             label  precision  recall  f1-score  support
                 0     89.456  88.149    88.798    616.0
                 1     65.138  63.964    64.545    111.0
                 2     65.972  71.429    68.592    133.0
          accuracy     82.442     NaN       NaN      NaN
         macro avg     73.522  74.514    73.979    860.0
      weighted avg     82.686  82.442    82.543    860.0
avg f1 (class 1&2)        NaN     NaN    66.569      NaN


Epoch 7 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:37<00:44,  1.70it/s]

evaluate, backprop: Test


Epoch 7 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:45<03:20,  2.70s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4771
F1-macro: 0.7417
F1-pn: 0.6696
             label  precision  recall  f1-score  support
                 0     90.236  87.013    88.595    616.0
                 1     64.655  67.568    66.079    111.0
                 2     64.000  72.180    67.845    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     72.964  75.587    74.173    860.0
      weighted avg     82.877  82.209    82.480    860.0
avg f1 (class 1&2)        NaN     NaN    66.962      NaN


Epoch 7 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:28<00:00,  1.26it/s]
INFO:root:Epoch 7, Loss: 0.21603561976408384, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.1255
F1-macro: 0.9560
F1-pn: 0.9431
             label  precision  recall  f1-score  support
                 0     98.299  98.052    98.175   4774.0
                 1     92.865  92.757    92.811    856.0
                 2     95.196  96.425    95.807   1007.0
          accuracy     97.122     NaN       NaN      NaN
         macro avg     95.454  95.745    95.598   6637.0
      weighted avg     97.127  97.122    97.124   6637.0
avg f1 (class 1&2)        NaN     NaN    94.309      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.4768
F1-macro: 0.7387
F1-pn: 0.6645
             label  precision  recall  f1-score  support
                 0     88.907  88.474    88.690    616.0
                 1     68.224  65.766    66.972    111.0
                 2     64.286  67.669    65.934    133.0
          accuracy     82.326     NaN       

Epoch 8 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:13,  1.70it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:57<14:45,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.4981
F1-macro: 0.7383
F1-pn: 0.6640
             label  precision  recall  f1-score  support
                 0     90.117  87.338    88.706    616.0
                 1     63.158  64.865    64.000    111.0
                 2     65.101  72.932    68.794    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     72.792  75.045    73.833    860.0
      weighted avg     82.769  82.209    82.438    860.0
avg f1 (class 1&2)        NaN     NaN    66.397      NaN


Epoch 8 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:46<02:23,  1.70it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:54<10:56,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5065
F1-macro: 0.7461
F1-pn: 0.6723
             label  precision  recall  f1-score  support
                 0     89.431  89.286    89.358    616.0
                 1     69.000  62.162    65.403    111.0
                 2     66.207  72.180    69.065    133.0
          accuracy     83.140     NaN       NaN      NaN
         macro avg     74.879  74.543    74.609    860.0
      weighted avg     83.202  83.140    83.128    860.0
avg f1 (class 1&2)        NaN     NaN    67.234      NaN


Epoch 8 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:16<02:00,  1.70it/s]

evaluate, backprop: Validation


Epoch 8 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:37<01:33,  1.70it/s]

evaluate, backprop: Test


Epoch 8 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:44<07:07,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5043
F1-macro: 0.7432
F1-pn: 0.6671
             label  precision  recall  f1-score  support
                 0     88.694  90.422    89.550    616.0
                 1     72.414  56.757    63.636    111.0
                 2     66.897  72.932    69.784    133.0
          accuracy     83.372     NaN       NaN      NaN
         macro avg     76.002  73.370    74.323    860.0
      weighted avg     83.222  83.372    83.148    860.0
avg f1 (class 1&2)        NaN     NaN    66.710      NaN


Epoch 8 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:33<00:44,  1.70it/s]

evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5168
F1-macro: 0.7565
F1-pn: 0.6886
             label  precision  recall  f1-score  support
                 0     90.349  88.149    89.236    616.0
                 1     66.942  72.973    69.828    111.0
                 2     66.667  69.173    67.897    133.0
          accuracy     83.256     NaN       NaN      NaN
         macro avg     74.653  76.765    75.653    860.0
      weighted avg     83.666  83.256    83.431    860.0
avg f1 (class 1&2)        NaN     NaN    68.862      NaN


Epoch 8 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:44<04:33,  3.69s/it]

[Checkpoint] üéØ –ù–æ–≤—ã–π –ª—É—á—à–∏–π —Å—Ä–µ–¥–Ω–∏–π F1: 0.7226
[Checkpoint] üíæ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: ./result_lr_1e-06_ENS_Teg_21_multi_dropout_V2_T_NotToken/best_model.pth
Loss: 0.5168
F1-macro: 0.7565
F1-pn: 0.6886


Epoch 8 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:28<00:00,  1.26it/s]
INFO:root:Epoch 8, Loss: 0.12099704800779561, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0667
F1-macro: 0.9810
F1-pn: 0.9753
             label  precision  recall  f1-score  support
                 0     99.516  98.974    99.244   4774.0
                 1     97.427  97.313    97.370    856.0
                 2     96.422  99.007    97.697   1007.0
          accuracy     98.765     NaN       NaN      NaN
         macro avg     97.788  98.431    98.104   6637.0
      weighted avg     98.777  98.765    98.768   6637.0
avg f1 (class 1&2)        NaN     NaN    97.534      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5228
F1-macro: 0.7429
F1-pn: 0.6685
             label  precision  recall  f1-score  support
                 0     90.199  88.149    89.163    616.0
                 1     71.134  62.162    66.346    111.0
                 2     61.491  74.436    67.347    133.0
          accuracy     82.674     NaN       

Epoch 9 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:13,  1.70it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:57<14:45,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5708
F1-macro: 0.7501
F1-pn: 0.6813
             label  precision  recall  f1-score  support
                 0     91.111  86.526    88.759    616.0
                 1     62.992  72.072    67.227    111.0
                 2     65.541  72.932    69.039    133.0
          accuracy     82.558     NaN       NaN      NaN
         macro avg     73.215  77.177    75.008    860.0
      weighted avg     83.527  82.558    82.930    860.0
avg f1 (class 1&2)        NaN     NaN    68.133      NaN


Epoch 9 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:46<02:24,  1.70it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:54<10:56,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5681
F1-macro: 0.7376
F1-pn: 0.6644
             label  precision  recall  f1-score  support
                 0     90.339  86.526    88.391    616.0
                 1     65.766  65.766    65.766    111.0
                 2     61.635  73.684    67.123    133.0
          accuracy     81.860     NaN       NaN      NaN
         macro avg     72.580  75.325    73.760    860.0
      weighted avg     82.728  81.860    82.182    860.0
avg f1 (class 1&2)        NaN     NaN    66.445      NaN


Epoch 9 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:16<02:01,  1.70it/s]

evaluate, backprop: Validation


Epoch 9 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:37<01:33,  1.70it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:44<07:07,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5780
F1-macro: 0.7389
F1-pn: 0.6644
             label  precision  recall  f1-score  support
                 0     88.925  88.636    88.780    616.0
                 1     69.149  58.559    63.415    111.0
                 2     65.132  74.436    69.474    133.0
          accuracy     82.558     NaN       NaN      NaN
         macro avg     74.402  73.877    73.890    860.0
      weighted avg     82.693  82.558    82.521    860.0
avg f1 (class 1&2)        NaN     NaN    66.444      NaN


Epoch 9 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:33<00:44,  1.70it/s]

evaluate, backprop: Test


Epoch 9 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:41<03:19,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.5655
F1-macro: 0.7550
F1-pn: 0.6889
             label  precision  recall  f1-score  support
                 0     89.983  87.500    88.724    616.0
                 1     68.142  69.369    68.750    111.0
                 2     65.541  72.932    69.039    133.0
          accuracy     82.907     NaN       NaN      NaN
         macro avg     74.555  76.601    75.504    860.0
      weighted avg     83.384  82.907    83.102    860.0
avg f1 (class 1&2)        NaN     NaN    68.895      NaN


Epoch 9 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:24<00:00,  1.28it/s]
INFO:root:Epoch 9, Loss: 0.0596986051749842, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0298
F1-macro: 0.9960
F1-pn: 0.9949
             label  precision  recall  f1-score  support
                 0     99.895  99.707    99.801   4774.0
                 1     99.302  99.766    99.534    856.0
                 2     99.209  99.702    99.455   1007.0
          accuracy     99.714     NaN       NaN      NaN
         macro avg     99.469  99.725    99.597   6637.0
      weighted avg     99.715  99.714    99.714   6637.0
avg f1 (class 1&2)        NaN     NaN    99.494      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.5639
F1-macro: 0.7550
F1-pn: 0.6845
             label  precision  recall  f1-score  support
                 0     89.739  89.448    89.593    616.0
                 1     71.717  63.964    67.619    111.0
                 2     65.986  72.932    69.286    133.0
          accuracy     83.605     NaN       

Epoch 10 - Batches:  20%|‚ñà‚ñà        | 85/415 [00:49<03:13,  1.70it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  21%|‚ñà‚ñà        | 86/415 [00:57<14:48,  2.70s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6261
F1-macro: 0.7474
F1-pn: 0.6779
             label  precision  recall  f1-score  support
                 0     91.237  86.201    88.648    616.0
                 1     65.000  70.270    67.532    111.0
                 2     62.658  74.436    68.041    133.0
          accuracy     82.326     NaN       NaN      NaN
         macro avg     72.965  76.969    74.740    860.0
      weighted avg     83.431  82.326    82.736    860.0
avg f1 (class 1&2)        NaN     NaN    67.787      NaN


Epoch 10 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 170/415 [01:46<02:23,  1.70it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  41%|‚ñà‚ñà‚ñà‚ñà      | 171/415 [01:54<10:57,  2.69s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6341
F1-macro: 0.7424
F1-pn: 0.6704
             label  precision  recall  f1-score  support
                 0     89.967  87.338    88.633    616.0
                 1     65.766  65.766    65.766    111.0
                 2     64.238  72.932    68.310    133.0
          accuracy     82.326     NaN       NaN      NaN
         macro avg     73.324  75.345    74.236    860.0
      weighted avg     82.864  82.326    82.538    860.0
avg f1 (class 1&2)        NaN     NaN    67.038      NaN


Epoch 10 - Batches:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 209/415 [02:16<02:00,  1.70it/s]

evaluate, backprop: Validation


Epoch 10 - Batches:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 255/415 [03:45<01:49,  1.46it/s]  

evaluate, backprop: Test


Epoch 10 - Batches:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 256/415 [03:54<08:17,  3.13s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6430
F1-macro: 0.7357
F1-pn: 0.6603
             label  precision  recall  f1-score  support
                 0     89.163  88.149    88.653    616.0
                 1     70.968  59.459    64.706    111.0
                 2     62.025  73.684    67.354    133.0
          accuracy     82.209     NaN       NaN      NaN
         macro avg     74.052  73.764    73.571    860.0
      weighted avg     82.617  82.209    82.268    860.0
avg f1 (class 1&2)        NaN     NaN    66.030      NaN


Epoch 10 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 340/415 [04:51<00:49,  1.50it/s]

evaluate, backprop: Test


Epoch 10 - Batches:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 341/415 [04:59<03:47,  3.07s/it]


----------------------------------------
Result evaluate in Test
Loss: 0.6446
F1-macro: 0.7411
F1-pn: 0.6675
             label  precision  recall  f1-score  support
                 0     89.198  88.474    88.835    616.0
                 1     66.355  63.964    65.138    111.0
                 2     66.197  70.677    68.364    133.0
          accuracy     82.558     NaN       NaN      NaN
         macro avg     73.917  74.372    74.112    860.0
      weighted avg     82.693  82.558    82.610    860.0
avg f1 (class 1&2)        NaN     NaN    66.751      NaN


Epoch 10 - Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [05:45<00:00,  1.20it/s]
INFO:root:Epoch 10, Loss: 0.025798580802539477, Step: 415


evaluate, backprop: Train

----------------------------------------
Result evaluate in Train
Loss: 0.0141
F1-macro: 0.9981
F1-pn: 0.9975
             label  precision  recall  f1-score  support
                 0     99.958  99.895    99.927   4774.0
                 1     99.650  99.766    99.708    856.0
                 2     99.703  99.901    99.802   1007.0
          accuracy     99.879     NaN       NaN      NaN
         macro avg     99.770  99.854    99.812   6637.0
      weighted avg     99.880  99.879    99.879   6637.0
avg f1 (class 1&2)        NaN     NaN    99.755      NaN
evaluate, backprop: Test

----------------------------------------
Result evaluate in Test
Loss: 0.6726
F1-macro: 0.7427
F1-pn: 0.6712
             label  precision  recall  f1-score  support
                 0     91.081  86.201    88.574    616.0
                 1     65.789  67.568    66.667    111.0
                 2     61.350  75.188    67.568    133.0
          accuracy     82.093     NaN       

Epoch 11 - Batches:  18%|‚ñà‚ñä        | 76/415 [00:45<03:21,  1.68it/s]


KeyboardInterrupt: 

In [None]:
import pandas as pd

def load_and_display_metrics(metrics_file):
    # –ó–∞–≥—Ä—É–∂–∞–µ–º CSV-—Ñ–∞–π–ª
    df = pd.read_csv(metrics_file)
    df = df[df["backprop"] == "Test"]
    # –û–ø—Ä–µ–¥–µ–ª—è–µ–º –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ —ç–ø–æ—Ö
    epochs = df["epoch"].unique()
    print(epochs)
    
    for epoch in epochs:
        # –§–∏–ª—å—Ç—Ä—É–µ–º –¥–∞–Ω–Ω—ã–µ –ø–æ —ç–ø–æ—Ö–µ
        epoch_df = df[df["epoch"] == epoch]
        
        for idx, row in epoch_df.iterrows():
            loss = row["loss"]
            backprop_value = row["backprop"]
            print(f"\nEpoch {epoch} (Backprop: {backprop_value}) (Loss: {loss})\n" + "-"*30)
            # –§–æ—Ä–º–∏—Ä—É–µ–º —Ç–∞–±–ª–∏—Ü—É –≤ —Å—Ç–∏–ª–µ classification_report
            table_data = {}
            class_labels = sorted(
                set(col.split("_")[1] for col in df.columns 
                    if col.startswith("class_") and "precision" in col)
            )
            
            # –§–∏–ª—å—Ç—Ä—É–µ–º —Ç–æ–ª—å–∫–æ —á–∏—Å–ª–æ–≤—ã–µ –º–µ—Ç–∫–∏ –∫–ª–∞—Å—Å–æ–≤
            class_labels = [label for label in class_labels if label.isdigit()]
            
            for label in class_labels:
                table_data[int(label)] = {
                    "precision": row[f"class_{label}_precision"],
                    "recall": row[f"class_{label}_recall"],
                    "f1-score": row[f"class_{label}_f1"],
                }
            
            # –î–æ–±–∞–≤–ª—è–µ–º —Å—Ä–µ–¥–Ω–∏–µ –∑–Ω–∞—á–µ–Ω–∏—è
            table_data["accuracy"] = {"precision": "", "recall": "", "f1-score": row["accuracy"] }
            table_data["macro avg"] = {
                "precision": row["macro_precision"],
                "recall": row["macro_recall"],
                "f1-score": row["macro_f1"],
            }
            table_data["weighted avg"] = {
                "precision": row["weighted_precision"],
                "recall": row["weighted_recall"],
                "f1-score": row["weighted_f1"],
            }
            
            # –í—ã–≤–æ–¥–∏–º —Ç–∞–±–ª–∏—Ü—É
            df_table = pd.DataFrame.from_dict(table_data, orient="index")
            print(df_table.to_string())

# –ü—Ä–∏–º–µ—Ä –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è
metrics_file = "./result_lr_1e-06_test/metrics_class_weights.csv"
load_and_display_metrics(metrics_file)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# –ó–∞–≥—Ä—É–∑–∫–∞ CSV-—Ñ–∞–π–ª–∞
df = pd.read_csv("metrics_class_weights.csv")  # —É–∫–∞–∂–∏ –ø—É—Ç—å –∫ —Ñ–∞–π–ª—É, –µ—Å–ª–∏ –æ–Ω –æ—Ç–ª–∏—á–∞–µ—Ç—Å—è

# –ü–æ—Å—Ç—Ä–æ–µ–Ω–∏–µ –≥—Ä–∞—Ñ–∏–∫–∞ loss
plt.figure(figsize=(10, 6))

for stage in ['Train', 'Validation', 'Test']:
    stage_data = df[df['backprop'] == stage]
    plt.plot(stage_data['epoch'], stage_data['loss'], label=stage)

plt.title('–ó–Ω–∞—á–µ–Ω–∏–µ Loss –ø–æ —ç–ø–æ—Ö–∞–º')
plt.xlabel('–≠–ø–æ—Ö–∞')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
