In [38]:
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score
from transformers import DistilBertModel, DistilBertTokenizerFast, get_linear_schedule_with_warmup
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import ast

In [39]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device

device(type='mps')

In [40]:
data_file = "../movie_dataset_prepared.csv"
df = pd.read_csv(data_file)

In [41]:
df['genres_list'] = df['genres_list'].apply(ast.literal_eval)
df['keywords_list'] = df['keywords_list'].apply(ast.literal_eval)

print(type(df['genres_list'][0]))
df.head()


<class 'list'>


Unnamed: 0,genres_list,keywords_list,overview
0,"[28, 12, 14, 878]","[9951, 209714]",in the 22nd century a paraplegic marine is dis...
1,"[12, 14, 28]",[179430],captain barbossa long believed to be dead has ...
2,"[28, 12, 80]","[818, 9663]",a cryptic message from bonds past sends him on...
3,"[28, 80, 18, 53]",[9715],following the death of district attorney harve...
4,"[28, 12, 878]","[818, 9951, 10685, 209714]",john carter is a warweary former military capt...


In [42]:
def clean_overview(x):
    if isinstance(x, str):
        return x
    if pd.isna(x):
        return ""
    return str(x)

df['overview'] = df['overview'].apply(clean_overview)

In [43]:
print(f"Przed usunieciem pustych keywords: {len(df)}")
df = df[df['keywords_list'].apply(lambda x: len(x) > 0)]
print(f"Po usunieciu pustych keywords: {len(df)}")

Przed usunieciem pustych keywords: 4683
Po usunieciu pustych keywords: 2720


In [44]:
texts = df['overview'].tolist() 
genre_labels = df['genres_list'].tolist()
keywords_labels = df['keywords_list'].tolist()

genre_labels, keywords_labels

([[28, 12, 14, 878],
  [12, 14, 28],
  [28, 12, 80],
  [28, 80, 18, 53],
  [28, 12, 878],
  [14, 28, 12],
  [16, 10751],
  [28, 12, 878],
  [12, 14, 10751],
  [28, 12, 14],
  [12, 14, 28, 878],
  [12, 14, 28],
  [28, 12],
  [28, 12, 14, 878],
  [12, 10751, 14],
  [878, 28, 12],
  [12, 28, 14],
  [28, 35, 878],
  [28, 12, 14],
  [28, 12, 14],
  [18, 10749, 53],
  [12, 28, 878],
  [53, 28, 12, 878],
  [28, 12, 878, 53],
  [28, 12, 14],
  [28, 12, 878],
  [10751, 14, 12],
  [12, 28, 878, 53],
  [16, 10751],
  [878, 28, 12],
  [878, 28, 12],
  [14, 12, 10751],
  [28, 12, 14],
  [12, 28, 878],
  [16, 10751, 12, 35],
  [12, 28, 53, 878],
  [16, 10751, 35],
  [28, 878, 53],
  [28],
  [28, 18, 27, 878, 53],
  [28, 12, 14, 878],
  [28, 12, 878],
  [18, 10749],
  [28, 878, 12],
  [28, 878, 12],
  [12, 16, 10751],
  [16, 12, 35, 10751, 28, 14],
  [28, 12, 878],
  [16, 18],
  [878, 14, 28, 12],
  [12, 10751, 14],
  [878],
  [18, 28, 80, 53],
  [16, 35, 10751, 12],
  [16, 10751, 12, 878],
  [28, 87

In [123]:
mlb = MultiLabelBinarizer()
keywords_labels_encoded = mlb.fit_transform(keywords_labels)
keywords_n_classes = len(mlb.classes_)

genres_labels_encoded = mlb.fit_transform(genre_labels)
genres_n_classes = len(mlb.classes_)

In [124]:
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

In [125]:
class MovieMultiTaskDataset(Dataset):
    def __init__(self, texts, genre_labels, keyword_labels, tokenizer, max_length=256):
        self.texts = texts
        self.genre_labels = genre_labels
        self.keyword_labels = keyword_labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        genre_label = self.genre_labels[idx]
        keyword_label = self.keyword_labels[idx]

        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'genre_labels': torch.FloatTensor(genre_label),
            'keyword_labels': torch.FloatTensor(keyword_label)
        }

In [126]:
class BERTMultiTaskClassifier(nn.Module):
    def __init__(self, n_genres, n_keywords, dropout=0.3):
        super().__init__()
        self.bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.dropout = nn.Dropout(dropout)
        self.genre_classifier = nn.Linear(self.bert.config.hidden_size, n_genres)
        self.keyword_classifier = nn.Linear(self.bert.config.hidden_size, n_keywords)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0, :]  # CLS token
        pooled_output = self.dropout(pooled_output)
        genre_logits = self.genre_classifier(pooled_output)
        keyword_logits = self.keyword_classifier(pooled_output)
        return genre_logits, keyword_logits

In [127]:
def train_epoch(model, data_loader, genre_criterion, keyword_criterion, optimizer, scheduler, device, genre_weight=1.0, keyword_weight=1.0):
    model.train()
    total_loss = 0

    for batch in tqdm(data_loader, desc='Training'):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        genre_labels = batch['genre_labels'].to(device)
        keyword_labels = batch['keyword_labels'].to(device)

        optimizer.zero_grad()
        genre_logits, keyword_logits = model(input_ids, attention_mask)

        loss = genre_weight * genre_criterion(genre_logits, genre_labels) + \
               keyword_weight * keyword_criterion(keyword_logits, keyword_labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()
    return total_loss / len(data_loader)

In [150]:
from sklearn.metrics import f1_score, precision_score, recall_score, hamming_loss, jaccard_score

@torch.no_grad()
def evaluate(model, data_loader, genre_criterion, keyword_criterion, device,
                       genre_thresh=0.3, keyword_thresh=0.5):
    model.eval()
    all_genre_preds, all_genre_labels = [], []
    all_keyword_preds, all_keyword_labels = [], []
    total_loss = 0

    for batch in tqdm(data_loader, desc='Evaluating'):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        genre_labels = batch['genre_labels'].to(device)
        keyword_labels = batch['keyword_labels'].to(device)

        genre_logits, keyword_logits = model(input_ids, attention_mask)
        loss = genre_criterion(genre_logits, genre_labels) + keyword_criterion(keyword_logits, keyword_labels)
        total_loss += loss.item()

        genre_preds = (torch.sigmoid(genre_logits) > genre_thresh).cpu().numpy()
        keyword_preds = (torch.sigmoid(keyword_logits) > keyword_thresh).cpu().numpy()

        all_genre_preds.extend(genre_preds)
        all_genre_labels.extend(genre_labels.cpu().numpy())
        all_keyword_preds.extend(keyword_preds)
        all_keyword_labels.extend(keyword_labels.cpu().numpy())

    all_genre_preds = np.array(all_genre_preds)
    all_genre_labels = np.array(all_genre_labels)
    all_keyword_preds = np.array(all_keyword_preds)
    all_keyword_labels = np.array(all_keyword_labels)

    metrics = {
        'loss': total_loss / len(data_loader),

        # Genre metrics
        'genre_f1_score': f1_score(all_genre_labels, all_genre_preds, average='samples', zero_division=0),
        'genre_f1_micro': f1_score(all_genre_labels, all_genre_preds, average='micro', zero_division=0),
        'genre_f1_macro': f1_score(all_genre_labels, all_genre_preds, average='macro', zero_division=0),
        'genre_precision_micro': precision_score(all_genre_labels, all_genre_preds, average='micro', zero_division=0),
        'genre_recall_micro': recall_score(all_genre_labels, all_genre_preds, average='micro', zero_division=0),
        'genre_hamming_loss': hamming_loss(all_genre_labels, all_genre_preds),
        'genre_hamming_accuracy': 1 -hamming_loss(all_genre_labels, all_genre_preds),
        'genre_jaccard_samples': jaccard_score(all_genre_labels, all_genre_preds, average='samples'),

        # Keyword metrics
        'keyword_f1_score': f1_score(all_keyword_labels, all_keyword_preds, average='samples', zero_division=0),
        'keyword_f1_micro': f1_score(all_keyword_labels, all_keyword_preds, average='micro', zero_division=0),
        'keyword_f1_macro': f1_score(all_keyword_labels, all_keyword_preds, average='macro', zero_division=0),
        'keyword_precision_micro': precision_score(all_keyword_labels, all_keyword_preds, average='micro', zero_division=0),
        'keyword_recall_micro': recall_score(all_keyword_labels, all_keyword_preds, average='micro', zero_division=0),
        'keyword_hamming_loss': hamming_loss(all_keyword_labels, all_keyword_preds),
        'keyword_hamming_accuracy': 1 - hamming_loss(all_keyword_labels, all_keyword_preds),
        'keyword_jaccard_samples': jaccard_score(all_keyword_labels, all_keyword_preds, average='samples')
    }

    return metrics


In [129]:
def train_model_multitask(
    texts,
    genre_labels_encoded,
    keyword_labels_encoded,
    genres_n_classes,
    keywords_n_classes,
    epochs=15,
    batch_size=16,
    learning_rate=3e-5,
    max_length=256,
    val_size=0.2,
    genre_threshold=0.3,
    keyword_threshold=0.5,
    model_name="multitask",
    patience=3
):
    # ===== CLASS WEIGHTS =====
    genre_pos_weight = torch.tensor((len(genre_labels_encoded) - genre_labels_encoded.sum(axis=0)) / np.clip(genre_labels_encoded.sum(axis=0), 1, None), dtype=torch.float).to(device)
    keyword_pos_weight = torch.tensor((len(keyword_labels_encoded) - keyword_labels_encoded.sum(axis=0)) / np.clip(keyword_labels_encoded.sum(axis=0), 1, None), dtype=torch.float).to(device)

    # ===== SPLIT =====
    X_train, X_val, y_genre_train, y_genre_val, y_keyword_train, y_keyword_val = train_test_split(
        texts, genre_labels_encoded, keyword_labels_encoded, test_size=val_size, random_state=42, shuffle=True
    )

    # ===== DATASETS =====
    train_loader = DataLoader(MovieMultiTaskDataset(X_train, y_genre_train, y_keyword_train, tokenizer, max_length), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(MovieMultiTaskDataset(X_val, y_genre_val, y_keyword_val, tokenizer, max_length), batch_size=batch_size)

    # ===== MODEL =====
    model = BERTMultiTaskClassifier(n_genres=genres_n_classes, n_keywords=keywords_n_classes).to(device)
    genre_criterion = nn.BCEWithLogitsLoss(pos_weight=genre_pos_weight)
    keyword_criterion = nn.BCEWithLogitsLoss(pos_weight=keyword_pos_weight)
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader)*epochs)

    history = {"train_loss": []}

    best_score = 0.0
    wait = 0

    for epoch in range(epochs):
        # ---- TRAIN ----
        train_loss = train_epoch(model, train_loader, genre_criterion, keyword_criterion, optimizer, scheduler, device)
        val_metrics = evaluate(model, val_loader, genre_criterion, keyword_criterion, device, genre_thresh=genre_threshold, keyword_thresh=keyword_threshold)

        # ---- AUTOMATYCZNE DODANIE WSZYSTKICH METRYK ----
        history["train_loss"].append(train_loss)
        for k, v in val_metrics.items():
            if k not in history:
                history[k] = []
            history[k].append(v)

        # ---- EARLY STOPPING ----
        score = 0.5 * (val_metrics['genre_f1_micro'] + val_metrics['keyword_f1_micro'])
        if score > best_score:
            best_score = score
            wait = 0
            torch.save(model.state_dict(), f"{model_name}.pth")
        else:
            wait += 1
            if wait >= patience:
                print(f"\nEarly stopping po {epoch+1} epokach (najlepszy wynik: {best_score:.4f})")
                break

        print(f"Epoch {epoch+1}: Loss={train_loss:.4f}, Genre F1={val_metrics['genre_f1_micro']:.4f}, Keyword F1={val_metrics['keyword_f1_micro']:.4f}")

    # ---- LOAD BEST MODEL ----
    model.load_state_dict(torch.load(f"{model_name}.pth"))

    return {
        "model": model,
        "history": history
    }


In [131]:
result = train_model_multitask(
    texts,
    genres_labels_encoded,
    keywords_labels_encoded,
    genres_n_classes,
    keywords_n_classes,
    epochs=1,
    batch_size=16,
    learning_rate=3e-5
)

model = result["model"]


Training: 100%|██████████| 136/136 [01:29<00:00,  1.51it/s]
Evaluating: 100%|██████████| 34/34 [00:06<00:00,  5.33it/s]


Epoch 1: Loss=2.3850, Genre F1=0.3103, Keyword F1=0.0917


## PARAM GRID

In [69]:
param_grid = {
    "batch_size": [8, 16],
    "learning_rate": [2e-5, 3e-5],
    "max_length": [128, 256],
    "epochs": [3],
    "genre_threshold": [0.3, 0.35],
    "keyword_threshold": [0.4, 0.5]
}

In [81]:
from itertools import product
import pandas as pd
import os

def param_grid_search_multitask(
    texts,
    genre_labels,
    keyword_labels,
    param_grid,
    subset_frac=0.2,  # <- nowy parametr, 1.0 = pełne dane
    save_csv_path="multitask_param_grid_results.csv",
    model_prefix="param_grid/multitask",
):
    keys = list(param_grid.keys())
    combinations = list(product(*param_grid.values()))

    results = []
    
    if subset_frac < 1.0:
        texts, texts_unused, genre_labels, genre_labels_unused, keyword_labels, keyword_labels_unused = train_test_split(
            texts, genre_labels, keyword_labels,
            test_size=1-subset_frac,
            random_state=42,
            shuffle=True
        )


    print(f"Running {len(combinations)} multitask configurations...\n")

    for i, values in enumerate(combinations, 1):
        config = dict(zip(keys, values))

        print("=" * 70)
        print(f"[{i}/{len(combinations)}] CONFIG: {config}")
        print("=" * 70)

        run = train_model_multitask(
            texts=texts,
            genre_labels=genre_labels,
            keyword_labels=keyword_labels,
            epochs=config["epochs"],
            batch_size=config["batch_size"],
            learning_rate=config["learning_rate"],
            max_length=config["max_length"],
            genre_threshold=config["genre_threshold"],
            keyword_threshold=config["keyword_threshold"],
            model_name=f"{model_prefix}_{i}"
        )

        history = run["history"]

        # Ostatnie metryki walidacyjne
        row = {
            **config,
            "genre_f1_micro": history["genre_f1_micro"][-1],
            "genre_f1_macro": history["genre_f1_macro"][-1],
            "keyword_f1_micro": history["keyword_f1_micro"][-1],
            "keyword_f1_macro": history["keyword_f1_macro"][-1],
        }

        row["mean_f1_micro"] = (
            row["genre_f1_micro"] + row["keyword_f1_micro"]
        ) / 2

        results.append(row)

        # zapis po KAŻDEJ konfiguracji (bezpieczne)
        pd.DataFrame(results).to_csv(save_csv_path, index=False)

        print(
            f"Genre F1: {row['genre_f1_micro']:.4f} | "
            f"Keyword F1: {row['keyword_f1_micro']:.4f} | "
            f"Mean F1: {row['mean_f1_micro']:.4f}\n"
        )

    results_df = pd.DataFrame(results)
    results_df = results_df.sort_values("mean_f1_micro", ascending=False)

    print("\n===== TOP 5 CONFIGURATIONS =====")
    print(results_df.head())

    return results_df


In [72]:
results_df = param_grid_search_multitask(
    texts=texts,
    genre_labels=genre_labels,
    keyword_labels=keywords_labels,
    param_grid=param_grid,
    save_csv_path="param_grid/multitask_grid_results.csv"
)


Running 32 multitask configurations...

[1/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.17it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.90it/s]


Epoch 1: Loss 2.5080, Genre F1 0.2942, Keyword F1 0.0650


Training: 100%|██████████| 55/55 [00:10<00:00,  5.22it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.74it/s]


Epoch 2: Loss 2.3999, Genre F1 0.2947, Keyword F1 0.0670


Training: 100%|██████████| 55/55 [00:10<00:00,  5.23it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.74it/s]


Epoch 3: Loss 2.2821, Genre F1 0.2998, Keyword F1 0.0685
Genre F1: 0.2998 | Keyword F1: 0.0685 | Mean F1: 0.1842

[2/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.21it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.68it/s]


Epoch 1: Loss 2.5054, Genre F1 0.2942, Keyword F1 0.0679


Training: 100%|██████████| 55/55 [00:10<00:00,  5.20it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.77it/s]


Epoch 2: Loss 2.4347, Genre F1 0.2945, Keyword F1 0.0799


Training: 100%|██████████| 55/55 [00:10<00:00,  5.19it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.31it/s]


Epoch 3: Loss 2.3663, Genre F1 0.2975, Keyword F1 0.0778
Genre F1: 0.2975 | Keyword F1: 0.0778 | Mean F1: 0.1876

[3/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.13it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 21.63it/s]


Epoch 1: Loss 2.4942, Genre F1 0.2945, Keyword F1 0.0656


Training: 100%|██████████| 55/55 [00:10<00:00,  5.11it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 23.04it/s]


Epoch 2: Loss 2.3705, Genre F1 0.3023, Keyword F1 0.0684


Training: 100%|██████████| 55/55 [00:10<00:00,  5.04it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 21.53it/s]


Epoch 3: Loss 2.2568, Genre F1 0.3178, Keyword F1 0.0716
Genre F1: 0.3178 | Keyword F1: 0.0716 | Mean F1: 0.1947

[4/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.08it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 21.82it/s]


Epoch 1: Loss 2.5040, Genre F1 0.2954, Keyword F1 0.0860


Training: 100%|██████████| 55/55 [00:10<00:00,  5.03it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 21.77it/s]


Epoch 2: Loss 2.3577, Genre F1 0.3158, Keyword F1 0.0828


Training: 100%|██████████| 55/55 [00:10<00:00,  5.03it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 21.74it/s]


Epoch 3: Loss 2.2587, Genre F1 0.3253, Keyword F1 0.0809
Genre F1: 0.3253 | Keyword F1: 0.0809 | Mean F1: 0.2031

[5/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:19<00:00,  2.83it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.66it/s]


Epoch 1: Loss 2.4978, Genre F1 0.2942, Keyword F1 0.0659


Training: 100%|██████████| 55/55 [00:19<00:00,  2.84it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.33it/s]


Epoch 2: Loss 2.4133, Genre F1 0.2970, Keyword F1 0.0670


Training: 100%|██████████| 55/55 [00:19<00:00,  2.82it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.62it/s]


Epoch 3: Loss 2.3344, Genre F1 0.3002, Keyword F1 0.0685
Genre F1: 0.3002 | Keyword F1: 0.0685 | Mean F1: 0.1843

[6/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:19<00:00,  2.88it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.04it/s]


Epoch 1: Loss 2.5002, Genre F1 0.2942, Keyword F1 0.0660


Training: 100%|██████████| 55/55 [00:19<00:00,  2.80it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.51it/s]


Epoch 2: Loss 2.4065, Genre F1 0.2942, Keyword F1 0.0709


Training: 100%|██████████| 55/55 [00:19<00:00,  2.83it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.66it/s]


Epoch 3: Loss 2.2952, Genre F1 0.3007, Keyword F1 0.0715
Genre F1: 0.3007 | Keyword F1: 0.0715 | Mean F1: 0.1861

[7/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:18<00:00,  2.90it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.32it/s]


Epoch 1: Loss 2.4939, Genre F1 0.2954, Keyword F1 0.0665


Training: 100%|██████████| 55/55 [00:19<00:00,  2.84it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.17it/s]


Epoch 2: Loss 2.3660, Genre F1 0.3041, Keyword F1 0.0692


Training: 100%|██████████| 55/55 [00:18<00:00,  2.91it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.31it/s]


Epoch 3: Loss 2.2842, Genre F1 0.3129, Keyword F1 0.0700
Genre F1: 0.3129 | Keyword F1: 0.0700 | Mean F1: 0.1914

[8/32] CONFIG: {'batch_size': 8, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:19<00:00,  2.81it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.15it/s]


Epoch 1: Loss 2.5079, Genre F1 0.2942, Keyword F1 0.0733


Training: 100%|██████████| 55/55 [00:19<00:00,  2.87it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.75it/s]


Epoch 2: Loss 2.3947, Genre F1 0.3035, Keyword F1 0.0858


Training: 100%|██████████| 55/55 [00:19<00:00,  2.89it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.63it/s]


Epoch 3: Loss 2.2882, Genre F1 0.3151, Keyword F1 0.0890
Genre F1: 0.3151 | Keyword F1: 0.0890 | Mean F1: 0.2021

[9/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.19it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.32it/s]


Epoch 1: Loss 2.4917, Genre F1 0.2942, Keyword F1 0.0661


Training: 100%|██████████| 55/55 [00:10<00:00,  5.26it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 23.54it/s]


Epoch 2: Loss 2.3220, Genre F1 0.3130, Keyword F1 0.0721


Training: 100%|██████████| 55/55 [00:10<00:00,  5.29it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 23.51it/s]


Epoch 3: Loss 2.1419, Genre F1 0.3349, Keyword F1 0.0749
Genre F1: 0.3349 | Keyword F1: 0.0749 | Mean F1: 0.2049

[10/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.19it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.54it/s]


Epoch 1: Loss 2.5106, Genre F1 0.2942, Keyword F1 0.0739


Training: 100%|██████████| 55/55 [00:10<00:00,  5.20it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.49it/s]


Epoch 2: Loss 2.3168, Genre F1 0.3127, Keyword F1 0.0926


Training: 100%|██████████| 55/55 [00:10<00:00,  5.21it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 23.12it/s]


Epoch 3: Loss 2.1398, Genre F1 0.3306, Keyword F1 0.0982
Genre F1: 0.3306 | Keyword F1: 0.0982 | Mean F1: 0.2144

[11/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.22it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.53it/s]


Epoch 1: Loss 2.5154, Genre F1 0.2947, Keyword F1 0.0675


Training: 100%|██████████| 55/55 [00:10<00:00,  5.31it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.37it/s]


Epoch 2: Loss 2.3260, Genre F1 0.3275, Keyword F1 0.0715


Training: 100%|██████████| 55/55 [00:10<00:00,  5.21it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.56it/s]


Epoch 3: Loss 2.1564, Genre F1 0.3611, Keyword F1 0.0753
Genre F1: 0.3611 | Keyword F1: 0.0753 | Mean F1: 0.2182

[12/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:10<00:00,  5.28it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.57it/s]


Epoch 1: Loss 2.5005, Genre F1 0.3068, Keyword F1 0.0787


Training: 100%|██████████| 55/55 [00:10<00:00,  5.23it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 23.43it/s]


Epoch 2: Loss 2.2675, Genre F1 0.3916, Keyword F1 0.0999


Training: 100%|██████████| 55/55 [00:10<00:00,  5.22it/s]
Evaluating: 100%|██████████| 14/14 [00:00<00:00, 22.57it/s]


Epoch 3: Loss 2.0889, Genre F1 0.4060, Keyword F1 0.1075
Genre F1: 0.4060 | Keyword F1: 0.1075 | Mean F1: 0.2567

[13/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:18<00:00,  2.97it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.48it/s]


Epoch 1: Loss 2.5128, Genre F1 0.2942, Keyword F1 0.0664


Training: 100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.56it/s]


Epoch 2: Loss 2.3862, Genre F1 0.3107, Keyword F1 0.0694


Training: 100%|██████████| 55/55 [00:18<00:00,  2.96it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.91it/s]


Epoch 3: Loss 2.1733, Genre F1 0.3394, Keyword F1 0.0738
Genre F1: 0.3394 | Keyword F1: 0.0738 | Mean F1: 0.2066

[14/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:18<00:00,  2.94it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.95it/s]


Epoch 1: Loss 2.5053, Genre F1 0.2945, Keyword F1 0.0775


Training: 100%|██████████| 55/55 [00:18<00:00,  2.94it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.59it/s]


Epoch 2: Loss 2.2950, Genre F1 0.3281, Keyword F1 0.0904


Training: 100%|██████████| 55/55 [00:18<00:00,  2.95it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.11it/s]


Epoch 3: Loss 2.0932, Genre F1 0.3473, Keyword F1 0.0944
Genre F1: 0.3473 | Keyword F1: 0.0944 | Mean F1: 0.2209

[15/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 55/55 [00:18<00:00,  2.93it/s]
Evaluating: 100%|██████████| 14/14 [00:03<00:00,  4.53it/s]


Epoch 1: Loss 2.5044, Genre F1 0.2953, Keyword F1 0.0660


Training: 100%|██████████| 55/55 [00:19<00:00,  2.80it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.67it/s]


Epoch 2: Loss 2.3607, Genre F1 0.3532, Keyword F1 0.0747


Training: 100%|██████████| 55/55 [00:19<00:00,  2.87it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.37it/s]


Epoch 3: Loss 2.1766, Genre F1 0.3648, Keyword F1 0.0746
Genre F1: 0.3648 | Keyword F1: 0.0746 | Mean F1: 0.2197

[16/32] CONFIG: {'batch_size': 8, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 55/55 [00:19<00:00,  2.88it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 10.56it/s]


Epoch 1: Loss 2.4911, Genre F1 0.3005, Keyword F1 0.0753


Training: 100%|██████████| 55/55 [00:19<00:00,  2.84it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.21it/s]


Epoch 2: Loss 2.3180, Genre F1 0.3217, Keyword F1 0.0841


Training: 100%|██████████| 55/55 [00:19<00:00,  2.79it/s]
Evaluating: 100%|██████████| 14/14 [00:01<00:00, 11.33it/s]


Epoch 3: Loss 2.1551, Genre F1 0.3443, Keyword F1 0.0924
Genre F1: 0.3443 | Keyword F1: 0.0924 | Mean F1: 0.2183

[17/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:08<00:00,  3.14it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00,  8.75it/s]


Epoch 1: Loss 2.5324, Genre F1 0.2942, Keyword F1 0.0657


Training: 100%|██████████| 28/28 [00:08<00:00,  3.17it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.13it/s]


Epoch 2: Loss 2.4528, Genre F1 0.2942, Keyword F1 0.0659


Training: 100%|██████████| 28/28 [00:08<00:00,  3.15it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.30it/s]


Epoch 3: Loss 2.4155, Genre F1 0.2942, Keyword F1 0.0658
Genre F1: 0.2942 | Keyword F1: 0.0658 | Mean F1: 0.1800

[18/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:08<00:00,  3.19it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.30it/s]


Epoch 1: Loss 2.4904, Genre F1 0.2942, Keyword F1 0.0669


Training: 100%|██████████| 28/28 [00:08<00:00,  3.22it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.10it/s]


Epoch 2: Loss 2.4425, Genre F1 0.2944, Keyword F1 0.0765


Training: 100%|██████████| 28/28 [00:08<00:00,  3.17it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.23it/s]


Epoch 3: Loss 2.3621, Genre F1 0.2953, Keyword F1 0.0814
Genre F1: 0.2953 | Keyword F1: 0.0814 | Mean F1: 0.1883

[19/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:08<00:00,  3.22it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.36it/s]


Epoch 1: Loss 2.4759, Genre F1 0.2942, Keyword F1 0.0660


Training: 100%|██████████| 28/28 [00:08<00:00,  3.23it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.38it/s]


Epoch 2: Loss 2.4216, Genre F1 0.2942, Keyword F1 0.0661


Training: 100%|██████████| 28/28 [00:08<00:00,  3.25it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.44it/s]


Epoch 3: Loss 2.3984, Genre F1 0.2942, Keyword F1 0.0668
Genre F1: 0.2942 | Keyword F1: 0.0668 | Mean F1: 0.1805

[20/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:08<00:00,  3.27it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.49it/s]


Epoch 1: Loss 2.5175, Genre F1 0.2942, Keyword F1 0.0717


Training: 100%|██████████| 28/28 [00:08<00:00,  3.27it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.42it/s]


Epoch 2: Loss 2.4367, Genre F1 0.2972, Keyword F1 0.0721


Training: 100%|██████████| 28/28 [00:08<00:00,  3.24it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.21it/s]


Epoch 3: Loss 2.3682, Genre F1 0.3010, Keyword F1 0.0741
Genre F1: 0.3010 | Keyword F1: 0.0741 | Mean F1: 0.1875

[21/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.80it/s]


Epoch 1: Loss 2.4854, Genre F1 0.2942, Keyword F1 0.0662


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.85it/s]


Epoch 2: Loss 2.4214, Genre F1 0.2942, Keyword F1 0.0665


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.78it/s]


Epoch 3: Loss 2.4390, Genre F1 0.2942, Keyword F1 0.0669
Genre F1: 0.2942 | Keyword F1: 0.0669 | Mean F1: 0.1806

[22/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:16<00:00,  1.69it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.02it/s]


Epoch 1: Loss 2.4965, Genre F1 0.2942, Keyword F1 0.0689


Training: 100%|██████████| 28/28 [00:16<00:00,  1.65it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.01it/s]


Epoch 2: Loss 2.4717, Genre F1 0.2942, Keyword F1 0.0743


Training: 100%|██████████| 28/28 [00:16<00:00,  1.69it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.94it/s]


Epoch 3: Loss 2.3919, Genre F1 0.2942, Keyword F1 0.0779
Genre F1: 0.2942 | Keyword F1: 0.0779 | Mean F1: 0.1861

[23/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:17<00:00,  1.64it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.90it/s]


Epoch 1: Loss 2.5616, Genre F1 0.2942, Keyword F1 0.0659


Training: 100%|██████████| 28/28 [00:16<00:00,  1.69it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.18it/s]


Epoch 2: Loss 2.4703, Genre F1 0.2942, Keyword F1 0.0660


Training: 100%|██████████| 28/28 [00:17<00:00,  1.64it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.04it/s]


Epoch 3: Loss 2.3971, Genre F1 0.2945, Keyword F1 0.0662
Genre F1: 0.2945 | Keyword F1: 0.0662 | Mean F1: 0.1803

[24/32] CONFIG: {'batch_size': 16, 'learning_rate': 2e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.35it/s]


Epoch 1: Loss 2.5242, Genre F1 0.2942, Keyword F1 0.0587


Training: 100%|██████████| 28/28 [00:16<00:00,  1.71it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.14it/s]


Epoch 2: Loss 2.4445, Genre F1 0.2942, Keyword F1 0.0613


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.81it/s]


Epoch 3: Loss 2.4160, Genre F1 0.2945, Keyword F1 0.0613
Genre F1: 0.2945 | Keyword F1: 0.0613 | Mean F1: 0.1779

[25/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:09<00:00,  3.06it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 11.03it/s]


Epoch 1: Loss 2.5163, Genre F1 0.2942, Keyword F1 0.0660


Training: 100%|██████████| 28/28 [00:09<00:00,  3.05it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.11it/s]


Epoch 2: Loss 2.4054, Genre F1 0.2942, Keyword F1 0.0668


Training: 100%|██████████| 28/28 [00:08<00:00,  3.30it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.92it/s]


Epoch 3: Loss 2.2882, Genre F1 0.2976, Keyword F1 0.0689
Genre F1: 0.2976 | Keyword F1: 0.0689 | Mean F1: 0.1833

[26/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:08<00:00,  3.22it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.15it/s]


Epoch 1: Loss 2.4797, Genre F1 0.2942, Keyword F1 0.0689


Training: 100%|██████████| 28/28 [00:08<00:00,  3.24it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.84it/s]


Epoch 2: Loss 2.3986, Genre F1 0.3024, Keyword F1 0.0806


Training: 100%|██████████| 28/28 [00:08<00:00,  3.31it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.90it/s]


Epoch 3: Loss 2.2797, Genre F1 0.3031, Keyword F1 0.0837
Genre F1: 0.3031 | Keyword F1: 0.0837 | Mean F1: 0.1934

[27/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:08<00:00,  3.32it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 13.00it/s]


Epoch 1: Loss 2.5133, Genre F1 0.2942, Keyword F1 0.0660


Training: 100%|██████████| 28/28 [00:08<00:00,  3.32it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 13.00it/s]


Epoch 2: Loss 2.3855, Genre F1 0.3051, Keyword F1 0.0666


Training: 100%|██████████| 28/28 [00:08<00:00,  3.30it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 11.42it/s]


Epoch 3: Loss 2.2846, Genre F1 0.3252, Keyword F1 0.0677
Genre F1: 0.3252 | Keyword F1: 0.0677 | Mean F1: 0.1965

[28/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 128, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:08<00:00,  3.29it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.89it/s]


Epoch 1: Loss 2.4919, Genre F1 0.2942, Keyword F1 0.0634


Training: 100%|██████████| 28/28 [00:08<00:00,  3.20it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 10.94it/s]


Epoch 2: Loss 2.4312, Genre F1 0.2980, Keyword F1 0.0697


Training: 100%|██████████| 28/28 [00:08<00:00,  3.30it/s]
Evaluating: 100%|██████████| 7/7 [00:00<00:00, 12.50it/s]


Epoch 3: Loss 2.3411, Genre F1 0.3017, Keyword F1 0.0707
Genre F1: 0.3017 | Keyword F1: 0.0707 | Mean F1: 0.1862

[29/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:16<00:00,  1.69it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.07it/s]


Epoch 1: Loss 2.5028, Genre F1 0.2942, Keyword F1 0.0655


Training: 100%|██████████| 28/28 [00:16<00:00,  1.68it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.10it/s]


Epoch 2: Loss 2.3984, Genre F1 0.2942, Keyword F1 0.0660


Training: 100%|██████████| 28/28 [00:16<00:00,  1.67it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.14it/s]


Epoch 3: Loss 2.3078, Genre F1 0.2948, Keyword F1 0.0671
Genre F1: 0.2948 | Keyword F1: 0.0671 | Mean F1: 0.1810

[30/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.3, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.15it/s]


Epoch 1: Loss 2.5153, Genre F1 0.2942, Keyword F1 0.0582


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.90it/s]


Epoch 2: Loss 2.4275, Genre F1 0.2942, Keyword F1 0.0678


Training: 100%|██████████| 28/28 [00:17<00:00,  1.61it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.82it/s]


Epoch 3: Loss 2.3536, Genre F1 0.2944, Keyword F1 0.0729
Genre F1: 0.2944 | Keyword F1: 0.0729 | Mean F1: 0.1836

[31/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.4}


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.80it/s]


Epoch 1: Loss 2.5028, Genre F1 0.2942, Keyword F1 0.0665


Training: 100%|██████████| 28/28 [00:16<00:00,  1.65it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.06it/s]


Epoch 2: Loss 2.4381, Genre F1 0.2968, Keyword F1 0.0669


Training: 100%|██████████| 28/28 [00:17<00:00,  1.64it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.92it/s]


Epoch 3: Loss 2.3303, Genre F1 0.3048, Keyword F1 0.0688
Genre F1: 0.3048 | Keyword F1: 0.0688 | Mean F1: 0.1868

[32/32] CONFIG: {'batch_size': 16, 'learning_rate': 3e-05, 'max_length': 256, 'epochs': 3, 'genre_threshold': 0.35, 'keyword_threshold': 0.5}


Training: 100%|██████████| 28/28 [00:16<00:00,  1.68it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.09it/s]


Epoch 1: Loss 2.4939, Genre F1 0.2942, Keyword F1 0.0734


Training: 100%|██████████| 28/28 [00:16<00:00,  1.71it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  6.16it/s]


Epoch 2: Loss 2.4330, Genre F1 0.2964, Keyword F1 0.0732


Training: 100%|██████████| 28/28 [00:16<00:00,  1.66it/s]
Evaluating: 100%|██████████| 7/7 [00:01<00:00,  5.46it/s]


Epoch 3: Loss 2.2992, Genre F1 0.3035, Keyword F1 0.0739
Genre F1: 0.3035 | Keyword F1: 0.0739 | Mean F1: 0.1887


===== TOP 5 CONFIGURATIONS =====
    batch_size  learning_rate  max_length  epochs  genre_threshold  \
11           8        0.00003         128       3             0.35   
13           8        0.00003         256       3             0.30   
14           8        0.00003         256       3             0.35   
15           8        0.00003         256       3             0.35   
10           8        0.00003         128       3             0.35   

    keyword_threshold  genre_f1_micro  genre_f1_macro  keyword_f1_micro  \
11                0.5        0.405993        0.382104          0.107452   
13                0.5        0.347283        0.329544          0.094417   
14                0.4        0.364829        0.346388          0.074578   
15                0.5        0.344313        0.320179          0.092359   
10                0.4        0.361057        0.339436   

In [None]:
def select_optimal_epochs_multitask(
    texts,
    genre_labels_encoded,
    keyword_labels_encoded,
    tokenizer,
    device,
    max_length=256,
    batch_size=16,
    learning_rate=3e-5,
    max_epochs=20,
    patience=3,
    genre_loss_weight=1.0,
    keyword_loss_weight=1.0,
    genre_thresh=0.35,
    keyword_thresh=0.5,
    val_size=0.2
):
    """
    Dobór liczby epok z early stopping dla modelu multi-task (genres + keywords)
    """

    n_genres = genre_labels_encoded.shape[1]   # liczba gatunków

    n_keywords = keyword_labels_encoded.shape[1]  # liczba słów kluczowych
    # Podział na train/val
    X_train, X_val, y_genre_train, y_genre_val, y_keyword_train, y_keyword_val = train_test_split(
        texts, genre_labels_encoded, keyword_labels_encoded,
        test_size=val_size,
        random_state=42,
        shuffle=True
    )
    
    # Dataset i DataLoader
    train_dataset = MovieMultiTaskDataset(
        texts=X_train,
        genre_labels=y_genre_train,
        keyword_labels=y_keyword_train,
        tokenizer=tokenizer,
        max_length=max_length
    )
    val_dataset = MovieMultiTaskDataset(
        texts=X_val,
        genre_labels=y_genre_val,
        keyword_labels=y_keyword_val,
        tokenizer=tokenizer,
        max_length=max_length
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Model
    model = BERTMultiTaskClassifier(n_genres=n_genres, n_keywords=n_keywords).to(device)

    # Loss i optymalizator
    genre_criterion = torch.nn.BCEWithLogitsLoss()
    keyword_criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)

    best_val_f1 = 0
    best_epoch = 0
    wait = 0

    print("=== Dobór liczby epok z early stopping ===\n")
    
    for epoch in range(1, max_epochs + 1):
        # ---- trening ----
        model.train()
        train_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch}"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            genre_labels_batch = batch['genre_labels'].to(device)
            keyword_labels_batch = batch['keyword_labels'].to(device)

            optimizer.zero_grad()
            genre_logits, keyword_logits = model(input_ids, attention_mask)

            loss = genre_loss_weight * genre_criterion(genre_logits, genre_labels_batch) + \
                   keyword_loss_weight * keyword_criterion(keyword_logits, keyword_labels_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        # ---- walidacja ----
        val_metrics = evaluate(
            model, val_loader, genre_criterion, keyword_criterion,
            device=device, genre_thresh=genre_thresh, keyword_thresh=keyword_thresh
        )

        val_f1_micro = 0.5 * (val_metrics['genre_f1_micro'] + val_metrics['keyword_f1_micro'])

        print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Val F1 Micro={val_f1_micro:.4f}")

        # ---- early stopping ----
        if val_f1_micro > best_val_f1:
            best_val_f1 = val_f1_micro
            best_epoch = epoch
            wait = 0
            torch.save(model.state_dict(), "best_multitask_epoch.pth")
        else:
            wait += 1
            if wait >= patience:
                print(f"\nEarly stopping po {epoch} epokach (najlepsza epoka {best_epoch})")
                break

    print(f"\nOptymalna liczba epok: {best_epoch}, najlepsze F1 Micro: {best_val_f1:.4f}")
    return best_epoch, best_val_f1


In [None]:
best_epoch, best_f1= select_optimal_epochs_multitask(
    texts=texts,
    genres_labels_encoded=genres_labels_encoded,
    keywords_labels_encoded=keywords_labels_encoded,
    tokenizer=tokenizer, 
    device=device,
    max_length=128,
    batch_size=8,
    learning_rate=3e-5,
    max_epochs=20,          # maksymalna liczba epok do sprawdzenia
    patience=3,             # early stopping po 3 epokach bez poprawy
    genre_loss_weight=1.0,
    keyword_loss_weight=1.0,
    genre_thresh=0.35,
    keyword_thresh=0.5,
    val_size=0.2
)


=== Dobór liczby epok z early stopping ===



Training Epoch 1: 100%|██████████| 272/272 [00:52<00:00,  5.20it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 22.18it/s]


Epoch 1: Train Loss=0.6215, Val F1 Micro=0.2984


Training Epoch 2: 100%|██████████| 272/272 [00:53<00:00,  5.10it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 22.22it/s]


Epoch 2: Train Loss=0.4379, Val F1 Micro=0.3217


Training Epoch 3: 100%|██████████| 272/272 [00:53<00:00,  5.08it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 21.41it/s]


Epoch 3: Train Loss=0.3678, Val F1 Micro=0.3322


Training Epoch 4: 100%|██████████| 272/272 [00:54<00:00,  5.01it/s]
Evaluating: 100%|██████████| 68/68 [00:02<00:00, 22.78it/s]


Epoch 4: Train Loss=0.3145, Val F1 Micro=0.3347


Training Epoch 5: 100%|██████████| 272/272 [00:54<00:00,  5.02it/s]
Evaluating: 100%|██████████| 68/68 [00:02<00:00, 22.74it/s]


Epoch 5: Train Loss=0.2735, Val F1 Micro=0.3288


Training Epoch 6: 100%|██████████| 272/272 [00:53<00:00,  5.10it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 21.37it/s]


Epoch 6: Train Loss=0.2393, Val F1 Micro=0.3312


Training Epoch 7: 100%|██████████| 272/272 [00:54<00:00,  5.01it/s]
Evaluating: 100%|██████████| 68/68 [01:12<00:00,  1.07s/it]


Epoch 7: Train Loss=0.2120, Val F1 Micro=0.3485


Training Epoch 8: 100%|██████████| 272/272 [00:53<00:00,  5.13it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 21.47it/s]


Epoch 8: Train Loss=0.1915, Val F1 Micro=0.3497


Training Epoch 9: 100%|██████████| 272/272 [00:54<00:00,  5.01it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 21.13it/s]


Epoch 9: Train Loss=0.1753, Val F1 Micro=0.3581


Training Epoch 10: 100%|██████████| 272/272 [00:54<00:00,  4.98it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 20.74it/s]


Epoch 10: Train Loss=0.1621, Val F1 Micro=0.3569


Training Epoch 11: 100%|██████████| 272/272 [00:54<00:00,  4.98it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 22.10it/s]


Epoch 11: Train Loss=0.1498, Val F1 Micro=0.3606


Training Epoch 12: 100%|██████████| 272/272 [00:54<00:00,  5.00it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 22.36it/s]


Epoch 12: Train Loss=0.1406, Val F1 Micro=0.3765


Training Epoch 13: 100%|██████████| 272/272 [00:53<00:00,  5.09it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 22.60it/s]


Epoch 13: Train Loss=0.1325, Val F1 Micro=0.3782


Training Epoch 14: 100%|██████████| 272/272 [00:53<00:00,  5.05it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 22.18it/s]


Epoch 14: Train Loss=0.1245, Val F1 Micro=0.3730


Training Epoch 15: 100%|██████████| 272/272 [00:53<00:00,  5.11it/s]
Evaluating: 100%|██████████| 68/68 [00:02<00:00, 22.79it/s]


Epoch 15: Train Loss=0.1170, Val F1 Micro=0.3872


Training Epoch 16: 100%|██████████| 272/272 [00:52<00:00,  5.15it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 21.88it/s]


Epoch 16: Train Loss=0.1085, Val F1 Micro=0.3918


Training Epoch 17: 100%|██████████| 272/272 [00:52<00:00,  5.19it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 22.52it/s]


Epoch 17: Train Loss=0.1009, Val F1 Micro=0.4003


Training Epoch 18: 100%|██████████| 272/272 [00:51<00:00,  5.28it/s]
Evaluating: 100%|██████████| 68/68 [00:03<00:00, 21.70it/s]


Epoch 18: Train Loss=0.0925, Val F1 Micro=0.4087


Training Epoch 19: 100%|██████████| 272/272 [00:51<00:00,  5.25it/s]
Evaluating: 100%|██████████| 68/68 [00:02<00:00, 22.91it/s]


Epoch 19: Train Loss=0.0858, Val F1 Micro=0.4049


Training Epoch 20: 100%|██████████| 272/272 [00:52<00:00,  5.20it/s]
Evaluating: 100%|██████████| 68/68 [00:02<00:00, 23.26it/s]

Epoch 20: Train Loss=0.0787, Val F1 Micro=0.4083

Optymalna liczba epok: 18, najlepsze F1 Micro: 0.4087





In [145]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold

def five_fold_cv_multitask_save_config(
    texts,
    genres_labels_encoded,
    keywords_labels_encoded,
    genres_n_classes,
    keywords_n_classes,
    tokenizer,
    device,
    config,
    n_splits=5,
    metrics_csv_path="5fold_multitask_metrics.csv",
    model_save_prefix="multitask_fold"
):
    """
    5-fold CV dla multitask BERT z wywołaniem funkcji train_model_multitask,
    early stopping, zapis modeli i metryk.
    
    config - słownik z parametrami treningu, np.:
        {
            "epochs": 5,
            "batch_size": 8,
            "learning_rate": 3e-5,
            "max_length": 256,
            "genre_loss_weight": 1.0,
            "keyword_loss_weight": 1.0,
            "genre_threshold": 0.35,
            "keyword_threshold": 0.5,
            "patience": 2
        }
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_results = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(texts)):
        print(f"\n=== Fold {fold+1}/{n_splits} ===")

        # ===== Podział danych =====
        X_train = [texts[i] for i in train_idx]
        X_val   = [texts[i] for i in val_idx]
        y_genre_train = genres_labels_encoded[train_idx]
        y_genre_val   = genres_labels_encoded[val_idx]
        y_keyword_train = keywords_labels_encoded[train_idx]
        y_keyword_val   = keywords_labels_encoded[val_idx]

        # ===== Trening foldu przez train_model_multitask =====
        run = train_model_multitask(
            texts=X_train,
            genre_labels_encoded=y_genre_train,
            keyword_labels_encoded=y_keyword_train,
            genres_n_classes=genres_n_classes,
            keywords_n_classes=keywords_n_classes,
            epochs=config.get("epochs", 5),
            batch_size=config.get("batch_size", 8),
            learning_rate=config.get("learning_rate", 3e-5),
            max_length=config.get("max_length", 256),
            val_size=None,  # walidacja fold-specific
            genre_threshold=config.get("genre_threshold", 0.35),
            keyword_threshold=config.get("keyword_threshold", 0.5),
            model_name=f"{model_save_prefix}_fold{fold+1}",
            patience=config.get("patience", 3)
        )

        model = run["model"]
        history = run["history"]

        # ===== Walidacja foldu =====
        val_ds = MovieMultiTaskDataset(
            texts=X_val,
            genre_labels=y_genre_val,
            keyword_labels=y_keyword_val,
            tokenizer=tokenizer,
            max_length=config.get("max_length", 256)
        )
        val_loader = DataLoader(val_ds, batch_size=config.get("batch_size", 8))

        # Loss z pos_weight (dla metryk)
        genre_pos_weight = torch.tensor(
            (len(y_genre_train) - y_genre_train.sum(axis=0)) / np.clip(y_genre_train.sum(axis=0), 1, None),
            dtype=torch.float
        ).to(device)
        keyword_pos_weight = torch.tensor(
            (len(y_keyword_train) - y_keyword_train.sum(axis=0)) / np.clip(y_keyword_train.sum(axis=0), 1, None),
            dtype=torch.float
        ).to(device)

        genre_criterion = torch.nn.BCEWithLogitsLoss(pos_weight=genre_pos_weight)
        keyword_criterion = torch.nn.BCEWithLogitsLoss(pos_weight=keyword_pos_weight)

        val_metrics = evaluate(
            model,
            val_loader,
            genre_criterion,
            keyword_criterion,
            device=device,
            genre_thresh=config.get("genre_threshold", 0.35),
            keyword_thresh=config.get("keyword_threshold", 0.5)
        )

        # Dodaj informacje o foldzie i najlepszej epoce
        val_metrics["fold"] = fold + 1
        val_metrics["best_epoch"] = len(history["train_loss"])
        fold_results.append(val_metrics)

        print(f"Fold {fold+1} - Genre F1 Micro: {val_metrics['genre_f1_micro']:.4f}, "
              f"Keyword F1 Micro: {val_metrics['keyword_f1_micro']:.4f}")

    # ===== Zapis CSV =====
    df_results = pd.DataFrame(fold_results)
    df_results.to_csv(metrics_csv_path, index=False)
    print(f"\nZapisano metryki wszystkich foldów do: {metrics_csv_path}")

    # ===== Średnie po 5 foldach =====
    avg_metrics = {}
    metric_keys = [k for k in fold_results[0].keys() if k not in ["fold", "best_epoch"]]
    for key in metric_keys:
        avg_metrics[key] = np.mean([fold[key] for fold in fold_results])

    print("\n=== Średnie wyniki 5-fold CV ===")
    for key, value in avg_metrics.items():
        print(f"{key}: {value:.4f}")

    return df_results, avg_metrics


In [None]:
best_config = {
    "batch_size": 8,
    "learning_rate": 3e-5,
    "max_length": 256,
    "epochs": 10,
    "patience": 3,
    "genre_loss_weight": 1.0,
    "keyword_loss_weight": 1.0,
    "genre_thresh": 0.35,
    "keyword_thresh": 0.5
}

In [152]:
df_results, avg_metrics = five_fold_cv_multitask_save_config(
    texts=texts,
    genres_labels_encoded=genres_labels_encoded,
    keywords_labels_encoded=keywords_labels_encoded,
    genres_n_classes=genres_n_classes,
    keywords_n_classes=keywords_n_classes,
    tokenizer=tokenizer,
    device=device,
    config=best_config,
    n_splits=5,
    metrics_csv_path="results/cross_val_multitask_metrics.csv",
    model_save_prefix="models/multitask"
)

print("\nŚrednie wyniki po 5-fold CV:")
for key, value in avg_metrics.items():
    print(f"{key}: {value:.4f}")



=== Fold 1/5 ===


Training: 100%|██████████| 204/204 [01:14<00:00,  2.75it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.49it/s]


Epoch 1: Loss=2.3023, Genre F1=0.4382, Keyword F1=0.1236


Training: 100%|██████████| 204/204 [01:12<00:00,  2.80it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.84it/s]


Epoch 2: Loss=1.8425, Genre F1=0.5148, Keyword F1=0.1346


Training: 100%|██████████| 204/204 [01:10<00:00,  2.87it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.68it/s]


Epoch 3: Loss=1.5993, Genre F1=0.5422, Keyword F1=0.1461


Training: 100%|██████████| 204/204 [01:11<00:00,  2.86it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.08it/s]


Epoch 4: Loss=1.4213, Genre F1=0.5610, Keyword F1=0.1572


Training: 100%|██████████| 204/204 [01:10<00:00,  2.90it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.84it/s]


Epoch 5: Loss=1.2930, Genre F1=0.5795, Keyword F1=0.1691


Training: 100%|██████████| 204/204 [01:12<00:00,  2.82it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.35it/s]


Epoch 6: Loss=1.1882, Genre F1=0.5853, Keyword F1=0.1797


Training: 100%|██████████| 204/204 [01:08<00:00,  2.97it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.95it/s]


Epoch 7: Loss=1.1087, Genre F1=0.5921, Keyword F1=0.1867


Training: 100%|██████████| 204/204 [01:08<00:00,  3.00it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.65it/s]


Epoch 8: Loss=1.0463, Genre F1=0.5981, Keyword F1=0.1978


Training: 100%|██████████| 204/204 [01:09<00:00,  2.94it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.94it/s]


Epoch 9: Loss=1.0011, Genre F1=0.6041, Keyword F1=0.1995


Training: 100%|██████████| 204/204 [01:08<00:00,  3.00it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.67it/s]


Epoch 10: Loss=0.9742, Genre F1=0.6053, Keyword F1=0.2021


Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.98it/s]


Fold 1 - Genre F1 Micro: 0.6265, Keyword F1 Micro: 0.2055

=== Fold 2/5 ===


Training: 100%|██████████| 204/204 [01:10<00:00,  2.89it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.43it/s]


Epoch 1: Loss=2.2675, Genre F1=0.4582, Keyword F1=0.1212


Training: 100%|██████████| 204/204 [01:11<00:00,  2.86it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.90it/s]


Epoch 2: Loss=1.8460, Genre F1=0.5290, Keyword F1=0.1270


Training: 100%|██████████| 204/204 [01:07<00:00,  3.00it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.59it/s]


Epoch 3: Loss=1.6094, Genre F1=0.5630, Keyword F1=0.1380


Training: 100%|██████████| 204/204 [01:07<00:00,  3.00it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.61it/s]


Epoch 4: Loss=1.4357, Genre F1=0.5781, Keyword F1=0.1503


Training: 100%|██████████| 204/204 [01:07<00:00,  3.00it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.52it/s]


Epoch 5: Loss=1.3096, Genre F1=0.5966, Keyword F1=0.1639


Training: 100%|██████████| 204/204 [01:08<00:00,  2.96it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.50it/s]


Epoch 6: Loss=1.1981, Genre F1=0.6098, Keyword F1=0.1750


Training: 100%|██████████| 204/204 [01:08<00:00,  2.99it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.34it/s]


Epoch 7: Loss=1.1147, Genre F1=0.6093, Keyword F1=0.1819


Training: 100%|██████████| 204/204 [01:08<00:00,  2.99it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.45it/s]


Epoch 8: Loss=1.0555, Genre F1=0.6201, Keyword F1=0.1918


Training: 100%|██████████| 204/204 [01:09<00:00,  2.94it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.55it/s]


Epoch 9: Loss=1.0126, Genre F1=0.6220, Keyword F1=0.1948


Training: 100%|██████████| 204/204 [01:08<00:00,  2.99it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.49it/s]


Epoch 10: Loss=0.9867, Genre F1=0.6249, Keyword F1=0.1975


Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.52it/s]


Fold 2 - Genre F1 Micro: 0.6139, Keyword F1 Micro: 0.2083

=== Fold 3/5 ===


Training: 100%|██████████| 204/204 [01:08<00:00,  2.98it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.05it/s]


Epoch 1: Loss=2.3098, Genre F1=0.4569, Keyword F1=0.1187


Training: 100%|██████████| 204/204 [01:08<00:00,  2.96it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.48it/s]


Epoch 2: Loss=1.8468, Genre F1=0.5101, Keyword F1=0.1373


Training: 100%|██████████| 204/204 [01:08<00:00,  2.99it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.42it/s]


Epoch 3: Loss=1.5993, Genre F1=0.5661, Keyword F1=0.1538


Training: 100%|██████████| 204/204 [01:10<00:00,  2.91it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.75it/s]


Epoch 4: Loss=1.4246, Genre F1=0.5727, Keyword F1=0.1764


Training: 100%|██████████| 204/204 [01:08<00:00,  3.00it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.39it/s]


Epoch 5: Loss=1.2997, Genre F1=0.5898, Keyword F1=0.1782


Training: 100%|██████████| 204/204 [17:51<00:00,  5.25s/it]   
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.73it/s]


Epoch 6: Loss=1.1923, Genre F1=0.5959, Keyword F1=0.1935


Training: 100%|██████████| 204/204 [01:41<00:00,  2.01it/s]
Evaluating: 100%|██████████| 68/68 [00:07<00:00,  8.97it/s]


Epoch 7: Loss=1.1072, Genre F1=0.6039, Keyword F1=0.2024


Training: 100%|██████████| 204/204 [17:08<00:00,  5.04s/it]   
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.62it/s]


Epoch 8: Loss=1.0506, Genre F1=0.6095, Keyword F1=0.2056


Training: 100%|██████████| 204/204 [07:48<00:00,  2.30s/it]   
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.87it/s]


Epoch 9: Loss=1.0041, Genre F1=0.6149, Keyword F1=0.2202


Training: 100%|██████████| 204/204 [01:07<00:00,  3.00it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.41it/s]


Epoch 10: Loss=0.9825, Genre F1=0.6144, Keyword F1=0.2210


Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.27it/s]


Fold 3 - Genre F1 Micro: 0.6244, Keyword F1 Micro: 0.2040

=== Fold 4/5 ===


Training: 100%|██████████| 204/204 [01:10<00:00,  2.90it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.99it/s]


Epoch 1: Loss=2.3248, Genre F1=0.4519, Keyword F1=0.1168


Training: 100%|██████████| 204/204 [01:10<00:00,  2.88it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.12it/s]


Epoch 2: Loss=1.8673, Genre F1=0.5153, Keyword F1=0.1374


Training: 100%|██████████| 204/204 [01:11<00:00,  2.84it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.55it/s]


Epoch 3: Loss=1.6156, Genre F1=0.5623, Keyword F1=0.1505


Training: 100%|██████████| 204/204 [01:12<00:00,  2.80it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.85it/s]


Epoch 4: Loss=1.4427, Genre F1=0.5965, Keyword F1=0.1598


Training: 100%|██████████| 204/204 [01:12<00:00,  2.81it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.75it/s]


Epoch 5: Loss=1.3065, Genre F1=0.5997, Keyword F1=0.1755


Training: 100%|██████████| 204/204 [01:13<00:00,  2.79it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.96it/s]


Epoch 6: Loss=1.1989, Genre F1=0.6064, Keyword F1=0.1792


Training: 100%|██████████| 204/204 [02:55<00:00,  1.16it/s]
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.58it/s]


Epoch 7: Loss=1.1166, Genre F1=0.6129, Keyword F1=0.1959


Training: 100%|██████████| 204/204 [01:10<00:00,  2.90it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.08it/s]


Epoch 8: Loss=1.0530, Genre F1=0.6241, Keyword F1=0.2029


Training: 100%|██████████| 204/204 [01:10<00:00,  2.88it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.15it/s]


Epoch 9: Loss=1.0069, Genre F1=0.6274, Keyword F1=0.2091


Training: 100%|██████████| 204/204 [01:11<00:00,  2.86it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.92it/s]


Epoch 10: Loss=0.9797, Genre F1=0.6273, Keyword F1=0.2110


Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.04it/s]


Fold 4 - Genre F1 Micro: 0.6287, Keyword F1 Micro: 0.2034

=== Fold 5/5 ===


Training: 100%|██████████| 204/204 [01:12<00:00,  2.83it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.41it/s]


Epoch 1: Loss=2.2730, Genre F1=0.4627, Keyword F1=0.1139


Training: 100%|██████████| 204/204 [01:11<00:00,  2.85it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.06it/s]


Epoch 2: Loss=1.8447, Genre F1=0.5363, Keyword F1=0.1361


Training: 100%|██████████| 204/204 [01:09<00:00,  2.92it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.15it/s]


Epoch 3: Loss=1.6076, Genre F1=0.5747, Keyword F1=0.1527


Training: 100%|██████████| 204/204 [01:08<00:00,  2.96it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 11.22it/s]


Epoch 4: Loss=1.4359, Genre F1=0.5928, Keyword F1=0.1694


Training: 100%|██████████| 204/204 [10:29<00:00,  3.09s/it]   
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.56it/s]


Epoch 5: Loss=1.3033, Genre F1=0.5923, Keyword F1=0.1678


Training: 100%|██████████| 204/204 [16:57<00:00,  4.99s/it]   
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.66it/s]


Epoch 6: Loss=1.1940, Genre F1=0.6085, Keyword F1=0.1843


Training: 100%|██████████| 204/204 [17:05<00:00,  5.03s/it]   
Evaluating: 100%|██████████| 68/68 [00:05<00:00, 11.69it/s]


Epoch 7: Loss=1.1119, Genre F1=0.6235, Keyword F1=0.1956


Training: 100%|██████████| 204/204 [12:14<00:00,  3.60s/it]   
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.73it/s]


Epoch 8: Loss=1.0456, Genre F1=0.6307, Keyword F1=0.2002


Training: 100%|██████████| 204/204 [01:10<00:00,  2.89it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.87it/s]


Epoch 9: Loss=1.0032, Genre F1=0.6319, Keyword F1=0.2025


Training: 100%|██████████| 204/204 [01:11<00:00,  2.85it/s]
Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.92it/s]


Epoch 10: Loss=0.9775, Genre F1=0.6279, Keyword F1=0.2063


Evaluating: 100%|██████████| 68/68 [00:06<00:00, 10.98it/s]


Fold 5 - Genre F1 Micro: 0.6272, Keyword F1 Micro: 0.2118

Zapisano metryki wszystkich foldów do: results/cross_val_multitask_metrics.csv

=== Średnie wyniki 5-fold CV ===
loss: 1.8270
genre_f1_score: 0.6169
genre_f1_micro: 0.6242
genre_f1_macro: 0.5752
genre_precision_micro: 0.4982
genre_recall_micro: 0.8357
genre_hamming_loss: 0.1702
genre_hamming_accuracy: 0.8298
genre_jaccard_samples: 0.4796
keyword_f1_score: 0.2068
keyword_f1_micro: 0.2066
keyword_f1_macro: 0.2081
keyword_precision_micro: 0.1250
keyword_recall_micro: 0.5967
keyword_hamming_loss: 0.1641
keyword_hamming_accuracy: 0.8359
keyword_jaccard_samples: 0.1254

Średnie wyniki po 5-fold CV:
loss: 1.8270
genre_f1_score: 0.6169
genre_f1_micro: 0.6242
genre_f1_macro: 0.5752
genre_precision_micro: 0.4982
genre_recall_micro: 0.8357
genre_hamming_loss: 0.1702
genre_hamming_accuracy: 0.8298
genre_jaccard_samples: 0.4796
keyword_f1_score: 0.2068
keyword_f1_micro: 0.2066
keyword_f1_macro: 0.2081
keyword_precision_micro: 0.1250
keyword