In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/datasets-classificationsynopsis/final_combined_movies_5genres.csv


In [2]:
# -*- coding: utf-8 -*-
"""
Movie Genre Classification with IndoBERT
Environment: Kaggle
"""

!pip install optuna




In [3]:
# BAGIAN PERTAMA - Import dan Konfigurasi
import os
import logging
import datetime
import json
import argparse
import gc
import sys
import codecs
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Union

import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
import torch.nn.functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import re
import psutil
import optuna
from optuna.trial import Trial

# Define Base Path for Kaggle
BASE_PATH = Path('/kaggle/working')
DATASETS_PATH = Path('/kaggle/input/datasets-classificationsynopsis')

# Configuration Constants
class Config:
    # Model Parameters
    MODEL_PARAMS = {
        'EPOCHS': 100,
        'BATCH_SIZE': 10,
        'LEARNING_RATE': 1e-5,
        'MAX_LENGTH': 512,
        'TEST_SIZE': 0.15,
        'WEIGHT_DECAY': 0.05,
        'MIXUP_PROB': 0.5,
        'PATIENCE': 5,
        'SMOOTHING': 0.2
    }

    # Optimization Parameters
    OPTIM_PARAMS = {
        'batch_size': [8, 16, 32],
        'learning_rate': [1e-5, 2e-5, 3e-5],
        'weight_decay': [0.01, 0.02],
        'mixup_prob': [0.2, 0.3],
        'smoothing': [0.1, 0.15]
    }

    # Paths Configuration untuk Kaggle
    BASE_DIR = BASE_PATH
    DATA_PATH = Path('/kaggle/input/datasets-classificationsynopsis/final_combined_movies_5genres.csv')
    LOG_DIR = BASE_DIR / 'logs'
    BACKUP_DIR = BASE_DIR / 'backups'
    TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    EXPERIMENT_DIR = LOG_DIR / 'experiments' / TIMESTAMP

    # Model and Data Paths
    MODEL_SAVE_DIR = EXPERIMENT_DIR / 'model'
    TOKENIZER_SAVE_DIR = EXPERIMENT_DIR / 'tokenizer'
    METRICS_DIR = EXPERIMENT_DIR / 'metrics'
    PLOTS_DIR = EXPERIMENT_DIR / 'plots'
    CM_DIR = PLOTS_DIR / 'confusion_matrices'

    # Model Files
    MODEL_BEST_ACC = MODEL_SAVE_DIR / "best_accuracy"
    MODEL_BEST_LOSS = MODEL_SAVE_DIR / "best_loss"
    TOKENIZER_BEST_ACC = TOKENIZER_SAVE_DIR / "best_accuracy"
    TOKENIZER_BEST_LOSS = TOKENIZER_SAVE_DIR / "best_loss"
    DATA_PATH = Path('/kaggle/input/datasets-classificationsynopsis/final_combined_movies_5genres.csv')  # Path langsung ke file CSV

    # Device Configuration
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    SAMPLE_SIZE: Optional[int] = None

    @classmethod
    def create_directories(cls) -> None:
        """Create all necessary directories in Kaggle working directory"""
        directories = [
            cls.LOG_DIR, cls.BACKUP_DIR,
            cls.EXPERIMENT_DIR, cls.MODEL_SAVE_DIR, cls.TOKENIZER_SAVE_DIR,
            cls.METRICS_DIR, cls.PLOTS_DIR, cls.CM_DIR
        ]
        for dir_path in directories:
            dir_path.mkdir(parents=True, exist_ok=True)
            print(f"Created directory: {dir_path}")

    @classmethod
    def setup_logging(cls) -> None:
        """Setup logging configuration untuk Kaggle"""
        log_file = cls.EXPERIMENT_DIR / 'training.log'
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file, encoding='utf-8', mode='a'),
                logging.StreamHandler(sys.stdout)
            ]
        )
        logging.info(f"Log file created at: {log_file}")

# Environment Check Function
def check_environment() -> bool:
    """Verify Kaggle environment and paths"""
    try:
        # Check if datasets directory exists
        if not DATASETS_PATH.exists():
            raise RuntimeError(f"Dataset directory tidak ditemukan di: {DATASETS_PATH}")
        
        # List available files in dataset directory
        print("\nFiles in dataset directory:")
        for file in DATASETS_PATH.glob('*'):
            print(f"- {file.name}")

        # Check if dataset exists
        if not Config.DATA_PATH.exists():
            raise RuntimeError(f"Dataset tidak ditemukan di: {Config.DATA_PATH}")

        # Check GPU availability
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9  # Convert to GB
            print(f"GPU tersedia: {gpu_name}")
            print(f"GPU Memory: {gpu_memory:.2f} GB")
        else:
            print("WARNING: GPU tidak tersedia, menggunakan CPU")

        return True
    except Exception as e:
        print(f"Error dalam setup environment: {str(e)}")
        return False
class DynamicThresholdOptimizer:
    """Class untuk mengoptimalkan threshold per-class secara dinamis"""
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.thresholds = [0.5] * num_classes  # Initial thresholds
        self.performance_history = {i: [] for i in range(num_classes)}
        self.best_thresholds = [0.5] * num_classes
        self.best_f1_scores = [0.0] * num_classes

    def optimize_thresholds(self, true_labels, predictions, class_names):
        """Optimize thresholds based on F1 score"""
        logging.info("Optimizing classification thresholds...")
        
        for class_idx in range(self.num_classes):
            best_threshold = 0.5
            best_f1 = 0.0
            
            # Test different thresholds
            for threshold in np.arange(0.3, 0.8, 0.05):
                class_preds = (predictions[:, class_idx] > threshold).astype(int)
                f1 = f1_score(true_labels[:, class_idx], class_preds, zero_division=0)
                
                if f1 > best_f1:
                    best_f1 = f1
                    best_threshold = threshold
            
            self.thresholds[class_idx] = best_threshold
            self.performance_history[class_idx].append({
                'threshold': best_threshold,
                'f1_score': best_f1
            })
            
            if best_f1 > self.best_f1_scores[class_idx]:
                self.best_f1_scores[class_idx] = best_f1
                self.best_thresholds[class_idx] = best_threshold
            
            logging.info(f"Class '{class_names[class_idx]}': Optimal threshold = {best_threshold:.3f}, F1 Score = {best_f1:.3f}")

    def apply_thresholds(self, predictions):
        """Apply optimized thresholds to predictions"""
        thresholded_preds = np.zeros_like(predictions)
        for i in range(self.num_classes):
            thresholded_preds[:, i] = (predictions[:, i] > self.thresholds[i]).astype(int)
        return thresholded_preds

    def save_threshold_history(self, save_path, class_names):
        """Save threshold optimization history"""
        history_data = {
            'class_thresholds': {
                class_names[i]: {
                    'current_threshold': self.thresholds[i],
                    'best_threshold': self.best_thresholds[i],
                    'best_f1_score': self.best_f1_scores[i],
                    'history': self.performance_history[i]
                } for i in range(self.num_classes)
            }
        }
        
        with open(save_path / 'threshold_history.json', 'w') as f:
            json.dump(history_data, f, indent=4)

class PerformanceTracker:
    """Class untuk melacak performa per-class selama training"""
    def __init__(self, num_classes, class_names):
        self.num_classes = num_classes
        self.class_names = class_names
        self.metrics_history = {name: {
            'f1_scores': [],
            'precisions': [],
            'recalls': [],
            'accuracies': []
        } for name in class_names}
        self.best_metrics = {name: {
            'f1_score': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'accuracy': 0.0,
            'epoch': 0
        } for name in class_names}

    def update_metrics(self, true_labels, predictions, epoch):
        """Update performance metrics for each class"""
        for i, class_name in enumerate(self.class_names):
            # Calculate metrics
            accuracy = accuracy_score(true_labels[:, i], predictions[:, i])
            precision = precision_score(true_labels[:, i], predictions[:, i], zero_division=0)
            recall = recall_score(true_labels[:, i], predictions[:, i], zero_division=0)
            f1 = f1_score(true_labels[:, i], predictions[:, i], zero_division=0)
            
            # Update history
            self.metrics_history[class_name]['accuracies'].append(accuracy)
            self.metrics_history[class_name]['precisions'].append(precision)
            self.metrics_history[class_name]['recalls'].append(recall)
            self.metrics_history[class_name]['f1_scores'].append(f1)
            
            # Update best metrics if necessary
            if f1 > self.best_metrics[class_name]['f1_score']:
                self.best_metrics[class_name].update({
                    'f1_score': f1,
                    'precision': precision,
                    'recall': recall,
                    'accuracy': accuracy,
                    'epoch': epoch
                })

    def plot_performance_trends(self, save_path):
        """Plot performance trends untuk setiap class"""
        for metric in ['f1_scores', 'precisions', 'recalls', 'accuracies']:
            plt.figure(figsize=(12, 6))
            for class_name in self.class_names:
                plt.plot(
                    self.metrics_history[class_name][metric],
                    label=class_name
                )
            
            plt.title(f'{metric.replace("_", " ").title()} Trends per Class')
            plt.xlabel('Epoch')
            plt.ylabel('Score')
            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(save_path / f'{metric}_trends.png')
            plt.close()

    def save_performance_history(self, save_path):
        """Save performance history ke file"""
        history_data = {
            'metrics_history': self.metrics_history,
            'best_metrics': self.best_metrics
        }
        
        with open(save_path / 'performance_history.json', 'w') as f:
            json.dump(history_data, f, indent=4)

# Memory Management
class ModelManager:
    """Context manager for model memory management"""
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def __enter__(self):
        return self.model, self.tokenizer

    def __exit__(self, exc_type, exc_val, exc_tb):
        del self.model
        del self.tokenizer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

# Error Handling
def error_handler(func):
    """Decorator for consistent error handling"""
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logging.error(f"Error in {func.__name__}: {str(e)}")
            raise
    return wrapper

# Utility Functions
def get_memory_usage() -> float:
    """Get current memory usage of the program"""
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / 1024 / 1024  # in MB

def log_memory(step_name: str) -> None:
    """Log memory usage with consistent format"""
    memory = get_memory_usage()
    logging.info(f"Memory usage after {step_name}: {memory:.2f} MB")

def log_system_info() -> None:
    """Log system information including GPU details"""
    logging.info("System Information:")
    logging.info(f"Python Version: {sys.version}")
    logging.info(f"CPU Count: {os.cpu_count()}")
    logging.info(f"Initial Memory Usage: {get_memory_usage():.2f} MB")
    if torch.cuda.is_available():
        logging.info(f"GPU Device: {torch.cuda.get_device_name(0)}")
        logging.info(f"GPU Memory Total: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        logging.info(f"CUDA Version: {torch.version.cuda}")

# Create directories and setup logging
if check_environment():
    Config.create_directories()
    Config.setup_logging()
    log_system_info()
else:
    print("Failed to initialize environment. Please check the setup.")
    sys.exit(1)


Files in dataset directory:
- final_combined_movies_5genres.csv
GPU tersedia: Tesla T4
GPU Memory: 15.83 GB
Created directory: /kaggle/working/logs
Created directory: /kaggle/working/backups
Created directory: /kaggle/working/logs/experiments/20250215_214222
Created directory: /kaggle/working/logs/experiments/20250215_214222/model
Created directory: /kaggle/working/logs/experiments/20250215_214222/tokenizer
Created directory: /kaggle/working/logs/experiments/20250215_214222/metrics
Created directory: /kaggle/working/logs/experiments/20250215_214222/plots
Created directory: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 21:42:23,036 - INFO - Log file created at: /kaggle/working/logs/experiments/20250215_214222/training.log
2025-02-15 21:42:23,036 - INFO - System Information:
2025-02-15 21:42:23,037 - INFO - Python Version: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
2025-02-15 21:42:23,044 - INFO - CPU Count: 4
2025-02-15 21:42:23,046 - INFO

In [4]:
# BAGIAN KEDUA - Dataset dan Data Processing

class MovieDataset(Dataset):
    """Dataset class untuk movie genre classification"""
    def __init__(self, texts: Union[List, np.ndarray],
                 labels: Union[List, np.ndarray],
                 tokenizer,
                 max_length: int = 512):
        # Input validation
        if not isinstance(texts, (list, np.ndarray)):
            raise ValueError("texts must be a list or numpy array")
        if not isinstance(labels, (list, np.ndarray)):
            raise ValueError("labels must be a list or numpy array")
        if len(texts) != len(labels):
            raise ValueError("texts and labels must have the same length")

        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self) -> int:
        return len(self.texts)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        text = str(self.texts[idx])
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten().long(),
            'attention_mask': encoding['attention_mask'].flatten().long(),
            'labels': torch.FloatTensor(self.labels[idx])
        }

class DataProcessor:
    """Class for handling data preprocessing and loading"""
    @staticmethod
    def clean_text(text: str) -> str:
        """Clean and preprocess text data"""
        if isinstance(text, str):
            text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
            text = re.sub(r'\S+@\S+', '', text)
            text = re.sub(r'[^\w\s]', ' ', text)
            text = re.sub(r'\s+', ' ', text)
            return text.strip().lower()
        return ''

    @staticmethod
    @error_handler
    def load_and_preprocess_data(data_path: Path, sample_size: Optional[int] = None) -> pd.DataFrame:
        """Load and preprocess data with proper encoding handling"""
        if not data_path.exists():
            raise FileNotFoundError(f"Data file not found: {data_path}")

        if sample_size is not None and (not isinstance(sample_size, int) or sample_size <= 0):
            raise ValueError("sample_size must be a positive integer")

        logging.info("Loading and preprocessing data...")
        log_memory("start")
        initial_size = None

        # Try different encodings for Google Drive compatibility
        encodings_to_try = ['utf-8', 'utf-8-sig', 'latin1', 'iso-8859-1', 'cp1252']
        df = None

        for encoding in encodings_to_try:
            try:
                df = pd.read_csv(data_path, encoding=encoding)
                logging.info(f"Successfully loaded data using {encoding} encoding")
                initial_size = len(df)
                break
            except (UnicodeDecodeError, UnicodeError):
                continue

        if df is None:
            raise UnicodeError(f"Failed to read file with any of these encodings: {encodings_to_try}")

        log_memory("data loading")

        # Sample data if requested
        if sample_size:
            if sample_size > initial_size:
                logging.warning(f"Requested sample_size ({sample_size}) is larger than dataset size ({initial_size})")
                sample_size = initial_size
            logging.info(f"Taking sample of {sample_size} from {initial_size} total samples")
            df = df.head(sample_size)
        else:
            logging.info(f"Using full dataset with {initial_size} samples")

        # Log sample data
        logging.info("\nSample data:")
        for i, row in df.head(3).iterrows():
            logging.info(f"\nSample {i+1}:")
            logging.info(f"Synopsis: {row['sinopsis'][:100]}...")
            logging.info(f"Genre: {row['genre']}")

        # Preprocess data
        logging.info("\nPreprocessing text data...")
        tqdm.pandas()
        df['sinopsis'] = df['sinopsis'].progress_apply(DataProcessor.clean_text)
        df['genre'] = df['genre'].str.split(',')
        df = df.dropna(subset=['sinopsis', 'genre'])

        log_memory("preprocessing")
        return df

    @staticmethod
    def prepare_data(df: pd.DataFrame, mlb: MultiLabelBinarizer) -> Tuple:
        """Prepare data for training"""
        genre_labels = mlb.fit_transform(df['genre'])
        return train_test_split(
            df['sinopsis'].values,
            genre_labels,
            test_size=Config.MODEL_PARAMS['TEST_SIZE'],
            random_state=42,
            stratify=genre_labels if len(genre_labels.shape) == 1 else None
        )

    @staticmethod
    def create_weighted_sampler(genre_labels: np.ndarray) -> WeightedRandomSampler:
        """Create weighted sampler for balanced batch sampling"""
        logging.info("Creating weighted sampler for balanced batch sampling...")

        sample_weights = np.zeros(len(genre_labels))
        for i in range(genre_labels.shape[1]):
            sample_weights += genre_labels[:, i] * (1.0 / np.sum(genre_labels[:, i]))

        sample_weights = sample_weights / sample_weights.sum()
        sampler = WeightedRandomSampler(
            weights=sample_weights,
            num_samples=len(sample_weights),
            replacement=True
        )

        logging.info(f"Created sampler with {len(sample_weights)} weights")
        return sampler

    @staticmethod
    def calculate_class_weights(genre_labels: np.ndarray, mlb: MultiLabelBinarizer) -> torch.Tensor:
        """Calculate class weights for handling imbalanced data"""
        class_weights = []
        logging.info("\nCalculating class weights for handling imbalanced data...")

        for i in range(genre_labels.shape[1]):
            genre = mlb.classes_[i]
            positive_samples = np.sum(genre_labels[:, i])
            total_samples = len(genre_labels)

            weights = compute_class_weight(
                class_weight='balanced',
                classes=np.array([0, 1]),
                y=genre_labels[:, i]
            )
            class_weights.append(weights[1])

            logging.info(f"{genre}:")
            logging.info(f"  Positive samples: {positive_samples}")
            logging.info(f"  Negative samples: {total_samples - positive_samples}")
            logging.info(f"  Weight: {weights[1]:.2f}")

        return torch.FloatTensor(class_weights).to(Config.DEVICE)

class ModelSetup:
    """Class for handling model setup and data loaders"""
    @staticmethod
    def setup_model_and_tokenizer(num_labels: int) -> Tuple:
        """Setup model dan tokenizer"""
        logging.info("Setting up model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1')
        model = AutoModelForSequenceClassification.from_pretrained(
            'indobenchmark/indobert-base-p1',
            num_labels=num_labels,
            problem_type="multi_label_classification"
        ).to(Config.DEVICE)
        logging.info("Model and tokenizer setup completed")
        return model, tokenizer

    @staticmethod
    def setup_dataloaders(X_train: np.ndarray,
                         X_test: np.ndarray,
                         y_train: np.ndarray,
                         y_test: np.ndarray,
                         tokenizer,
                         batch_size: int) -> Tuple:
        """Setup data loaders"""
        logging.info("Setting up data loaders...")
        train_dataset = MovieDataset(X_train, y_train, tokenizer)
        val_dataset = MovieDataset(X_test, y_test, tokenizer)
        sampler = DataProcessor.create_weighted_sampler(y_train)

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            sampler=sampler,
            num_workers=0,  # Set to 0 for Colab compatibility
            pin_memory=True if torch.cuda.is_available() else False
        )

        val_loader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            num_workers=0,  # Set to 0 for Colab compatibility
            pin_memory=True if torch.cuda.is_available() else False
        )

        logging.info(f"Created data loaders with batch size {batch_size}")
        return train_loader, val_loader

In [5]:
# BAGIAN KETIGA - Loss Functions dan Training

class LossFunctions:
    """Class untuk menangani berbagai loss functions"""

    @staticmethod
    def focal_loss(predictions: torch.Tensor,
                  targets: torch.Tensor,
                  gamma: float = 2.0,
                  alpha: float = 0.25) -> torch.Tensor:
        """Calculate focal loss for multi-label classification"""
        ce_loss = F.binary_cross_entropy_with_logits(predictions, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = alpha * (1-pt)**gamma * ce_loss
        return focal_loss.mean()

    @staticmethod
    def label_smoothing_loss(outputs: torch.Tensor,
                           targets: torch.Tensor,
                           smoothing: float) -> torch.Tensor:
        """Calculate loss with label smoothing"""
        log_probs = F.log_softmax(outputs, dim=-1)
        targets = torch.clamp(targets * (1.0 - smoothing), min=smoothing / (targets.size(-1) - 1))
        return torch.mean(torch.sum(-targets * log_probs, dim=-1))

class DataAugmentation:
    """Class untuk menangani augmentasi data"""

    @staticmethod
    def apply_mixup(batch: Dict[str, torch.Tensor], alpha: float = 0.2) -> Dict[str, torch.Tensor]:
        """Apply mixup augmentation to batch"""
        # Move tensors to device
        input_ids = batch['input_ids'].to(Config.DEVICE)
        attention_mask = batch['attention_mask'].to(Config.DEVICE)
        labels = batch['labels'].to(Config.DEVICE)

        lam = np.random.beta(alpha, alpha)
        mixed_input_ids = lam * input_ids + (1 - lam) * input_ids.flip(0)
        mixed_attention_mask = lam * attention_mask + (1 - lam) * attention_mask.flip(0)
        mixed_labels = lam * labels + (1 - lam) * labels.flip(0)

        return {
            'input_ids': mixed_input_ids.long(),
            'attention_mask': mixed_attention_mask.long(),
            'labels': mixed_labels
        }

class Visualization:
    """Class untuk menangani visualisasi"""

    @staticmethod
    def plot_confusion_matrices(predictions: np.ndarray,
                              labels: np.ndarray,
                              classes: List[str]) -> None:
        """Plot detailed confusion matrices for each genre"""
        logging.info("Generating detailed confusion matrices for each genre...")

        # Pastikan input dalam format yang benar
        predictions = np.array(predictions)
        labels = np.array(labels)

        if len(predictions.shape) == 1:
            predictions = predictions.reshape(-1, 1)
        if len(labels.shape) == 1:
            labels = labels.reshape(-1, 1)

        for i, genre in enumerate(classes):
            try:
                genre_preds = predictions[:, i]
                genre_labels = labels[:, i]

                # Calculate confusion matrix
                cm = confusion_matrix(genre_labels, genre_preds)

                # Extract values
                TN, FP = cm[0]
                FN, TP = cm[1]

                # Calculate metrics
                accuracy = (TP + TN) / (TP + TN + FP + FN)
                precision = TP / (TP + FP) if (TP + FP) > 0 else 0
                recall = TP / (TP + FN) if (TP + FN) > 0 else 0
                f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

                # Create plot
                plt.figure(figsize=(12, 8))

                # Main confusion matrix plot
                main_ax = plt.subplot2grid((3, 3), (0, 0), rowspan=2, colspan=2)

                # Plot heatmap
                plot_labels = [f'Non-{genre}', genre]
                sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                          xticklabels=plot_labels,
                          yticklabels=plot_labels,
                          ax=main_ax)

                main_ax.set_title(f'Confusion Matrix - {genre}')
                main_ax.set_ylabel('True Label')
                main_ax.set_xlabel('Predicted Label')

                # Create text box for detailed metrics
                plt.subplot2grid((3, 3), (0, 2), rowspan=3)
                plt.axis('off')

                metrics_text = [
                    f'Detailed Metrics for {genre}:\n',
                    f'\nConfusion Matrix Values:',
                    f'True Negative (TN): {TN}',
                    f'False Positive (FP): {FP}',
                    f'False Negative (FN): {FN}',
                    f'True Positive (TP): {TP}',
                    f'\nPerformance Metrics:',
                    f'Accuracy: {accuracy:.3f}',
                    f'Precision: {precision:.3f}',
                    f'Recall: {recall:.3f}',
                    f'F1 Score: {f1:.3f}',
                    f'\nAdditional Information:',
                    f'Total Samples: {len(genre_labels)}',
                    f'Positive Samples: {np.sum(genre_labels)}',
                    f'Negative Samples: {len(genre_labels) - np.sum(genre_labels)}'
                ]

                plt.text(0, 0.95, '\n'.join(metrics_text),
                        fontsize=10,
                        verticalalignment='top',
                        bbox=dict(boxstyle='round,pad=1', facecolor='white', alpha=0.8))

                # Add interpretation text
                interpretation_ax = plt.subplot2grid((3, 3), (2, 0), colspan=2)
                interpretation_ax.axis('off')

                interpretation_text = [
                    'Matrix Interpretation:',
                    f'• Model correctly identified {TN} non-{genre} movies (True Negatives)',
                    f'• Model correctly identified {TP} {genre} movies (True Positives)',
                    f'• Model incorrectly classified {FP} non-{genre} movies as {genre} (False Positives)',
                    f'• Model failed to identify {FN} {genre} movies (False Negatives)'
                ]

                interpretation_ax.text(0, 0.5, '\n'.join(interpretation_text),
                                    fontsize=9,
                                    verticalalignment='center',
                                    bbox=dict(boxstyle='round,pad=1', facecolor='lightyellow', alpha=0.3))

                plt.tight_layout()
                plot_path = Config.CM_DIR / f'confusion_matrix_{genre}.png'
                plt.savefig(plot_path, dpi=300, bbox_inches='tight')
                plt.close()

            except Exception as e:
                logging.error(f"Error plotting confusion matrix for genre {genre}: {str(e)}")
                continue

        logging.info(f"Confusion matrices saved in: {Config.CM_DIR}")

    @staticmethod
    def plot_training_history(history_data: Dict) -> None:
        """Plot and save training metrics"""
        plt.figure(figsize=(15, 5))

        # Plot Loss
        plt.subplot(1, 3, 1)
        plt.plot(history_data['epochs'], history_data['training_loss'],
                label='Training Loss', marker='o')
        plt.plot(history_data['epochs'], history_data['validation_loss'],
                label='Validation Loss', marker='o')
        plt.title('Training History - Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True)

        # Plot Accuracy
        plt.subplot(1, 3, 2)
        plt.plot(history_data['epochs'], history_data['accuracy'],
                label='Accuracy', marker='o', color='green')
        plt.title('Training History - Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.grid(True)

        # Plot Loss Difference
        plt.subplot(1, 3, 3)
        loss_diff = np.array(history_data['training_loss']) - np.array(history_data['validation_loss'])
        plt.plot(history_data['epochs'], loss_diff,
                label='Loss Difference', marker='o', color='red')
        plt.title('Learning Curve (Train-Val Loss)')
        plt.xlabel('Epoch')
        plt.ylabel('Difference')
        plt.legend()
        plt.grid(True)

        plt.tight_layout()
        plot_path = Config.PLOTS_DIR / 'training_history.png'
        plt.savefig(plot_path, dpi=300, bbox_inches='tight')
        plt.close()
        logging.info(f"Saved training history plots to {plot_path}")

class ModelEvaluator:
    """Class untuk evaluasi model"""

    @staticmethod
    @error_handler
    def evaluate_model(model: torch.nn.Module,
                      val_loader: DataLoader,
                      mlb: MultiLabelBinarizer,
                      threshold_optimizer: DynamicThresholdOptimizer = None,
                      performance_tracker: PerformanceTracker = None,
                      epoch: int = None) -> Dict:
        """Evaluate model performance with dynamic thresholding"""
        model.eval()
        all_preds = []
        all_labels = []
        raw_predictions = []

        logging.info("Starting model evaluation...")
        log_memory("evaluation start")

        try:
            with torch.no_grad(), tqdm(val_loader, desc="Evaluating") as pbar:
                for batch in pbar:
                    input_ids = batch['input_ids'].to(Config.DEVICE)
                    attention_mask = batch['attention_mask'].to(Config.DEVICE)
                    labels = batch['labels'].cpu().numpy()

                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    logits = outputs.logits
                    probs = torch.sigmoid(logits).cpu().numpy()
                    
                    raw_predictions.extend(probs)
                    all_labels.extend(labels)

            raw_predictions = np.array(raw_predictions)
            all_labels = np.array(all_labels)

            # Apply dynamic thresholding if available
            if threshold_optimizer is not None:
                threshold_optimizer.optimize_thresholds(all_labels, raw_predictions, mlb.classes_)
                all_preds = threshold_optimizer.apply_thresholds(raw_predictions)
            else:
                all_preds = (raw_predictions > 0.5).astype(int)

            # Update performance tracker if available
            if performance_tracker is not None and epoch is not None:
                performance_tracker.update_metrics(all_labels, all_preds, epoch)

            # Calculate all metrics
            correct_predictions = np.sum(all_preds == all_labels)
            total_predictions = all_labels.size
            accuracy = correct_predictions / total_predictions

            # Calculate per-genre metrics
            genre_metrics = ModelEvaluator._calculate_genre_metrics(
                all_preds, all_labels, mlb.classes_
            )

            # Calculate macro metrics
            macro_metrics = ModelEvaluator._calculate_macro_metrics(genre_metrics)

            # Save metrics
            evaluation_metrics = {
                'overall': macro_metrics,
                'per_genre': genre_metrics
            }

            metrics_file = Config.METRICS_DIR / 'evaluation_metrics.json'
            with open(metrics_file, 'w', encoding='utf-8') as f:
                json.dump(evaluation_metrics, f, indent=4, ensure_ascii=False)

            # Plot confusion matrices
            Visualization.plot_confusion_matrices(all_preds, all_labels, mlb.classes_)

            log_memory("evaluation end")

            return {
                'accuracy': float(accuracy),
                'macro_f1': float(macro_metrics['macro_f1']),
                'genre_metrics': genre_metrics,
                'raw_predictions': raw_predictions,
                'true_labels': all_labels
            }

        except Exception as e:
            logging.error(f"Error during model evaluation: {str(e)}")
            raise

    @staticmethod
    def _calculate_macro_metrics(genre_metrics: Dict) -> Dict:
        """Calculate macro-averaged metrics"""
        return {
            'accuracy': np.mean([metrics['accuracy'] for metrics in genre_metrics.values()]),
            'macro_f1': np.mean([metrics['f1_score'] for metrics in genre_metrics.values()]),
            'macro_precision': np.mean([metrics['precision'] for metrics in genre_metrics.values()]),
            'macro_recall': np.mean([metrics['recall'] for metrics in genre_metrics.values()])
        }

    @staticmethod
    def _calculate_genre_metrics(predictions: np.ndarray,
                               labels: np.ndarray,
                               classes: List[str]) -> Dict:
        """Calculate metrics for each genre"""
        genre_metrics = {}
        logging.info("\nPer-genre Performance Metrics:")

        for i, genre in enumerate(classes):
            genre_preds = predictions[:, i]
            genre_labels = labels[:, i]

            metrics = {
                'accuracy': float(np.mean(genre_preds == genre_labels)),
                'f1_score': float(f1_score(genre_labels, genre_preds, zero_division=0)),
                'precision': float(precision_score(genre_labels, genre_preds, zero_division=0)),
                'recall': float(recall_score(genre_labels, genre_preds, zero_division=0))
            }

            genre_metrics[genre] = metrics
            logging.info(f"\nMetrics for {genre}:")
            for metric_name, value in metrics.items():
                logging.info(f"{metric_name.capitalize()}: {value:.4f}")

        return genre_metrics

In [6]:
# BAGIAN KEEMPAT - Training dan Hyperparameter Optimization

class ModelTrainer:
    @staticmethod
    @error_handler
    def train_model(sample_size: Optional[int] = None) -> Tuple:
        Config.SAMPLE_SIZE = sample_size
        logging.info("Starting model training")
        log_memory("training start")

        try:
            # Load dan preprocess data
            df = DataProcessor.load_and_preprocess_data(Config.DATA_PATH, Config.SAMPLE_SIZE)
            logging.info(f"\nDataset statistics:")
            logging.info(f"Total samples after preprocessing: {len(df)}")

            # Prepare data
            mlb = MultiLabelBinarizer()
            X_train, X_test, y_train, y_test = DataProcessor.prepare_data(df, mlb)

            # Initialize threshold optimizer and performance tracker
            threshold_optimizer = DynamicThresholdOptimizer(len(mlb.classes_))
            performance_tracker = PerformanceTracker(len(mlb.classes_), mlb.classes_)

            # Log genre distribution
            genre_labels = mlb.fit_transform(df['genre'])
            genre_counts = genre_labels.sum(axis=0)
            for genre, count in zip(mlb.classes_, genre_counts):
                logging.info(f"Genre '{genre}': {count} samples")

            logging.info(f"\nTraining set size: {len(X_train)}")
            logging.info(f"Testing set size: {len(X_test)}")

            # Setup model dan data loaders
            with ModelManager(*ModelSetup.setup_model_and_tokenizer(len(mlb.classes_))) as (model, tokenizer):
                train_loader, val_loader = ModelSetup.setup_dataloaders(
                    X_train, X_test, y_train, y_test,
                    tokenizer, Config.MODEL_PARAMS['BATCH_SIZE']
                )

                # Training loop
                best_val_loss = float('inf')
                best_accuracy = 0.0
                patience_counter = 0
                training_losses = []
                validation_losses = []
                accuracies = []

                optimizer = torch.optim.AdamW(
                    model.parameters(),
                    lr=Config.MODEL_PARAMS['LEARNING_RATE'],
                    weight_decay=Config.MODEL_PARAMS['WEIGHT_DECAY']
                )

                scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer, mode='min', factor=0.5, patience=2, verbose=True
                )

                epochs_range = tqdm(range(Config.MODEL_PARAMS['EPOCHS']), 
                                  desc="Training Progress",
                                  position=0, leave=True)
                
                for epoch in epochs_range:
                    try:
                        # Training phase
                        model.train()
                        epoch_metrics = ModelTrainer._train_epoch(
                            model, train_loader, optimizer,
                            epoch + 1
                        )
                        training_losses.append(epoch_metrics['train_loss'])

                        # Evaluation phase with dynamic thresholding
                        validation_metrics = ModelEvaluator.evaluate_model(
                            model, val_loader, mlb,
                            threshold_optimizer=threshold_optimizer,
                            performance_tracker=performance_tracker,
                            epoch=epoch
                        )
                        
                        current_accuracy = validation_metrics['accuracy']
                        accuracies.append(current_accuracy)

                        # Validation loss calculation
                        avg_val_loss = ModelTrainer._calculate_validation_loss(
                            model, val_loader
                        )
                        validation_losses.append(avg_val_loss)

                        # Update progress bar
                        epochs_range.set_postfix({
                            'Train Loss': f"{epoch_metrics['train_loss']:.4f}",
                            'Val Loss': f"{avg_val_loss:.4f}",
                            'Accuracy': f"{current_accuracy:.4f}"
                        })

                        # Model improvement check
                        model_improved = ModelTrainer._check_model_improvement(
                            model, tokenizer, current_accuracy, avg_val_loss,
                            best_accuracy, best_val_loss
                        )

                        if model_improved:
                            best_accuracy = max(best_accuracy, current_accuracy)
                            best_val_loss = min(best_val_loss, avg_val_loss)
                            patience_counter = 0
                        else:
                            patience_counter += 1

                        # Early stopping check
                        if patience_counter >= Config.MODEL_PARAMS['PATIENCE']:
                            logging.info(f"\nEarly stopping triggered after {epoch + 1} epochs")
                            break

                        scheduler.step(avg_val_loss)
                        logging.info(f"Learning rate: {optimizer.param_groups[0]['lr']}")

                        # Clear GPU cache periodically
                        if torch.cuda.is_available() and (epoch + 1) % 5 == 0:
                            torch.cuda.empty_cache()
                            gc.collect()

                    except Exception as e:
                        logging.error(f"Error during epoch {epoch + 1}: {str(e)}")
                        raise

                # Save performance history and plots
                performance_tracker.plot_performance_trends(Config.PLOTS_DIR)
                performance_tracker.save_performance_history(Config.METRICS_DIR)
                threshold_optimizer.save_threshold_history(Config.METRICS_DIR, mlb.classes_)

                # Save training history
                ModelTrainer._save_training_history(
                    training_losses, validation_losses, accuracies,
                    best_accuracy, best_val_loss, df, mlb,
                    threshold_optimizer, performance_tracker
                )

                return model, tokenizer, mlb, threshold_optimizer

        except Exception as e:
            logging.error(f"Error in training: {str(e)}")
            raise
        finally:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

    @staticmethod
    def _save_training_history(training_losses: List[float],
                             validation_losses: List[float],
                             accuracies: List[float],
                             best_accuracy: float,
                             best_val_loss: float,
                             df: pd.DataFrame,
                             mlb: MultiLabelBinarizer,
                             threshold_optimizer: DynamicThresholdOptimizer,
                             performance_tracker: PerformanceTracker) -> None:
        """Save training history and plot results with threshold and performance info"""
        try:
            # Initialize genre_labels
            genre_labels = mlb.fit_transform(df['genre'])
            
            history_data = {
                'model_info': {
                    'classes': mlb.classes_.tolist(),
                    'total_samples': len(df),
                    'genre_distribution': {
                        genre: int(count) for genre, count in zip(mlb.classes_, genre_labels.sum(axis=0))
                    }
                },
                'training_config': {
                    'batch_size': Config.MODEL_PARAMS['BATCH_SIZE'],
                    'learning_rate': Config.MODEL_PARAMS['LEARNING_RATE'],
                    'max_length': Config.MODEL_PARAMS['MAX_LENGTH'],
                    'weight_decay': Config.MODEL_PARAMS['WEIGHT_DECAY'],
                    'early_stopping': Config.MODEL_PARAMS['PATIENCE'],
                    'mixup_prob': Config.MODEL_PARAMS['MIXUP_PROB'],
                    'train_split': 1-Config.MODEL_PARAMS['TEST_SIZE'],
                    'test_split': Config.MODEL_PARAMS['TEST_SIZE']
                },
                'training_history': {
                    'epochs': list(range(1, len(training_losses) + 1)),
                    'training_loss': [float(loss) for loss in training_losses],
                    'validation_loss': [float(loss) for loss in validation_losses],
                    'accuracy': [float(acc) for acc in accuracies],
                    'best_accuracy': float(best_accuracy),
                    'best_val_loss': float(best_val_loss)
                },
                'thresholding_info': {
                    'final_thresholds': {
                        mlb.classes_[i]: thresh for i, thresh in enumerate(threshold_optimizer.thresholds)
                    },
                    'best_thresholds': {
                        mlb.classes_[i]: thresh for i, thresh in enumerate(threshold_optimizer.best_thresholds)
                    },
                    'best_f1_scores': {
                        mlb.classes_[i]: score for i, score in enumerate(threshold_optimizer.best_f1_scores)
                    }
                },
                'per_class_performance': performance_tracker.best_metrics
            }
    
            # Save history to JSON
            history_file = Config.METRICS_DIR / 'training_history.json'
            with open(history_file, 'w', encoding='utf-8') as f:
                json.dump(history_data, f, indent=4, ensure_ascii=False)
    
            # Plot training history
            Visualization.plot_training_history(history_data['training_history'])
            logging.info("Training history saved successfully")
    
        except Exception as e:
            logging.error(f"Error saving training history: {str(e)}")
            raise
    @staticmethod
    def _train_epoch(model: torch.nn.Module,
                    train_loader: DataLoader,
                    optimizer: torch.optim.Optimizer,
                    epoch: int) -> Dict:
        """Train model for one epoch"""
        total_loss = 0
        steps = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}",
                          position=1, leave=False)

        for batch in progress_bar:
            try:
                optimizer.zero_grad()

                if np.random.random() < Config.MODEL_PARAMS['MIXUP_PROB']:
                    batch = DataAugmentation.apply_mixup(batch)

                input_ids = batch['input_ids'].to(Config.DEVICE)
                attention_mask = batch['attention_mask'].to(Config.DEVICE)
                labels = batch['labels'].to(Config.DEVICE)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                loss = LossFunctions.label_smoothing_loss(
                    outputs.logits, labels, Config.MODEL_PARAMS['SMOOTHING']
                )

                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

                total_loss += loss.item()
                steps += 1
                progress_bar.set_postfix({'training_loss': f'{loss.item():.4f}'})

            except RuntimeError as e:
                if "out of memory" in str(e):
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                    logging.error("GPU out of memory during training. Try reducing batch size.")
                raise

        return {'train_loss': total_loss / steps}

    @staticmethod
    def _calculate_validation_loss(model: torch.nn.Module,
                                 val_loader: DataLoader) -> float:
        """Calculate validation loss"""
        model.eval()
        total_loss = 0
        steps = 0

        with torch.no_grad():
            for batch in val_loader:
                try:
                    input_ids = batch['input_ids'].to(Config.DEVICE)
                    attention_mask = batch['attention_mask'].to(Config.DEVICE)
                    labels = batch['labels'].to(Config.DEVICE)

                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    loss = LossFunctions.focal_loss(outputs.logits, labels)
                    total_loss += loss.item()
                    steps += 1

                except RuntimeError as e:
                    if "out of memory" in str(e):
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
                        logging.error("GPU out of memory during validation. Try reducing batch size.")
                    raise

        return total_loss / steps

    @staticmethod
    def _check_model_improvement(model: torch.nn.Module,
                               tokenizer,
                               current_accuracy: float,
                               current_loss: float,
                               best_accuracy: float,
                               best_loss: float) -> bool:
        """Check if model improved and save if necessary"""
        improved = False

        try:
            if current_accuracy > best_accuracy:
                logging.info(f"New best accuracy: {current_accuracy:.4f}")
                model.save_pretrained(str(Config.MODEL_BEST_ACC))  # Convert to string for Colab
                tokenizer.save_pretrained(str(Config.TOKENIZER_BEST_ACC))
                improved = True

            if current_loss < best_loss:
                logging.info(f"New best loss: {current_loss:.4f}")
                model.save_pretrained(str(Config.MODEL_BEST_LOSS))
                tokenizer.save_pretrained(str(Config.TOKENIZER_BEST_LOSS))
                improved = True

        except Exception as e:
            logging.error(f"Error saving model: {str(e)}")
            raise

        return improved


class HyperparameterOptimizer:
    @staticmethod
    @error_handler
    def objective(trial: Trial, df: pd.DataFrame, mlb: MultiLabelBinarizer) -> float:
        """Objective function untuk Optuna optimization"""
        try:
            # Get trial parameters
            params = HyperparameterOptimizer._get_trial_parameters(trial)

            # Prepare data
            X_train, X_test, y_train, y_test = DataProcessor.prepare_data(df, mlb)

            # Setup model dan data loaders
            with ModelManager(*ModelSetup.setup_model_and_tokenizer(len(mlb.classes_))) as (model, tokenizer):
                train_loader, val_loader = ModelSetup.setup_dataloaders(
                    X_train, X_test, y_train, y_test,
                    tokenizer, params['batch_size']
                )

                # Training loop singkat untuk optimasi
                best_val_metrics = HyperparameterOptimizer._train_trial(
                    trial, model, train_loader, val_loader, mlb, params
                )

                # Clear GPU cache after each trial
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()

            return best_val_metrics['macro_f1']

        except Exception as e:
            logging.error(f"Error in optimization objective: {str(e)}")
            raise

    @staticmethod
    def _get_trial_parameters(trial: Trial) -> Dict:
        """Get parameters for trial"""
        params = {}
        try:
            # Batch size dari list nilai diskrit
            params['batch_size'] = trial.suggest_categorical('batch_size',
                Config.OPTIM_PARAMS['batch_size'])

            # Learning rate dari list nilai diskrit
            params['learning_rate'] = trial.suggest_categorical('learning_rate',
                Config.OPTIM_PARAMS['learning_rate'])

            # Weight decay dari list nilai diskrit
            params['weight_decay'] = trial.suggest_categorical('weight_decay',
                Config.OPTIM_PARAMS['weight_decay'])

            # Mixup probability dari list nilai diskrit
            params['mixup_prob'] = trial.suggest_categorical('mixup_prob',
                Config.OPTIM_PARAMS['mixup_prob'])

            # Smoothing dari list nilai diskrit
            params['smoothing'] = trial.suggest_categorical('smoothing',
                Config.OPTIM_PARAMS['smoothing'])

            logging.info(f"Trial parameter set: {params}")

        except Exception as e:
            logging.error(f"Error getting trial parameters: {str(e)}")
            raise
        return params

    @staticmethod
    def _train_trial(trial: Trial,
                    model: torch.nn.Module,
                    train_loader: DataLoader,
                    val_loader: DataLoader,
                    mlb: MultiLabelBinarizer,
                    params: Dict) -> Dict:
        """Train model for one trial"""
        try:
            optimizer = torch.optim.AdamW(
                model.parameters(),
                lr=params['learning_rate'],
                weight_decay=params['weight_decay']
            )

            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.5, patience=2, verbose=True
            )

            best_val_metrics = None

            for epoch in range(3):  # Reduced epochs for optimization
                # Training
                model.train()
                total_loss = 0
                steps = 0

                progress_bar = tqdm(train_loader,
                                  desc=f"Epoch {epoch+1}/3",
                                  position=0,
                                  leave=False)

                for batch in progress_bar:
                    try:
                        optimizer.zero_grad()

                        if np.random.random() < params['mixup_prob']:
                            batch = DataAugmentation.apply_mixup(batch)

                        input_ids = batch['input_ids'].to(Config.DEVICE)
                        attention_mask = batch['attention_mask'].to(Config.DEVICE)
                        labels = batch['labels'].to(Config.DEVICE)

                        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                        loss = LossFunctions.label_smoothing_loss(
                            outputs.logits, labels, params['smoothing']
                        )

                        loss.backward()
                        optimizer.step()

                        total_loss += loss.item()
                        steps += 1

                        # Update progress bar
                        progress_bar.set_postfix({
                            'loss': f'{loss.item():.4f}',
                            'avg_loss': f'{(total_loss/steps):.4f}'
                        })

                    except RuntimeError as e:
                        if "out of memory" in str(e):
                            if torch.cuda.is_available():
                                torch.cuda.empty_cache()
                            logging.error("GPU OOM in trial. Trying to recover...")
                            continue
                        raise

                avg_loss = total_loss / steps

                # Evaluation
                metrics = ModelEvaluator.evaluate_model(model, val_loader, mlb)
                current_f1 = metrics['macro_f1']

                logging.info(f"Trial {trial.number}, Epoch {epoch+1}: "
                           f"Loss = {avg_loss:.4f}, F1 = {current_f1:.4f}")

                if best_val_metrics is None or current_f1 > best_val_metrics['macro_f1']:
                    best_val_metrics = metrics

                scheduler.step(metrics['macro_f1'])

                # Report intermediate value
                trial.report(metrics['macro_f1'], epoch)

                # Handle pruning based on the intermediate value
                if trial.should_prune():
                    raise optuna.TrialPruned()

                # Clear GPU cache
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()

            return best_val_metrics

        except Exception as e:
            logging.error(f"Error in trial training: {str(e)}")
            raise

    @staticmethod
    def run_optimization(df: pd.DataFrame,
                        mlb: MultiLabelBinarizer,
                        n_trials: int = 30) -> Dict:

        try:
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(seed=42),
                pruner=optuna.pruners.MedianPruner()
            )

            objective_func = lambda trial: HyperparameterOptimizer.objective(trial, df, mlb)

            logging.info("Starting hyperparameter optimization...")
            study.optimize(objective_func, n_trials=n_trials,
                         callbacks=[lambda study, trial: gc.collect()])

            # Log results
            logging.info("\nHyperparameter Optimization Results:")
            logging.info(f"Best trial number: {study.best_trial.number}")
            logging.info(f"Best F1-score: {study.best_trial.value:.4f}")
            logging.info("\nBest hyperparameters:")
            for param, value in study.best_trial.params.items():
                logging.info(f"{param}: {value}")

            # Save study results
            results_file = Config.METRICS_DIR / 'optuna_results.json'
            results = {
                'best_trial': {
                    'number': study.best_trial.number,
                    'value': study.best_trial.value,
                    'params': study.best_trial.params
                },
                'all_trials': [
                    {
                        'number': trial.number,
                        'value': trial.value,
                        'params': trial.params
                    }
                    for trial in study.trials if trial.value is not None
                ]
            }

            with open(results_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=4, ensure_ascii=False)

            # Save visualizations
            try:
                # Optimization history plot
                fig1 = optuna.visualization.plot_optimization_history(study)
                fig1.write_image(str(Config.PLOTS_DIR / "optuna_optimization_history.png"))

                # Parameter importance plot
                fig2 = optuna.visualization.plot_param_importances(study)
                fig2.write_image(str(Config.PLOTS_DIR / "optuna_param_importances.png"))

                # Parameter relationships plot
                fig3 = optuna.visualization.plot_parallel_coordinate(study)
                fig3.write_image(str(Config.PLOTS_DIR / "optuna_param_relationships.png"))

            except Exception as e:
                logging.warning(f"Could not create optimization plots: {str(e)}")

            return study.best_trial.params

        except Exception as e:
            logging.error(f"Error during optimization: {str(e)}")
            raise
        finally:
            # Clean up
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

# Fungsi get_args untuk Colab
class Args:
    def __init__(self):
        # Training parameters
        self.sample_size = None  # Number of samples to use (default: use all data)
        self.epochs = Config.MODEL_PARAMS['EPOCHS']
        self.batch_size = Config.MODEL_PARAMS['BATCH_SIZE']
        self.learning_rate = Config.MODEL_PARAMS['LEARNING_RATE']
        self.max_length = Config.MODEL_PARAMS['MAX_LENGTH']

        # Model configuration
        self.test_size = Config.MODEL_PARAMS['TEST_SIZE']
        self.weight_decay = Config.MODEL_PARAMS['WEIGHT_DECAY']
        self.mixup_prob = Config.MODEL_PARAMS['MIXUP_PROB']
        self.patience = Config.MODEL_PARAMS['PATIENCE']

        # System configuration
        self.output_dir = None
        self.no_cuda = False
        self.seed = 42

        # Label Smoothing
        self.smoothing = Config.MODEL_PARAMS['SMOOTHING']

        # Optuna specific
        self.n_trials = 20

# Modifikasi fungsi get_args
def get_args():
    return Args()

def main(args: Args) -> None:
    """Main function"""
    try:
        # Check environment first
        if not check_environment():
            raise RuntimeError("Environment check failed!")

        # Update configuration
        Config.MODEL_PARAMS.update({
            'EPOCHS': args.epochs,
            'BATCH_SIZE': args.batch_size,
            'LEARNING_RATE': args.learning_rate,
            'MAX_LENGTH': args.max_length,
            'TEST_SIZE': args.test_size,
            'WEIGHT_DECAY': args.weight_decay,
            'MIXUP_PROB': args.mixup_prob,
            'PATIENCE': args.patience,
            'SMOOTHING': args.smoothing
        })

        if args.no_cuda:
            Config.DEVICE = torch.device('cpu')
            logging.info("CUDA disabled by user")

        # Initialize logging dan experiment info
        log_system_info()

        logging.info("Starting movie genre classification with hyperparameter optimization")
        logging.info(f"Using device: {Config.DEVICE}")

        # Log initial configuration
        logging.info("\nInitial Configuration:")
        logging.info(f"Sample Size: {Config.SAMPLE_SIZE if Config.SAMPLE_SIZE else 'Full Dataset'}")
        for param, value in Config.MODEL_PARAMS.items():
            logging.info(f"{param}: {value}")

        # Load and preprocess data
        logging.info("\nLoading and preprocessing data...")
        df = DataProcessor.load_and_preprocess_data(Config.DATA_PATH, Config.SAMPLE_SIZE)
        mlb = MultiLabelBinarizer()

        # Run hyperparameter optimization
        logging.info("\nStarting hyperparameter optimization...")
        best_params = HyperparameterOptimizer.run_optimization(df, mlb, args.n_trials)

        # Update configuration with best parameters
        logging.info("\nBest Hyperparameters found:")
        for param, value in best_params.items():
            logging.info(f"{param}: {value}")
            if param in Config.MODEL_PARAMS:
                Config.MODEL_PARAMS[param] = value

        # Train final model with best parameters
        logging.info("\nTraining final model with optimized parameters...")
        model, tokenizer, mlb, threshold_optimizer = ModelTrainer.train_model(Config.SAMPLE_SIZE)

        # Test on a sample
        logging.info("\nTesting model on a sample...")
        df_sample = DataProcessor.load_and_preprocess_data(Config.DATA_PATH, sample_size=1)
        sample_text = df_sample['sinopsis'].iloc[0]

        model.eval()
        inputs = tokenizer(
            sample_text,
            return_tensors='pt',
            max_length=Config.MODEL_PARAMS['MAX_LENGTH'],
            padding='max_length',
            truncation=True
        )

        input_ids = inputs['input_ids'].to(Config.DEVICE)
        attention_mask = inputs['attention_mask'].to(Config.DEVICE)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            probs = torch.sigmoid(outputs.logits)
            
            # Gunakan threshold_optimizer untuk prediksi
            raw_predictions = probs.cpu().numpy()
            thresholded_preds = threshold_optimizer.apply_thresholds(raw_predictions)

        # Buat prediksi dengan threshold yang dioptimalkan
        predictions = []
        for idx, pred in enumerate(thresholded_preds[0]):
            if pred > 0:  # Karena sudah di-threshold
                predictions.append({
                    'genre': mlb.classes_[idx],
                    'probability': float(probs[0][idx].item()),
                    'threshold_used': threshold_optimizer.thresholds[idx]
                })

        predictions.sort(key=lambda x: x['probability'], reverse=True)

        # Log prediction results dengan informasi threshold
        logging.info("\nSample prediction results:")
        logging.info(f"Sample text: {sample_text[:100]}...")
        for pred in predictions:
            logging.info(
                f"Genre: {pred['genre']}, "
                f"Probability: {pred['probability']:.4f}, "
                f"Threshold Used: {pred['threshold_used']:.3f}"
            )

        # Save final configuration
        final_config = {
            'hyperparameters': best_params,
            'model_info': {
                'num_classes': len(mlb.classes_),
                'classes': mlb.classes_.tolist()
            },
            'training_info': {
                'device': str(Config.DEVICE),
                'final_sample_size': len(df),
                'optimization_trials': args.n_trials
            },
            'threshold_info': {
                'final_thresholds': {
                    class_name: float(thresh) 
                    for class_name, thresh in zip(mlb.classes_, threshold_optimizer.thresholds)
                },
                'best_f1_scores': {
                    class_name: float(score)
                    for class_name, score in zip(mlb.classes_, threshold_optimizer.best_f1_scores)
                }
            }
        }

        with open(Config.EXPERIMENT_DIR / 'final_configuration.json', 'w', encoding='utf-8') as f:
            json.dump(final_config, f, indent=4, ensure_ascii=False)

        logging.info("\nTraining completed successfully!")
        logging.info(f"All results and models saved in: {Config.EXPERIMENT_DIR}")

    except KeyboardInterrupt:
        logging.info("\nTraining interrupted by user")
        raise
    except Exception as e:
        logging.error(f"\nError during execution: {str(e)}")
        raise
    finally:
        logging.info("\nCleaning up resources...")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

# Entry point for Colab
if __name__ == "__main__":
    # Set random seeds for reproducibility
    args = get_args()

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    try:
        main(args)
    except KeyboardInterrupt:
        logging.info("\nTraining interrupted by user")
    except Exception as e:
        logging.error(f"Training failed: {str(e)}")
        raise
    finally:
        logging.info("Cleaning up resources...")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()


Files in dataset directory:
- final_combined_movies_5genres.csv
GPU tersedia: Tesla T4
GPU Memory: 15.83 GB
2025-02-15 21:42:23,178 - INFO - System Information:
2025-02-15 21:42:23,179 - INFO - Python Version: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
2025-02-15 21:42:23,180 - INFO - CPU Count: 4
2025-02-15 21:42:23,182 - INFO - Initial Memory Usage: 635.80 MB
2025-02-15 21:42:23,182 - INFO - GPU Device: Tesla T4
2025-02-15 21:42:23,183 - INFO - GPU Memory Total: 15.83 GB
2025-02-15 21:42:23,184 - INFO - CUDA Version: 12.1
2025-02-15 21:42:23,185 - INFO - Starting movie genre classification with hyperparameter optimization
2025-02-15 21:42:23,185 - INFO - Using device: cuda
2025-02-15 21:42:23,187 - INFO - 
Initial Configuration:
2025-02-15 21:42:23,187 - INFO - Sample Size: Full Dataset
2025-02-15 21:42:23,188 - INFO - EPOCHS: 100
2025-02-15 21:42:23,189 - INFO - BATCH_SIZE: 10
2025-02-15 21:42:23,190 - INFO - LEARNING_RATE: 1e-05
2025-02-15 21:42:23,190 - INFO - MAX_LENGTH:

100%|██████████| 1738/1738 [00:00<00:00, 15232.61it/s]

2025-02-15 21:42:23,408 - INFO - Memory usage after preprocessing: 639.64 MB
2025-02-15 21:42:23,408 - INFO - 
Starting hyperparameter optimization...



[I 2025-02-15 21:42:23,410] A new study created in memory with name: no-name-b9af8e92-60c3-4f4b-a6dd-ad92d1fb8d69


2025-02-15 21:42:23,410 - INFO - Starting hyperparameter optimization...
2025-02-15 21:42:23,412 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 1e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}
2025-02-15 21:42:23,419 - INFO - Setting up model and tokenizer...


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/229k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 21:42:44,286 - INFO - Model and tokenizer setup completed
2025-02-15 21:42:44,287 - INFO - Setting up data loaders...
2025-02-15 21:42:44,288 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 21:42:44,289 - INFO - Created sampler with 1477 weights
2025-02-15 21:42:44,291 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 21:44:58,959 - INFO - Starting model evaluation...
2025-02-15 21:44:58,961 - INFO - Memory usage after evaluation start: 1768.10 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.79it/s]

2025-02-15 21:45:08,439 - INFO - 
Per-genre Performance Metrics:
2025-02-15 21:45:08,448 - INFO - 
Metrics for Drama:
2025-02-15 21:45:08,449 - INFO - Accuracy: 0.4598
2025-02-15 21:45:08,450 - INFO - F1_score: 0.4835
2025-02-15 21:45:08,450 - INFO - Precision: 0.3317
2025-02-15 21:45:08,452 - INFO - Recall: 0.8919
2025-02-15 21:45:08,457 - INFO - 
Metrics for Horor:
2025-02-15 21:45:08,458 - INFO - Accuracy: 0.7356
2025-02-15 21:45:08,459 - INFO - F1_score: 0.5714
2025-02-15 21:45:08,459 - INFO - Precision: 0.4423
2025-02-15 21:45:08,460 - INFO - Recall: 0.8070





2025-02-15 21:45:08,466 - INFO - 
Metrics for Komedi:
2025-02-15 21:45:08,468 - INFO - Accuracy: 0.5249
2025-02-15 21:45:08,468 - INFO - F1_score: 0.4095
2025-02-15 21:45:08,469 - INFO - Precision: 0.2829
2025-02-15 21:45:08,470 - INFO - Recall: 0.7414
2025-02-15 21:45:08,476 - INFO - 
Metrics for Laga:
2025-02-15 21:45:08,477 - INFO - Accuracy: 0.2490
2025-02-15 21:45:08,478 - INFO - F1_score: 0.2632
2025-02-15 21:45:08,479 - INFO - Precision: 0.1535
2025-02-15 21:45:08,480 - INFO - Recall: 0.9211
2025-02-15 21:45:08,486 - INFO - 
Metrics for Romantis:
2025-02-15 21:45:08,487 - INFO - Accuracy: 0.7663
2025-02-15 21:45:08,487 - INFO - F1_score: 0.3711
2025-02-15 21:45:08,489 - INFO - Precision: 0.2857
2025-02-15 21:45:08,490 - INFO - Recall: 0.5294
2025-02-15 21:45:08,492 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 21:45:13,072 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 21:4

                                                                                        

2025-02-15 21:47:48,441 - INFO - Starting model evaluation...
2025-02-15 21:47:48,443 - INFO - Memory usage after evaluation start: 1807.68 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.80it/s]

2025-02-15 21:47:57,911 - INFO - 
Per-genre Performance Metrics:
2025-02-15 21:47:57,917 - INFO - 
Metrics for Drama:
2025-02-15 21:47:57,918 - INFO - Accuracy: 0.6169
2025-02-15 21:47:57,919 - INFO - F1_score: 0.5192
2025-02-15 21:47:57,920 - INFO - Precision: 0.4030
2025-02-15 21:47:57,921 - INFO - Recall: 0.7297
2025-02-15 21:47:57,927 - INFO - 
Metrics for Horor:
2025-02-15 21:47:57,927 - INFO - Accuracy: 0.8238
2025-02-15 21:47:57,928 - INFO - F1_score: 0.6933
2025-02-15 21:47:57,928 - INFO - Precision: 0.5591
2025-02-15 21:47:57,929 - INFO - Recall: 0.9123





2025-02-15 21:47:57,937 - INFO - 
Metrics for Komedi:
2025-02-15 21:47:57,937 - INFO - Accuracy: 0.5364
2025-02-15 21:47:57,938 - INFO - F1_score: 0.4475
2025-02-15 21:47:57,939 - INFO - Precision: 0.3043
2025-02-15 21:47:57,940 - INFO - Recall: 0.8448
2025-02-15 21:47:57,946 - INFO - 
Metrics for Laga:
2025-02-15 21:47:57,947 - INFO - Accuracy: 0.5249
2025-02-15 21:47:57,947 - INFO - F1_score: 0.3111
2025-02-15 21:47:57,948 - INFO - Precision: 0.1972
2025-02-15 21:47:57,950 - INFO - Recall: 0.7368
2025-02-15 21:47:57,955 - INFO - 
Metrics for Romantis:
2025-02-15 21:47:57,956 - INFO - Accuracy: 0.5556
2025-02-15 21:47:57,957 - INFO - F1_score: 0.3256
2025-02-15 21:47:57,958 - INFO - Precision: 0.2029
2025-02-15 21:47:57,959 - INFO - Recall: 0.8235
2025-02-15 21:47:57,961 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 21:48:02,176 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 21:4

                                                                                        

2025-02-15 21:50:37,580 - INFO - Starting model evaluation...
2025-02-15 21:50:37,581 - INFO - Memory usage after evaluation start: 1837.09 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 21:50:47,132 - INFO - 
Per-genre Performance Metrics:
2025-02-15 21:50:47,138 - INFO - 
Metrics for Drama:
2025-02-15 21:50:47,138 - INFO - Accuracy: 0.5172
2025-02-15 21:50:47,139 - INFO - F1_score: 0.5116
2025-02-15 21:50:47,139 - INFO - Precision: 0.3587
2025-02-15 21:50:47,140 - INFO - Recall: 0.8919
2025-02-15 21:50:47,147 - INFO - 
Metrics for Horor:
2025-02-15 21:50:47,148 - INFO - Accuracy: 0.7931
2025-02-15 21:50:47,148 - INFO - F1_score: 0.6667
2025-02-15 21:50:47,149 - INFO - Precision: 0.5143
2025-02-15 21:50:47,150 - INFO - Recall: 0.9474





2025-02-15 21:50:47,158 - INFO - 
Metrics for Komedi:
2025-02-15 21:50:47,158 - INFO - Accuracy: 0.3563
2025-02-15 21:50:47,159 - INFO - F1_score: 0.4000
2025-02-15 21:50:47,159 - INFO - Precision: 0.2523
2025-02-15 21:50:47,161 - INFO - Recall: 0.9655
2025-02-15 21:50:47,167 - INFO - 
Metrics for Laga:
2025-02-15 21:50:47,167 - INFO - Accuracy: 0.7931
2025-02-15 21:50:47,168 - INFO - F1_score: 0.4130
2025-02-15 21:50:47,169 - INFO - Precision: 0.3519
2025-02-15 21:50:47,169 - INFO - Recall: 0.5000
2025-02-15 21:50:47,176 - INFO - 
Metrics for Romantis:
2025-02-15 21:50:47,177 - INFO - Accuracy: 0.6973
2025-02-15 21:50:47,177 - INFO - F1_score: 0.3471
2025-02-15 21:50:47,178 - INFO - Precision: 0.2414
2025-02-15 21:50:47,180 - INFO - Recall: 0.6176
2025-02-15 21:50:47,181 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 21:50:51,376 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 21:5

[I 2025-02-15 21:50:52,475] Trial 0 finished with value: 0.46768909798416186 and parameters: {'batch_size': 16, 'learning_rate': 1e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 0 with value: 0.46768909798416186.


2025-02-15 21:50:52,790 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 3e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.15}
2025-02-15 21:50:52,795 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 21:50:53,821 - INFO - Model and tokenizer setup completed
2025-02-15 21:50:53,822 - INFO - Setting up data loaders...
2025-02-15 21:50:53,823 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 21:50:53,825 - INFO - Created sampler with 1477 weights
2025-02-15 21:50:53,827 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-15 21:53:30,005 - INFO - Starting model evaluation...
2025-02-15 21:53:30,007 - INFO - Memory usage after evaluation start: 1870.05 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.71it/s]

2025-02-15 21:53:38,914 - INFO - 
Per-genre Performance Metrics:
2025-02-15 21:53:38,921 - INFO - 
Metrics for Drama:
2025-02-15 21:53:38,922 - INFO - Accuracy: 0.6092
2025-02-15 21:53:38,922 - INFO - F1_score: 0.5049
2025-02-15 21:53:38,923 - INFO - Precision: 0.3939
2025-02-15 21:53:38,924 - INFO - Recall: 0.7027
2025-02-15 21:53:38,930 - INFO - 
Metrics for Horor:
2025-02-15 21:53:38,931 - INFO - Accuracy: 0.7739
2025-02-15 21:53:38,931 - INFO - F1_score: 0.6467
2025-02-15 21:53:38,932 - INFO - Precision: 0.4909
2025-02-15 21:53:38,933 - INFO - Recall: 0.9474
2025-02-15 21:53:38,939 - INFO - 
Metrics for Komedi:
2025-02-15 21:53:38,940 - INFO - Accuracy: 0.4674
2025-02-15 21:53:38,941 - INFO - F1_score: 0.4232
2025-02-15 21:53:38,941 - INFO - Precision: 0.2787





2025-02-15 21:53:38,942 - INFO - Recall: 0.8793
2025-02-15 21:53:38,950 - INFO - 
Metrics for Laga:
2025-02-15 21:53:38,950 - INFO - Accuracy: 0.6398
2025-02-15 21:53:38,951 - INFO - F1_score: 0.3380
2025-02-15 21:53:38,951 - INFO - Precision: 0.2308
2025-02-15 21:53:38,952 - INFO - Recall: 0.6316
2025-02-15 21:53:38,959 - INFO - 
Metrics for Romantis:
2025-02-15 21:53:38,959 - INFO - Accuracy: 0.6782
2025-02-15 21:53:38,960 - INFO - F1_score: 0.3913
2025-02-15 21:53:38,961 - INFO - Precision: 0.2596
2025-02-15 21:53:38,962 - INFO - Recall: 0.7941
2025-02-15 21:53:38,964 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 21:53:43,108 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 21:53:43,109 - INFO - Memory usage after evaluation end: 1891.36 MB
2025-02-15 21:53:43,110 - INFO - Trial 1, Epoch 1: Loss = 1.4934, F1 = 0.4608


                                                                                          

2025-02-15 21:56:19,377 - INFO - Starting model evaluation...
2025-02-15 21:56:19,379 - INFO - Memory usage after evaluation start: 1891.86 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.70it/s]

2025-02-15 21:56:28,308 - INFO - 
Per-genre Performance Metrics:
2025-02-15 21:56:28,315 - INFO - 
Metrics for Drama:
2025-02-15 21:56:28,316 - INFO - Accuracy: 0.4483
2025-02-15 21:56:28,316 - INFO - F1_score: 0.4894
2025-02-15 21:56:28,317 - INFO - Precision: 0.3317
2025-02-15 21:56:28,318 - INFO - Recall: 0.9324
2025-02-15 21:56:28,324 - INFO - 
Metrics for Horor:
2025-02-15 21:56:28,325 - INFO - Accuracy: 0.8008
2025-02-15 21:56:28,325 - INFO - F1_score: 0.6790
2025-02-15 21:56:28,327 - INFO - Precision: 0.5238
2025-02-15 21:56:28,328 - INFO - Recall: 0.9649
2025-02-15 21:56:28,333 - INFO - 
Metrics for Komedi:
2025-02-15 21:56:28,334 - INFO - Accuracy: 0.6207
2025-02-15 21:56:28,335 - INFO - F1_score: 0.5075
2025-02-15 21:56:28,336 - INFO - Precision: 0.3566
2025-02-15 21:56:28,336 - INFO - Recall: 0.8793





2025-02-15 21:56:28,344 - INFO - 
Metrics for Laga:
2025-02-15 21:56:28,345 - INFO - Accuracy: 0.7510
2025-02-15 21:56:28,346 - INFO - F1_score: 0.3925
2025-02-15 21:56:28,347 - INFO - Precision: 0.3043
2025-02-15 21:56:28,347 - INFO - Recall: 0.5526
2025-02-15 21:56:28,355 - INFO - 
Metrics for Romantis:
2025-02-15 21:56:28,356 - INFO - Accuracy: 0.8046
2025-02-15 21:56:28,357 - INFO - F1_score: 0.3855
2025-02-15 21:56:28,358 - INFO - Precision: 0.3265
2025-02-15 21:56:28,359 - INFO - Recall: 0.4706
2025-02-15 21:56:28,361 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 21:56:32,507 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 21:56:32,509 - INFO - Memory usage after evaluation end: 1920.10 MB
2025-02-15 21:56:32,509 - INFO - Trial 1, Epoch 2: Loss = 1.2915, F1 = 0.4908


                                                                                          

2025-02-15 21:59:09,004 - INFO - Starting model evaluation...
2025-02-15 21:59:09,007 - INFO - Memory usage after evaluation start: 1920.10 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.69it/s]

2025-02-15 21:59:17,951 - INFO - 
Per-genre Performance Metrics:
2025-02-15 21:59:17,958 - INFO - 
Metrics for Drama:
2025-02-15 21:59:17,958 - INFO - Accuracy: 0.5517
2025-02-15 21:59:17,959 - INFO - F1_score: 0.5145
2025-02-15 21:59:17,960 - INFO - Precision: 0.3713
2025-02-15 21:59:17,961 - INFO - Recall: 0.8378
2025-02-15 21:59:17,968 - INFO - 
Metrics for Horor:
2025-02-15 21:59:17,969 - INFO - Accuracy: 0.7548
2025-02-15 21:59:17,969 - INFO - F1_score: 0.6279
2025-02-15 21:59:17,970 - INFO - Precision: 0.4696
2025-02-15 21:59:17,971 - INFO - Recall: 0.9474





2025-02-15 21:59:17,981 - INFO - 
Metrics for Komedi:
2025-02-15 21:59:17,982 - INFO - Accuracy: 0.6015
2025-02-15 21:59:17,984 - INFO - F1_score: 0.4583
2025-02-15 21:59:17,985 - INFO - Precision: 0.3284
2025-02-15 21:59:17,985 - INFO - Recall: 0.7586
2025-02-15 21:59:17,994 - INFO - 
Metrics for Laga:
2025-02-15 21:59:17,995 - INFO - Accuracy: 0.7510
2025-02-15 21:59:17,996 - INFO - F1_score: 0.3434
2025-02-15 21:59:17,997 - INFO - Precision: 0.2787
2025-02-15 21:59:17,998 - INFO - Recall: 0.4474
2025-02-15 21:59:18,007 - INFO - 
Metrics for Romantis:
2025-02-15 21:59:18,009 - INFO - Accuracy: 0.7165
2025-02-15 21:59:18,009 - INFO - F1_score: 0.3833
2025-02-15 21:59:18,010 - INFO - Precision: 0.2674
2025-02-15 21:59:18,010 - INFO - Recall: 0.6765
2025-02-15 21:59:18,013 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 21:59:22,201 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 21:5

[I 2025-02-15 21:59:23,439] Trial 1 finished with value: 0.49078045350690325 and parameters: {'batch_size': 8, 'learning_rate': 3e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.15}. Best is trial 1 with value: 0.49078045350690325.


2025-02-15 21:59:23,813 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-15 21:59:23,817 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 21:59:24,793 - INFO - Model and tokenizer setup completed
2025-02-15 21:59:24,795 - INFO - Setting up data loaders...
2025-02-15 21:59:24,796 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 21:59:24,797 - INFO - Created sampler with 1477 weights
2025-02-15 21:59:24,799 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 22:01:59,840 - INFO - Starting model evaluation...
2025-02-15 22:01:59,842 - INFO - Memory usage after evaluation start: 1953.53 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.79it/s]

2025-02-15 22:02:09,369 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:02:09,375 - INFO - 
Metrics for Drama:
2025-02-15 22:02:09,376 - INFO - Accuracy: 0.5249
2025-02-15 22:02:09,376 - INFO - F1_score: 0.5040
2025-02-15 22:02:09,377 - INFO - Precision: 0.3580
2025-02-15 22:02:09,379 - INFO - Recall: 0.8514
2025-02-15 22:02:09,384 - INFO - 
Metrics for Horor:
2025-02-15 22:02:09,385 - INFO - Accuracy: 0.8238
2025-02-15 22:02:09,386 - INFO - F1_score: 0.7013
2025-02-15 22:02:09,387 - INFO - Precision: 0.5567
2025-02-15 22:02:09,388 - INFO - Recall: 0.9474
2025-02-15 22:02:09,393 - INFO - 
Metrics for Komedi:
2025-02-15 22:02:09,394 - INFO - Accuracy: 0.5211





2025-02-15 22:02:09,395 - INFO - F1_score: 0.4541
2025-02-15 22:02:09,396 - INFO - Precision: 0.3041
2025-02-15 22:02:09,396 - INFO - Recall: 0.8966
2025-02-15 22:02:09,403 - INFO - 
Metrics for Laga:
2025-02-15 22:02:09,403 - INFO - Accuracy: 0.8008
2025-02-15 22:02:09,404 - INFO - F1_score: 0.4468
2025-02-15 22:02:09,404 - INFO - Precision: 0.3750
2025-02-15 22:02:09,406 - INFO - Recall: 0.5526
2025-02-15 22:02:09,412 - INFO - 
Metrics for Romantis:
2025-02-15 22:02:09,412 - INFO - Accuracy: 0.5057
2025-02-15 22:02:09,413 - INFO - F1_score: 0.3246
2025-02-15 22:02:09,413 - INFO - Precision: 0.1975
2025-02-15 22:02:09,414 - INFO - Recall: 0.9118
2025-02-15 22:02:09,416 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:02:13,523 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:02:13,525 - INFO - Memory usage after evaluation end: 1976.34 MB
2025-02-15 22:02:13,526 - INFO - Trial 2

                                                                                        

2025-02-15 22:04:49,140 - INFO - Starting model evaluation...
2025-02-15 22:04:49,142 - INFO - Memory usage after evaluation start: 1976.34 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.80it/s]

2025-02-15 22:04:58,566 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:04:58,571 - INFO - 
Metrics for Drama:
2025-02-15 22:04:58,572 - INFO - Accuracy: 0.5364
2025-02-15 22:04:58,572 - INFO - F1_score: 0.5141
2025-02-15 22:04:58,574 - INFO - Precision: 0.3657
2025-02-15 22:04:58,575 - INFO - Recall: 0.8649
2025-02-15 22:04:58,580 - INFO - 
Metrics for Horor:
2025-02-15 22:04:58,581 - INFO - Accuracy: 0.7165
2025-02-15 22:04:58,582 - INFO - F1_score: 0.5889
2025-02-15 22:04:58,583 - INFO - Precision: 0.4309
2025-02-15 22:04:58,584 - INFO - Recall: 0.9298





2025-02-15 22:04:58,591 - INFO - 
Metrics for Komedi:
2025-02-15 22:04:58,591 - INFO - Accuracy: 0.5824
2025-02-15 22:04:58,592 - INFO - F1_score: 0.4785
2025-02-15 22:04:58,592 - INFO - Precision: 0.3311
2025-02-15 22:04:58,593 - INFO - Recall: 0.8621
2025-02-15 22:04:58,599 - INFO - 
Metrics for Laga:
2025-02-15 22:04:58,599 - INFO - Accuracy: 0.7471
2025-02-15 22:04:58,600 - INFO - F1_score: 0.4107
2025-02-15 22:04:58,601 - INFO - Precision: 0.3108
2025-02-15 22:04:58,601 - INFO - Recall: 0.6053
2025-02-15 22:04:58,608 - INFO - 
Metrics for Romantis:
2025-02-15 22:04:58,608 - INFO - Accuracy: 0.7395
2025-02-15 22:04:58,609 - INFO - F1_score: 0.4138
2025-02-15 22:04:58,609 - INFO - Precision: 0.2927
2025-02-15 22:04:58,610 - INFO - Recall: 0.7059
2025-02-15 22:04:58,612 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:05:02,771 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:0

                                                                                        

2025-02-15 22:07:38,421 - INFO - Starting model evaluation...
2025-02-15 22:07:38,423 - INFO - Memory usage after evaluation start: 2005.65 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 22:07:47,970 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:07:47,978 - INFO - 
Metrics for Drama:
2025-02-15 22:07:47,979 - INFO - Accuracy: 0.6092
2025-02-15 22:07:47,980 - INFO - F1_score: 0.5603
2025-02-15 22:07:47,981 - INFO - Precision: 0.4114
2025-02-15 22:07:47,982 - INFO - Recall: 0.8784
2025-02-15 22:07:47,990 - INFO - 
Metrics for Horor:
2025-02-15 22:07:47,991 - INFO - Accuracy: 0.7050
2025-02-15 22:07:47,993 - INFO - F1_score: 0.5792
2025-02-15 22:07:47,994 - INFO - Precision: 0.4206
2025-02-15 22:07:47,995 - INFO - Recall: 0.9298





2025-02-15 22:07:48,003 - INFO - 
Metrics for Komedi:
2025-02-15 22:07:48,004 - INFO - Accuracy: 0.5709
2025-02-15 22:07:48,005 - INFO - F1_score: 0.4862
2025-02-15 22:07:48,006 - INFO - Precision: 0.3312
2025-02-15 22:07:48,007 - INFO - Recall: 0.9138
2025-02-15 22:07:48,015 - INFO - 
Metrics for Laga:
2025-02-15 22:07:48,016 - INFO - Accuracy: 0.7893
2025-02-15 22:07:48,017 - INFO - F1_score: 0.3678
2025-02-15 22:07:48,018 - INFO - Precision: 0.3265
2025-02-15 22:07:48,019 - INFO - Recall: 0.4211
2025-02-15 22:07:48,026 - INFO - 
Metrics for Romantis:
2025-02-15 22:07:48,027 - INFO - Accuracy: 0.7854
2025-02-15 22:07:48,028 - INFO - F1_score: 0.4717
2025-02-15 22:07:48,029 - INFO - Precision: 0.3472
2025-02-15 22:07:48,030 - INFO - Recall: 0.7353
2025-02-15 22:07:48,032 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:07:52,178 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:0

[I 2025-02-15 22:07:53,634] Trial 2 finished with value: 0.4930665075070929 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}. Best is trial 2 with value: 0.4930665075070929.


2025-02-15 22:07:54,057 - INFO - Trial parameter set: {'batch_size': 32, 'learning_rate': 3e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}
2025-02-15 22:07:54,061 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:07:55,286 - INFO - Model and tokenizer setup completed
2025-02-15 22:07:55,287 - INFO - Setting up data loaders...
2025-02-15 22:07:55,288 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:07:55,290 - INFO - Created sampler with 1477 weights
2025-02-15 22:07:55,292 - INFO - Created data loaders with batch size 32


                                                                                        

2025-02-15 22:10:30,471 - INFO - Starting model evaluation...
2025-02-15 22:10:30,472 - INFO - Memory usage after evaluation start: 2037.86 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.07s/it]

2025-02-15 22:10:40,065 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:10:40,072 - INFO - 
Metrics for Drama:
2025-02-15 22:10:40,072 - INFO - Accuracy: 0.5249
2025-02-15 22:10:40,073 - INFO - F1_score: 0.4959
2025-02-15 22:10:40,073 - INFO - Precision: 0.3547
2025-02-15 22:10:40,074 - INFO - Recall: 0.8243
2025-02-15 22:10:40,081 - INFO - 
Metrics for Horor:
2025-02-15 22:10:40,081 - INFO - Accuracy: 0.7088
2025-02-15 22:10:40,082 - INFO - F1_score: 0.5824
2025-02-15 22:10:40,082 - INFO - Precision: 0.4240
2025-02-15 22:10:40,083 - INFO - Recall: 0.9298





2025-02-15 22:10:40,090 - INFO - 
Metrics for Komedi:
2025-02-15 22:10:40,091 - INFO - Accuracy: 0.4559
2025-02-15 22:10:40,092 - INFO - F1_score: 0.4228
2025-02-15 22:10:40,093 - INFO - Precision: 0.2766
2025-02-15 22:10:40,093 - INFO - Recall: 0.8966
2025-02-15 22:10:40,099 - INFO - 
Metrics for Laga:
2025-02-15 22:10:40,099 - INFO - Accuracy: 0.6207
2025-02-15 22:10:40,100 - INFO - F1_score: 0.3851
2025-02-15 22:10:40,101 - INFO - Precision: 0.2520
2025-02-15 22:10:40,102 - INFO - Recall: 0.8158
2025-02-15 22:10:40,108 - INFO - 
Metrics for Romantis:
2025-02-15 22:10:40,109 - INFO - Accuracy: 0.7088
2025-02-15 22:10:40,109 - INFO - F1_score: 0.3667
2025-02-15 22:10:40,110 - INFO - Precision: 0.2558
2025-02-15 22:10:40,111 - INFO - Recall: 0.6471
2025-02-15 22:10:40,113 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:10:44,219 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:1

                                                                                        

2025-02-15 22:13:19,569 - INFO - Starting model evaluation...
2025-02-15 22:13:19,571 - INFO - Memory usage after evaluation start: 2059.70 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.07s/it]

2025-02-15 22:13:29,174 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:13:29,180 - INFO - 
Metrics for Drama:
2025-02-15 22:13:29,181 - INFO - Accuracy: 0.3142
2025-02-15 22:13:29,181 - INFO - F1_score: 0.4424
2025-02-15 22:13:29,182 - INFO - Precision: 0.2874
2025-02-15 22:13:29,183 - INFO - Recall: 0.9595
2025-02-15 22:13:29,189 - INFO - 
Metrics for Horor:
2025-02-15 22:13:29,190 - INFO - Accuracy: 0.6667
2025-02-15 22:13:29,191 - INFO - F1_score: 0.5538
2025-02-15 22:13:29,192 - INFO - Precision: 0.3913
2025-02-15 22:13:29,193 - INFO - Recall: 0.9474





2025-02-15 22:13:29,200 - INFO - 
Metrics for Komedi:
2025-02-15 22:13:29,201 - INFO - Accuracy: 0.6935
2025-02-15 22:13:29,202 - INFO - F1_score: 0.5238
2025-02-15 22:13:29,202 - INFO - Precision: 0.4000
2025-02-15 22:13:29,203 - INFO - Recall: 0.7586
2025-02-15 22:13:29,209 - INFO - 
Metrics for Laga:
2025-02-15 22:13:29,209 - INFO - Accuracy: 0.7586
2025-02-15 22:13:29,210 - INFO - F1_score: 0.3505
2025-02-15 22:13:29,211 - INFO - Precision: 0.2881
2025-02-15 22:13:29,212 - INFO - Recall: 0.4474
2025-02-15 22:13:29,218 - INFO - 
Metrics for Romantis:
2025-02-15 22:13:29,219 - INFO - Accuracy: 0.5939
2025-02-15 22:13:29,220 - INFO - F1_score: 0.3614
2025-02-15 22:13:29,220 - INFO - Precision: 0.2273
2025-02-15 22:13:29,221 - INFO - Recall: 0.8824
2025-02-15 22:13:29,223 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:13:33,419 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:1

                                                                                        

2025-02-15 22:16:09,041 - INFO - Starting model evaluation...
2025-02-15 22:16:09,043 - INFO - Memory usage after evaluation start: 2089.07 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:16:18,580 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:16:18,586 - INFO - 
Metrics for Drama:
2025-02-15 22:16:18,587 - INFO - Accuracy: 0.3755
2025-02-15 22:16:18,587 - INFO - F1_score: 0.4585
2025-02-15 22:16:18,588 - INFO - Precision: 0.3040
2025-02-15 22:16:18,588 - INFO - Recall: 0.9324
2025-02-15 22:16:18,595 - INFO - 
Metrics for Horor:
2025-02-15 22:16:18,595 - INFO - Accuracy: 0.6054
2025-02-15 22:16:18,596 - INFO - F1_score: 0.5118
2025-02-15 22:16:18,597 - INFO - Precision: 0.3506
2025-02-15 22:16:18,598 - INFO - Recall: 0.9474
2025-02-15 22:16:18,605 - INFO - 
Metrics for Komedi:





2025-02-15 22:16:18,605 - INFO - Accuracy: 0.5594
2025-02-15 22:16:18,606 - INFO - F1_score: 0.4749
2025-02-15 22:16:18,608 - INFO - Precision: 0.3230
2025-02-15 22:16:18,608 - INFO - Recall: 0.8966
2025-02-15 22:16:18,614 - INFO - 
Metrics for Laga:
2025-02-15 22:16:18,615 - INFO - Accuracy: 0.8084
2025-02-15 22:16:18,615 - INFO - F1_score: 0.3590
2025-02-15 22:16:18,616 - INFO - Precision: 0.3500
2025-02-15 22:16:18,618 - INFO - Recall: 0.3684
2025-02-15 22:16:18,623 - INFO - 
Metrics for Romantis:
2025-02-15 22:16:18,624 - INFO - Accuracy: 0.8238
2025-02-15 22:16:18,624 - INFO - F1_score: 0.4889
2025-02-15 22:16:18,625 - INFO - Precision: 0.3929
2025-02-15 22:16:18,626 - INFO - Recall: 0.6471
2025-02-15 22:16:18,628 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:16:22,782 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:16:22,783 - INFO - Memory usage after evaluation end: 2

[I 2025-02-15 22:16:24,351] Trial 3 finished with value: 0.45861383892833524 and parameters: {'batch_size': 32, 'learning_rate': 3e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 2 with value: 0.4930665075070929.


2025-02-15 22:16:24,822 - INFO - Trial parameter set: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}
2025-02-15 22:16:24,826 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:16:25,818 - INFO - Model and tokenizer setup completed
2025-02-15 22:16:25,820 - INFO - Setting up data loaders...
2025-02-15 22:16:25,821 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:16:25,823 - INFO - Created sampler with 1477 weights
2025-02-15 22:16:25,824 - INFO - Created data loaders with batch size 32


                                                                                        

2025-02-15 22:19:00,822 - INFO - Starting model evaluation...
2025-02-15 22:19:00,823 - INFO - Memory usage after evaluation start: 2150.29 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:19:10,347 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:19:10,353 - INFO - 
Metrics for Drama:
2025-02-15 22:19:10,354 - INFO - Accuracy: 0.6590
2025-02-15 22:19:10,355 - INFO - F1_score: 0.4972
2025-02-15 22:19:10,356 - INFO - Precision: 0.4272
2025-02-15 22:19:10,357 - INFO - Recall: 0.5946
2025-02-15 22:19:10,363 - INFO - 
Metrics for Horor:
2025-02-15 22:19:10,364 - INFO - Accuracy: 0.8123
2025-02-15 22:19:10,365 - INFO - F1_score: 0.6711
2025-02-15 22:19:10,366 - INFO - Precision: 0.5435
2025-02-15 22:19:10,367 - INFO - Recall: 0.8772
2025-02-15 22:19:10,372 - INFO - 
Metrics for Komedi:
2025-02-15 22:19:10,373 - INFO - Accuracy: 0.5479
2025-02-15 22:19:10,374 - INFO - F1_score: 0.4100





2025-02-15 22:19:10,375 - INFO - Precision: 0.2887
2025-02-15 22:19:10,376 - INFO - Recall: 0.7069
2025-02-15 22:19:10,382 - INFO - 
Metrics for Laga:
2025-02-15 22:19:10,383 - INFO - Accuracy: 0.5287
2025-02-15 22:19:10,384 - INFO - F1_score: 0.3128
2025-02-15 22:19:10,385 - INFO - Precision: 0.1986
2025-02-15 22:19:10,385 - INFO - Recall: 0.7368
2025-02-15 22:19:10,391 - INFO - 
Metrics for Romantis:
2025-02-15 22:19:10,391 - INFO - Accuracy: 0.6207
2025-02-15 22:19:10,392 - INFO - F1_score: 0.3356
2025-02-15 22:19:10,394 - INFO - Precision: 0.2174
2025-02-15 22:19:10,394 - INFO - Recall: 0.7353
2025-02-15 22:19:10,396 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:19:14,476 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:19:14,477 - INFO - Memory usage after evaluation end: 2170.79 MB
2025-02-15 22:19:14,478 - INFO - Trial 4, Epoch 1: Loss = 1.5581, F1 = 0.4453


                                                                                        

2025-02-15 22:21:49,977 - INFO - Starting model evaluation...
2025-02-15 22:21:49,979 - INFO - Memory usage after evaluation start: 2170.79 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:21:59,523 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:21:59,529 - INFO - 
Metrics for Drama:
2025-02-15 22:21:59,530 - INFO - Accuracy: 0.6207
2025-02-15 22:21:59,530 - INFO - F1_score: 0.4762
2025-02-15 22:21:59,531 - INFO - Precision: 0.3913
2025-02-15 22:21:59,532 - INFO - Recall: 0.6081
2025-02-15 22:21:59,538 - INFO - 
Metrics for Horor:
2025-02-15 22:21:59,539 - INFO - Accuracy: 0.7778
2025-02-15 22:21:59,540 - INFO - F1_score: 0.6506
2025-02-15 22:21:59,540 - INFO - Precision: 0.4954
2025-02-15 22:21:59,541 - INFO - Recall: 0.9474





2025-02-15 22:21:59,548 - INFO - 
Metrics for Komedi:
2025-02-15 22:21:59,549 - INFO - Accuracy: 0.4444
2025-02-15 22:21:59,550 - INFO - F1_score: 0.4177
2025-02-15 22:21:59,550 - INFO - Precision: 0.2723
2025-02-15 22:21:59,551 - INFO - Recall: 0.8966
2025-02-15 22:21:59,558 - INFO - 
Metrics for Laga:
2025-02-15 22:21:59,558 - INFO - Accuracy: 0.7739
2025-02-15 22:21:59,559 - INFO - F1_score: 0.4040
2025-02-15 22:21:59,559 - INFO - Precision: 0.3279
2025-02-15 22:21:59,560 - INFO - Recall: 0.5263
2025-02-15 22:21:59,568 - INFO - 
Metrics for Romantis:
2025-02-15 22:21:59,568 - INFO - Accuracy: 0.7011
2025-02-15 22:21:59,569 - INFO - F1_score: 0.3906
2025-02-15 22:21:59,569 - INFO - Precision: 0.2660
2025-02-15 22:21:59,570 - INFO - Recall: 0.7353
2025-02-15 22:21:59,573 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:22:03,803 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:2

                                                                                        

2025-02-15 22:24:39,253 - INFO - Starting model evaluation...
2025-02-15 22:24:39,255 - INFO - Memory usage after evaluation start: 2172.27 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:24:48,840 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:24:48,846 - INFO - 
Metrics for Drama:
2025-02-15 22:24:48,847 - INFO - Accuracy: 0.5747
2025-02-15 22:24:48,847 - INFO - F1_score: 0.5067
2025-02-15 22:24:48,848 - INFO - Precision: 0.3775
2025-02-15 22:24:48,850 - INFO - Recall: 0.7703
2025-02-15 22:24:48,856 - INFO - 
Metrics for Horor:
2025-02-15 22:24:48,856 - INFO - Accuracy: 0.8582
2025-02-15 22:24:48,857 - INFO - F1_score: 0.7376
2025-02-15 22:24:48,858 - INFO - Precision: 0.6190
2025-02-15 22:24:48,859 - INFO - Recall: 0.9123
2025-02-15 22:24:48,866 - INFO - 
Metrics for Komedi:
2025-02-15 22:24:48,866 - INFO - Accuracy: 0.6092
2025-02-15 22:24:48,867 - INFO - F1_score: 0.4796
2025-02-15 22:24:48,867 - INFO - Precision: 0.3406





2025-02-15 22:24:48,868 - INFO - Recall: 0.8103
2025-02-15 22:24:48,875 - INFO - 
Metrics for Laga:
2025-02-15 22:24:48,875 - INFO - Accuracy: 0.7126
2025-02-15 22:24:48,876 - INFO - F1_score: 0.4000
2025-02-15 22:24:48,877 - INFO - Precision: 0.2874
2025-02-15 22:24:48,879 - INFO - Recall: 0.6579
2025-02-15 22:24:48,884 - INFO - 
Metrics for Romantis:
2025-02-15 22:24:48,884 - INFO - Accuracy: 0.6858
2025-02-15 22:24:48,885 - INFO - F1_score: 0.4058
2025-02-15 22:24:48,886 - INFO - Precision: 0.2692
2025-02-15 22:24:48,887 - INFO - Recall: 0.8235
2025-02-15 22:24:48,889 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:24:53,130 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:24:53,132 - INFO - Memory usage after evaluation end: 2196.78 MB
2025-02-15 22:24:53,133 - INFO - Trial 4, Epoch 3: Loss = 1.2507, F1 = 0.5059


[I 2025-02-15 22:24:54,849] Trial 4 finished with value: 0.5059288514665811 and parameters: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 4 with value: 0.5059288514665811.


2025-02-15 22:24:55,374 - INFO - Trial parameter set: {'batch_size': 32, 'learning_rate': 3e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-15 22:24:55,378 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:24:56,531 - INFO - Model and tokenizer setup completed
2025-02-15 22:24:56,532 - INFO - Setting up data loaders...
2025-02-15 22:24:56,532 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:24:56,535 - INFO - Created sampler with 1477 weights
2025-02-15 22:24:56,536 - INFO - Created data loaders with batch size 32


                                                                                        

2025-02-15 22:27:31,923 - INFO - Starting model evaluation...
2025-02-15 22:27:31,925 - INFO - Memory usage after evaluation start: 2318.92 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:27:41,427 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:27:41,433 - INFO - 
Metrics for Drama:
2025-02-15 22:27:41,434 - INFO - Accuracy: 0.5134
2025-02-15 22:27:41,434 - INFO - F1_score: 0.5058
2025-02-15 22:27:41,435 - INFO - Precision: 0.3552
2025-02-15 22:27:41,436 - INFO - Recall: 0.8784
2025-02-15 22:27:41,442 - INFO - 
Metrics for Horor:
2025-02-15 22:27:41,442 - INFO - Accuracy: 0.8046
2025-02-15 22:27:41,444 - INFO - F1_score: 0.6623
2025-02-15 22:27:41,444 - INFO - Precision: 0.5319
2025-02-15 22:27:41,445 - INFO - Recall: 0.8772
2025-02-15 22:27:41,450 - INFO - 
Metrics for Komedi:
2025-02-15 22:27:41,451 - INFO - Accuracy: 0.7241
2025-02-15 22:27:41,451 - INFO - F1_score: 0.3333
2025-02-15 22:27:41,452 - INFO - Precision: 0.3600
2025-02-15 22:27:41,453 - INFO - Recall: 0.3103





2025-02-15 22:27:41,460 - INFO - 
Metrics for Laga:
2025-02-15 22:27:41,461 - INFO - Accuracy: 0.5939
2025-02-15 22:27:41,461 - INFO - F1_score: 0.3375
2025-02-15 22:27:41,462 - INFO - Precision: 0.2213
2025-02-15 22:27:41,463 - INFO - Recall: 0.7105
2025-02-15 22:27:41,469 - INFO - 
Metrics for Romantis:
2025-02-15 22:27:41,469 - INFO - Accuracy: 0.4674
2025-02-15 22:27:41,470 - INFO - F1_score: 0.2944
2025-02-15 22:27:41,470 - INFO - Precision: 0.1779
2025-02-15 22:27:41,472 - INFO - Recall: 0.8529
2025-02-15 22:27:41,474 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:27:45,504 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:27:45,506 - INFO - Memory usage after evaluation end: 2321.60 MB
2025-02-15 22:27:45,507 - INFO - Trial 5, Epoch 1: Loss = 1.5420, F1 = 0.4267
2025-02-15 22:27:45,512 - ERROR - Error in trial training: 
2025-02-15 22:27:46,264 - ERROR - Error in optimiza

[I 2025-02-15 22:27:46,266] Trial 5 pruned. 


2025-02-15 22:27:46,811 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 22:27:46,816 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:27:47,787 - INFO - Model and tokenizer setup completed
2025-02-15 22:27:47,788 - INFO - Setting up data loaders...
2025-02-15 22:27:47,789 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:27:47,792 - INFO - Created sampler with 1477 weights
2025-02-15 22:27:47,793 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 22:30:22,980 - INFO - Starting model evaluation...
2025-02-15 22:30:22,982 - INFO - Memory usage after evaluation start: 2373.46 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.80it/s]

2025-02-15 22:30:32,440 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:30:32,446 - INFO - 
Metrics for Drama:
2025-02-15 22:30:32,447 - INFO - Accuracy: 0.4943
2025-02-15 22:30:32,447 - INFO - F1_score: 0.4884
2025-02-15 22:30:32,448 - INFO - Precision: 0.3424
2025-02-15 22:30:32,449 - INFO - Recall: 0.8514
2025-02-15 22:30:32,455 - INFO - 
Metrics for Horor:
2025-02-15 22:30:32,456 - INFO - Accuracy: 0.7280
2025-02-15 22:30:32,456 - INFO - F1_score: 0.6077
2025-02-15 22:30:32,457 - INFO - Precision: 0.4435
2025-02-15 22:30:32,457 - INFO - Recall: 0.9649





2025-02-15 22:30:32,465 - INFO - 
Metrics for Komedi:
2025-02-15 22:30:32,465 - INFO - Accuracy: 0.6360
2025-02-15 22:30:32,466 - INFO - F1_score: 0.4379
2025-02-15 22:30:32,467 - INFO - Precision: 0.3333
2025-02-15 22:30:32,467 - INFO - Recall: 0.6379
2025-02-15 22:30:32,474 - INFO - 
Metrics for Laga:
2025-02-15 22:30:32,474 - INFO - Accuracy: 0.7165
2025-02-15 22:30:32,475 - INFO - F1_score: 0.3934
2025-02-15 22:30:32,477 - INFO - Precision: 0.2857
2025-02-15 22:30:32,477 - INFO - Recall: 0.6316
2025-02-15 22:30:32,483 - INFO - 
Metrics for Romantis:
2025-02-15 22:30:32,484 - INFO - Accuracy: 0.6513
2025-02-15 22:30:32,485 - INFO - F1_score: 0.3546
2025-02-15 22:30:32,485 - INFO - Precision: 0.2336
2025-02-15 22:30:32,487 - INFO - Recall: 0.7353
2025-02-15 22:30:32,489 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:30:36,305 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:3

                                                                                        

2025-02-15 22:33:11,808 - INFO - Starting model evaluation...
2025-02-15 22:33:11,810 - INFO - Memory usage after evaluation start: 2376.84 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 22:33:21,357 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:33:21,363 - INFO - 
Metrics for Drama:
2025-02-15 22:33:21,364 - INFO - Accuracy: 0.4751
2025-02-15 22:33:21,364 - INFO - F1_score: 0.4982
2025-02-15 22:33:21,365 - INFO - Precision: 0.3417
2025-02-15 22:33:21,366 - INFO - Recall: 0.9189
2025-02-15 22:33:21,372 - INFO - 
Metrics for Horor:
2025-02-15 22:33:21,373 - INFO - Accuracy: 0.8046
2025-02-15 22:33:21,373 - INFO - F1_score: 0.6752
2025-02-15 22:33:21,374 - INFO - Precision: 0.5300
2025-02-15 22:33:21,375 - INFO - Recall: 0.9298
2025-02-15 22:33:21,381 - INFO - 
Metrics for Komedi:





2025-02-15 22:33:21,381 - INFO - Accuracy: 0.4598
2025-02-15 22:33:21,382 - INFO - F1_score: 0.4471
2025-02-15 22:33:21,383 - INFO - Precision: 0.2893
2025-02-15 22:33:21,384 - INFO - Recall: 0.9828
2025-02-15 22:33:21,390 - INFO - 
Metrics for Laga:
2025-02-15 22:33:21,390 - INFO - Accuracy: 0.7088
2025-02-15 22:33:21,391 - INFO - F1_score: 0.3333
2025-02-15 22:33:21,391 - INFO - Precision: 0.2500
2025-02-15 22:33:21,392 - INFO - Recall: 0.5000
2025-02-15 22:33:21,398 - INFO - 
Metrics for Romantis:
2025-02-15 22:33:21,399 - INFO - Accuracy: 0.7816
2025-02-15 22:33:21,399 - INFO - F1_score: 0.4466
2025-02-15 22:33:21,401 - INFO - Precision: 0.3333
2025-02-15 22:33:21,401 - INFO - Recall: 0.6765
2025-02-15 22:33:21,403 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:33:25,227 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:33:25,228 - INFO - Memory usage after evaluation end: 2

                                                                                        

2025-02-15 22:36:00,513 - INFO - Starting model evaluation...
2025-02-15 22:36:00,514 - INFO - Memory usage after evaluation start: 2398.59 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 22:36:10,069 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:36:10,075 - INFO - 
Metrics for Drama:
2025-02-15 22:36:10,076 - INFO - Accuracy: 0.5517
2025-02-15 22:36:10,077 - INFO - F1_score: 0.5301
2025-02-15 22:36:10,078 - INFO - Precision: 0.3771
2025-02-15 22:36:10,079 - INFO - Recall: 0.8919
2025-02-15 22:36:10,085 - INFO - 
Metrics for Horor:
2025-02-15 22:36:10,086 - INFO - Accuracy: 0.8506
2025-02-15 22:36:10,087 - INFO - F1_score: 0.7194
2025-02-15 22:36:10,087 - INFO - Precision: 0.6098
2025-02-15 22:36:10,088 - INFO - Recall: 0.8772





2025-02-15 22:36:10,095 - INFO - 
Metrics for Komedi:
2025-02-15 22:36:10,095 - INFO - Accuracy: 0.4981
2025-02-15 22:36:10,096 - INFO - F1_score: 0.4609
2025-02-15 22:36:10,096 - INFO - Precision: 0.3027
2025-02-15 22:36:10,098 - INFO - Recall: 0.9655
2025-02-15 22:36:10,104 - INFO - 
Metrics for Laga:
2025-02-15 22:36:10,104 - INFO - Accuracy: 0.6782
2025-02-15 22:36:10,105 - INFO - F1_score: 0.3731
2025-02-15 22:36:10,106 - INFO - Precision: 0.2604
2025-02-15 22:36:10,106 - INFO - Recall: 0.6579
2025-02-15 22:36:10,113 - INFO - 
Metrics for Romantis:
2025-02-15 22:36:10,114 - INFO - Accuracy: 0.7816
2025-02-15 22:36:10,114 - INFO - F1_score: 0.4571
2025-02-15 22:36:10,115 - INFO - Precision: 0.3380
2025-02-15 22:36:10,116 - INFO - Recall: 0.7059
2025-02-15 22:36:10,118 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:36:13,941 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:3

[I 2025-02-15 22:36:15,794] Trial 6 finished with value: 0.5081454955309341 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 6 with value: 0.5081454955309341.


2025-02-15 22:36:16,360 - INFO - Trial parameter set: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 22:36:16,365 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:36:17,416 - INFO - Model and tokenizer setup completed
2025-02-15 22:36:17,418 - INFO - Setting up data loaders...
2025-02-15 22:36:17,419 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:36:17,420 - INFO - Created sampler with 1477 weights
2025-02-15 22:36:17,422 - INFO - Created data loaders with batch size 32


                                                                                        

2025-02-15 22:38:52,574 - INFO - Starting model evaluation...
2025-02-15 22:38:52,576 - INFO - Memory usage after evaluation start: 2389.67 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.05s/it]

2025-02-15 22:39:02,075 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:39:02,082 - INFO - 
Metrics for Drama:
2025-02-15 22:39:02,082 - INFO - Accuracy: 0.5096
2025-02-15 22:39:02,083 - INFO - F1_score: 0.5077
2025-02-15 22:39:02,084 - INFO - Precision: 0.3548
2025-02-15 22:39:02,084 - INFO - Recall: 0.8919
2025-02-15 22:39:02,091 - INFO - 
Metrics for Horor:
2025-02-15 22:39:02,091 - INFO - Accuracy: 0.6130
2025-02-15 22:39:02,092 - INFO - F1_score: 0.5213
2025-02-15 22:39:02,093 - INFO - Precision: 0.3571
2025-02-15 22:39:02,094 - INFO - Recall: 0.9649





2025-02-15 22:39:02,102 - INFO - 
Metrics for Komedi:
2025-02-15 22:39:02,103 - INFO - Accuracy: 0.5402
2025-02-15 22:39:02,103 - INFO - F1_score: 0.4340
2025-02-15 22:39:02,105 - INFO - Precision: 0.2987
2025-02-15 22:39:02,105 - INFO - Recall: 0.7931
2025-02-15 22:39:02,112 - INFO - 
Metrics for Laga:
2025-02-15 22:39:02,112 - INFO - Accuracy: 0.7050
2025-02-15 22:39:02,113 - INFO - F1_score: 0.3937
2025-02-15 22:39:02,114 - INFO - Precision: 0.2809
2025-02-15 22:39:02,115 - INFO - Recall: 0.6579
2025-02-15 22:39:02,121 - INFO - 
Metrics for Romantis:
2025-02-15 22:39:02,122 - INFO - Accuracy: 0.6130
2025-02-15 22:39:02,123 - INFO - F1_score: 0.3311
2025-02-15 22:39:02,124 - INFO - Precision: 0.2137
2025-02-15 22:39:02,124 - INFO - Recall: 0.7353
2025-02-15 22:39:02,126 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:39:06,076 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:3

[I 2025-02-15 22:39:06,893] Trial 7 pruned. 


2025-02-15 22:39:07,507 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 3e-05, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.15}
2025-02-15 22:39:07,512 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:39:08,522 - INFO - Model and tokenizer setup completed
2025-02-15 22:39:08,523 - INFO - Setting up data loaders...
2025-02-15 22:39:08,523 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:39:08,525 - INFO - Created sampler with 1477 weights
2025-02-15 22:39:08,527 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-15 22:41:44,977 - INFO - Starting model evaluation...
2025-02-15 22:41:44,980 - INFO - Memory usage after evaluation start: 2594.57 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.74it/s]

2025-02-15 22:41:53,800 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:41:53,806 - INFO - 
Metrics for Drama:
2025-02-15 22:41:53,807 - INFO - Accuracy: 0.5862
2025-02-15 22:41:53,808 - INFO - F1_score: 0.5345
2025-02-15 22:41:53,809 - INFO - Precision: 0.3924
2025-02-15 22:41:53,810 - INFO - Recall: 0.8378
2025-02-15 22:41:53,815 - INFO - 
Metrics for Horor:
2025-02-15 22:41:53,816 - INFO - Accuracy: 0.8927
2025-02-15 22:41:53,817 - INFO - F1_score: 0.7778
2025-02-15 22:41:53,818 - INFO - Precision: 0.7101
2025-02-15 22:41:53,819 - INFO - Recall: 0.8596
2025-02-15 22:41:53,825 - INFO - 
Metrics for Komedi:
2025-02-15 22:41:53,825 - INFO - Accuracy: 0.5939
2025-02-15 22:41:53,826 - INFO - F1_score: 0.4804
2025-02-15 22:41:53,827 - INFO - Precision: 0.3356
2025-02-15 22:41:53,828 - INFO - Recall: 0.8448





2025-02-15 22:41:53,835 - INFO - 
Metrics for Laga:
2025-02-15 22:41:53,835 - INFO - Accuracy: 0.6552
2025-02-15 22:41:53,836 - INFO - F1_score: 0.3750
2025-02-15 22:41:53,837 - INFO - Precision: 0.2547
2025-02-15 22:41:53,839 - INFO - Recall: 0.7105
2025-02-15 22:41:53,844 - INFO - 
Metrics for Romantis:
2025-02-15 22:41:53,845 - INFO - Accuracy: 0.6897
2025-02-15 22:41:53,845 - INFO - F1_score: 0.3520
2025-02-15 22:41:53,846 - INFO - Precision: 0.2418
2025-02-15 22:41:53,847 - INFO - Recall: 0.6471
2025-02-15 22:41:53,849 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:41:57,642 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:41:57,643 - INFO - Memory usage after evaluation end: 2599.19 MB
2025-02-15 22:41:57,644 - INFO - Trial 8, Epoch 1: Loss = 1.4480, F1 = 0.5039


                                                                                          

2025-02-15 22:44:34,663 - INFO - Starting model evaluation...
2025-02-15 22:44:34,665 - INFO - Memory usage after evaluation start: 2599.19 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.69it/s]

2025-02-15 22:44:43,615 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:44:43,621 - INFO - 
Metrics for Drama:
2025-02-15 22:44:43,621 - INFO - Accuracy: 0.6513
2025-02-15 22:44:43,622 - INFO - F1_score: 0.5561
2025-02-15 22:44:43,623 - INFO - Precision: 0.4351
2025-02-15 22:44:43,624 - INFO - Recall: 0.7703
2025-02-15 22:44:43,630 - INFO - 
Metrics for Horor:
2025-02-15 22:44:43,631 - INFO - Accuracy: 0.8199
2025-02-15 22:44:43,631 - INFO - F1_score: 0.6803
2025-02-15 22:44:43,632 - INFO - Precision: 0.5556
2025-02-15 22:44:43,632 - INFO - Recall: 0.8772
2025-02-15 22:44:43,639 - INFO - 
Metrics for Komedi:
2025-02-15 22:44:43,640 - INFO - Accuracy: 0.7318
2025-02-15 22:44:43,640 - INFO - F1_score: 0.5000





2025-02-15 22:44:43,642 - INFO - Precision: 0.4268
2025-02-15 22:44:43,642 - INFO - Recall: 0.6034
2025-02-15 22:44:43,649 - INFO - 
Metrics for Laga:
2025-02-15 22:44:43,649 - INFO - Accuracy: 0.7816
2025-02-15 22:44:43,650 - INFO - F1_score: 0.3736
2025-02-15 22:44:43,652 - INFO - Precision: 0.3208
2025-02-15 22:44:43,652 - INFO - Recall: 0.4474
2025-02-15 22:44:43,658 - INFO - 
Metrics for Romantis:
2025-02-15 22:44:43,658 - INFO - Accuracy: 0.6245
2025-02-15 22:44:43,659 - INFO - F1_score: 0.3553
2025-02-15 22:44:43,660 - INFO - Precision: 0.2288
2025-02-15 22:44:43,660 - INFO - Recall: 0.7941
2025-02-15 22:44:43,663 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:44:47,454 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:44:47,455 - INFO - Memory usage after evaluation end: 2604.69 MB
2025-02-15 22:44:47,456 - INFO - Trial 8, Epoch 2: Loss = 1.2073, F1 = 0.4931


                                                                                          

2025-02-15 22:47:24,561 - INFO - Starting model evaluation...
2025-02-15 22:47:24,563 - INFO - Memory usage after evaluation start: 2604.69 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.67it/s]

2025-02-15 22:47:33,563 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:47:33,569 - INFO - 
Metrics for Drama:
2025-02-15 22:47:33,570 - INFO - Accuracy: 0.7280
2025-02-15 22:47:33,571 - INFO - F1_score: 0.5359
2025-02-15 22:47:33,572 - INFO - Precision: 0.5190
2025-02-15 22:47:33,573 - INFO - Recall: 0.5541
2025-02-15 22:47:33,579 - INFO - 
Metrics for Horor:
2025-02-15 22:47:33,579 - INFO - Accuracy: 0.7893
2025-02-15 22:47:33,580 - INFO - F1_score: 0.6309
2025-02-15 22:47:33,580 - INFO - Precision: 0.5109
2025-02-15 22:47:33,581 - INFO - Recall: 0.8246
2025-02-15 22:47:33,587 - INFO - 
Metrics for Komedi:
2025-02-15 22:47:33,588 - INFO - Accuracy: 0.6513
2025-02-15 22:47:33,588 - INFO - F1_score: 0.5185





2025-02-15 22:47:33,589 - INFO - Precision: 0.3740
2025-02-15 22:47:33,590 - INFO - Recall: 0.8448
2025-02-15 22:47:33,596 - INFO - 
Metrics for Laga:
2025-02-15 22:47:33,597 - INFO - Accuracy: 0.5977
2025-02-15 22:47:33,597 - INFO - F1_score: 0.3478
2025-02-15 22:47:33,598 - INFO - Precision: 0.2276
2025-02-15 22:47:33,599 - INFO - Recall: 0.7368
2025-02-15 22:47:33,605 - INFO - 
Metrics for Romantis:
2025-02-15 22:47:33,605 - INFO - Accuracy: 0.8238
2025-02-15 22:47:33,606 - INFO - F1_score: 0.4250
2025-02-15 22:47:33,606 - INFO - Precision: 0.3696
2025-02-15 22:47:33,608 - INFO - Recall: 0.5000
2025-02-15 22:47:33,610 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:47:37,434 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:47:37,435 - INFO - Memory usage after evaluation end: 2610.32 MB
2025-02-15 22:47:37,437 - INFO - Trial 8, Epoch 3: Loss = 1.0890, F1 = 0.4916


[I 2025-02-15 22:47:39,468] Trial 8 finished with value: 0.5039305386522426 and parameters: {'batch_size': 8, 'learning_rate': 3e-05, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.15}. Best is trial 6 with value: 0.5081454955309341.


2025-02-15 22:47:40,116 - INFO - Trial parameter set: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 22:47:40,120 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:47:41,208 - INFO - Model and tokenizer setup completed
2025-02-15 22:47:41,209 - INFO - Setting up data loaders...
2025-02-15 22:47:41,210 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:47:41,212 - INFO - Created sampler with 1477 weights
2025-02-15 22:47:41,214 - INFO - Created data loaders with batch size 32


                                                                                        

2025-02-15 22:50:16,351 - INFO - Starting model evaluation...
2025-02-15 22:50:16,353 - INFO - Memory usage after evaluation start: 2556.34 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:50:25,877 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:50:25,884 - INFO - 
Metrics for Drama:
2025-02-15 22:50:25,884 - INFO - Accuracy: 0.5632
2025-02-15 22:50:25,885 - INFO - F1_score: 0.4956
2025-02-15 22:50:25,886 - INFO - Precision: 0.3684
2025-02-15 22:50:25,887 - INFO - Recall: 0.7568
2025-02-15 22:50:25,893 - INFO - 
Metrics for Horor:
2025-02-15 22:50:25,894 - INFO - Accuracy: 0.8774
2025-02-15 22:50:25,894 - INFO - F1_score: 0.7576
2025-02-15 22:50:25,895 - INFO - Precision: 0.6667
2025-02-15 22:50:25,897 - INFO - Recall: 0.8772
2025-02-15 22:50:25,902 - INFO - 
Metrics for Komedi:
2025-02-15 22:50:25,903 - INFO - Accuracy: 0.5402





2025-02-15 22:50:25,904 - INFO - F1_score: 0.4444
2025-02-15 22:50:25,904 - INFO - Precision: 0.3038
2025-02-15 22:50:25,905 - INFO - Recall: 0.8276
2025-02-15 22:50:25,912 - INFO - 
Metrics for Laga:
2025-02-15 22:50:25,913 - INFO - Accuracy: 0.6667
2025-02-15 22:50:25,913 - INFO - F1_score: 0.3741
2025-02-15 22:50:25,914 - INFO - Precision: 0.2574
2025-02-15 22:50:25,915 - INFO - Recall: 0.6842
2025-02-15 22:50:25,921 - INFO - 
Metrics for Romantis:
2025-02-15 22:50:25,922 - INFO - Accuracy: 0.4253
2025-02-15 22:50:25,922 - INFO - F1_score: 0.2788
2025-02-15 22:50:25,923 - INFO - Precision: 0.1667
2025-02-15 22:50:25,924 - INFO - Recall: 0.8529
2025-02-15 22:50:25,926 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:50:29,859 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:50:29,861 - INFO - Memory usage after evaluation end: 2560.05 MB
2025-02-15 22:50:29,862 - INFO - Trial 9

                                                                                        

2025-02-15 22:53:05,824 - INFO - Starting model evaluation...
2025-02-15 22:53:05,825 - INFO - Memory usage after evaluation start: 2560.17 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:53:15,347 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:53:15,353 - INFO - 
Metrics for Drama:
2025-02-15 22:53:15,354 - INFO - Accuracy: 0.5785
2025-02-15 22:53:15,354 - INFO - F1_score: 0.5259
2025-02-15 22:53:15,355 - INFO - Precision: 0.3861
2025-02-15 22:53:15,356 - INFO - Recall: 0.8243
2025-02-15 22:53:15,362 - INFO - 
Metrics for Horor:
2025-02-15 22:53:15,363 - INFO - Accuracy: 0.7433
2025-02-15 22:53:15,363 - INFO - F1_score: 0.6171
2025-02-15 22:53:15,365 - INFO - Precision: 0.4576
2025-02-15 22:53:15,366 - INFO - Recall: 0.9474





2025-02-15 22:53:15,373 - INFO - 
Metrics for Komedi:
2025-02-15 22:53:15,373 - INFO - Accuracy: 0.5977
2025-02-15 22:53:15,374 - INFO - F1_score: 0.4670
2025-02-15 22:53:15,374 - INFO - Precision: 0.3309
2025-02-15 22:53:15,375 - INFO - Recall: 0.7931
2025-02-15 22:53:15,381 - INFO - 
Metrics for Laga:
2025-02-15 22:53:15,382 - INFO - Accuracy: 0.6858
2025-02-15 22:53:15,382 - INFO - F1_score: 0.4058
2025-02-15 22:53:15,383 - INFO - Precision: 0.2800
2025-02-15 22:53:15,384 - INFO - Recall: 0.7368
2025-02-15 22:53:15,389 - INFO - 
Metrics for Romantis:
2025-02-15 22:53:15,390 - INFO - Accuracy: 0.7816
2025-02-15 22:53:15,392 - INFO - F1_score: 0.4242
2025-02-15 22:53:15,393 - INFO - Precision: 0.3231
2025-02-15 22:53:15,393 - INFO - Recall: 0.6176
2025-02-15 22:53:15,395 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:53:19,374 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:5

                                                                                        

2025-02-15 22:55:55,223 - INFO - Starting model evaluation...
2025-02-15 22:55:55,225 - INFO - Memory usage after evaluation start: 2565.85 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 22:56:04,794 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:56:04,800 - INFO - 
Metrics for Drama:
2025-02-15 22:56:04,800 - INFO - Accuracy: 0.3333
2025-02-15 22:56:04,801 - INFO - F1_score: 0.4563
2025-02-15 22:56:04,802 - INFO - Precision: 0.2967
2025-02-15 22:56:04,803 - INFO - Recall: 0.9865
2025-02-15 22:56:04,809 - INFO - 
Metrics for Horor:
2025-02-15 22:56:04,810 - INFO - Accuracy: 0.8199
2025-02-15 22:56:04,810 - INFO - F1_score: 0.6846
2025-02-15 22:56:04,811 - INFO - Precision: 0.5543
2025-02-15 22:56:04,813 - INFO - Recall: 0.8947





2025-02-15 22:56:04,819 - INFO - 
Metrics for Komedi:
2025-02-15 22:56:04,820 - INFO - Accuracy: 0.6820
2025-02-15 22:56:04,821 - INFO - F1_score: 0.5202
2025-02-15 22:56:04,822 - INFO - Precision: 0.3913
2025-02-15 22:56:04,823 - INFO - Recall: 0.7759
2025-02-15 22:56:04,829 - INFO - 
Metrics for Laga:
2025-02-15 22:56:04,830 - INFO - Accuracy: 0.6437
2025-02-15 22:56:04,831 - INFO - F1_score: 0.3841
2025-02-15 22:56:04,832 - INFO - Precision: 0.2566
2025-02-15 22:56:04,833 - INFO - Recall: 0.7632
2025-02-15 22:56:04,838 - INFO - 
Metrics for Romantis:
2025-02-15 22:56:04,839 - INFO - Accuracy: 0.8429
2025-02-15 22:56:04,840 - INFO - F1_score: 0.4938
2025-02-15 22:56:04,841 - INFO - Precision: 0.4255
2025-02-15 22:56:04,842 - INFO - Recall: 0.5882
2025-02-15 22:56:04,844 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:56:08,776 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:5

[I 2025-02-15 22:56:10,996] Trial 9 finished with value: 0.5077956186041643 and parameters: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 6 with value: 0.5081454955309341.


2025-02-15 22:56:11,696 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 1e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 22:56:11,700 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 22:56:13,305 - INFO - Model and tokenizer setup completed
2025-02-15 22:56:13,306 - INFO - Setting up data loaders...
2025-02-15 22:56:13,307 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 22:56:13,309 - INFO - Created sampler with 1477 weights
2025-02-15 22:56:13,310 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 22:58:48,417 - INFO - Starting model evaluation...
2025-02-15 22:58:48,419 - INFO - Memory usage after evaluation start: 2574.05 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.79it/s]

2025-02-15 22:58:57,897 - INFO - 
Per-genre Performance Metrics:
2025-02-15 22:58:57,904 - INFO - 
Metrics for Drama:
2025-02-15 22:58:57,905 - INFO - Accuracy: 0.4713
2025-02-15 22:58:57,906 - INFO - F1_score: 0.5036
2025-02-15 22:58:57,906 - INFO - Precision: 0.3431
2025-02-15 22:58:57,907 - INFO - Recall: 0.9459
2025-02-15 22:58:57,914 - INFO - 
Metrics for Horor:
2025-02-15 22:58:57,914 - INFO - Accuracy: 0.7280
2025-02-15 22:58:57,915 - INFO - F1_score: 0.6077
2025-02-15 22:58:57,916 - INFO - Precision: 0.4435
2025-02-15 22:58:57,917 - INFO - Recall: 0.9649





2025-02-15 22:58:57,923 - INFO - 
Metrics for Komedi:
2025-02-15 22:58:57,924 - INFO - Accuracy: 0.6360
2025-02-15 22:58:57,926 - INFO - F1_score: 0.5226
2025-02-15 22:58:57,926 - INFO - Precision: 0.3688
2025-02-15 22:58:57,927 - INFO - Recall: 0.8966
2025-02-15 22:58:57,934 - INFO - 
Metrics for Laga:
2025-02-15 22:58:57,934 - INFO - Accuracy: 0.5747
2025-02-15 22:58:57,935 - INFO - F1_score: 0.3353
2025-02-15 22:58:57,936 - INFO - Precision: 0.2171
2025-02-15 22:58:57,936 - INFO - Recall: 0.7368
2025-02-15 22:58:57,943 - INFO - 
Metrics for Romantis:
2025-02-15 22:58:57,943 - INFO - Accuracy: 0.6705
2025-02-15 22:58:57,945 - INFO - F1_score: 0.3582
2025-02-15 22:58:57,946 - INFO - Precision: 0.2400
2025-02-15 22:58:57,946 - INFO - Recall: 0.7059
2025-02-15 22:58:57,948 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 22:59:01,917 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 22:5

                                                                                        

2025-02-15 23:01:37,837 - INFO - Starting model evaluation...
2025-02-15 23:01:37,839 - INFO - Memory usage after evaluation start: 2577.62 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:01:47,387 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:01:47,393 - INFO - 
Metrics for Drama:
2025-02-15 23:01:47,393 - INFO - Accuracy: 0.4176
2025-02-15 23:01:47,394 - INFO - F1_score: 0.4865
2025-02-15 23:01:47,394 - INFO - Precision: 0.3243
2025-02-15 23:01:47,396 - INFO - Recall: 0.9730
2025-02-15 23:01:47,401 - INFO - 
Metrics for Horor:
2025-02-15 23:01:47,402 - INFO - Accuracy: 0.8046
2025-02-15 23:01:47,403 - INFO - F1_score: 0.6792
2025-02-15 23:01:47,403 - INFO - Precision: 0.5294
2025-02-15 23:01:47,404 - INFO - Recall: 0.9474





2025-02-15 23:01:47,411 - INFO - 
Metrics for Komedi:
2025-02-15 23:01:47,412 - INFO - Accuracy: 0.5211
2025-02-15 23:01:47,413 - INFO - F1_score: 0.4493
2025-02-15 23:01:47,413 - INFO - Precision: 0.3018
2025-02-15 23:01:47,415 - INFO - Recall: 0.8793
2025-02-15 23:01:47,421 - INFO - 
Metrics for Laga:
2025-02-15 23:01:47,421 - INFO - Accuracy: 0.6437
2025-02-15 23:01:47,422 - INFO - F1_score: 0.3841
2025-02-15 23:01:47,423 - INFO - Precision: 0.2566
2025-02-15 23:01:47,423 - INFO - Recall: 0.7632
2025-02-15 23:01:47,429 - INFO - 
Metrics for Romantis:
2025-02-15 23:01:47,430 - INFO - Accuracy: 0.7395
2025-02-15 23:01:47,430 - INFO - F1_score: 0.4138
2025-02-15 23:01:47,431 - INFO - Precision: 0.2927
2025-02-15 23:01:47,431 - INFO - Recall: 0.7059
2025-02-15 23:01:47,434 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:01:51,410 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:0

                                                                                        

2025-02-15 23:04:27,453 - INFO - Starting model evaluation...
2025-02-15 23:04:27,455 - INFO - Memory usage after evaluation start: 2594.25 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:04:36,999 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:04:37,005 - INFO - 
Metrics for Drama:
2025-02-15 23:04:37,006 - INFO - Accuracy: 0.5249
2025-02-15 23:04:37,006 - INFO - F1_score: 0.5231
2025-02-15 23:04:37,007 - INFO - Precision: 0.3656
2025-02-15 23:04:37,008 - INFO - Recall: 0.9189
2025-02-15 23:04:37,014 - INFO - 
Metrics for Horor:
2025-02-15 23:04:37,015 - INFO - Accuracy: 0.8429
2025-02-15 23:04:37,015 - INFO - F1_score: 0.7248
2025-02-15 23:04:37,016 - INFO - Precision: 0.5870
2025-02-15 23:04:37,017 - INFO - Recall: 0.9474
2025-02-15 23:04:37,023 - INFO - 
Metrics for Komedi:
2025-02-15 23:04:37,024 - INFO - Accuracy: 0.5326
2025-02-15 23:04:37,024 - INFO - F1_score: 0.4554
2025-02-15 23:04:37,025 - INFO - Precision: 0.3072
2025-02-15 23:04:37,026 - INFO - Recall: 0.8793





2025-02-15 23:04:37,033 - INFO - 
Metrics for Laga:
2025-02-15 23:04:37,033 - INFO - Accuracy: 0.6897
2025-02-15 23:04:37,034 - INFO - F1_score: 0.3721
2025-02-15 23:04:37,034 - INFO - Precision: 0.2637
2025-02-15 23:04:37,035 - INFO - Recall: 0.6316
2025-02-15 23:04:37,041 - INFO - 
Metrics for Romantis:
2025-02-15 23:04:37,042 - INFO - Accuracy: 0.6284
2025-02-15 23:04:37,043 - INFO - F1_score: 0.3742
2025-02-15 23:04:37,043 - INFO - Precision: 0.2397
2025-02-15 23:04:37,045 - INFO - Recall: 0.8529
2025-02-15 23:04:37,047 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:04:41,068 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:04:41,069 - INFO - Memory usage after evaluation end: 2618.24 MB
2025-02-15 23:04:41,070 - INFO - Trial 10, Epoch 3: Loss = 1.1449, F1 = 0.4899
2025-02-15 23:04:41,072 - ERROR - Error in trial training: 
2025-02-15 23:04:41,924 - ERROR - Error in optimiz

[I 2025-02-15 23:04:41,926] Trial 10 pruned. 


2025-02-15 23:04:42,673 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:04:42,678 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:04:43,659 - INFO - Model and tokenizer setup completed
2025-02-15 23:04:43,660 - INFO - Setting up data loaders...
2025-02-15 23:04:43,661 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:04:43,663 - INFO - Created sampler with 1477 weights
2025-02-15 23:04:43,664 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 23:07:18,302 - INFO - Starting model evaluation...
2025-02-15 23:07:18,304 - INFO - Memory usage after evaluation start: 2918.68 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.80it/s]

2025-02-15 23:07:27,761 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:07:27,767 - INFO - 
Metrics for Drama:
2025-02-15 23:07:27,767 - INFO - Accuracy: 0.5824
2025-02-15 23:07:27,768 - INFO - F1_score: 0.5068
2025-02-15 23:07:27,769 - INFO - Precision: 0.3810
2025-02-15 23:07:27,769 - INFO - Recall: 0.7568
2025-02-15 23:07:27,775 - INFO - 
Metrics for Horor:
2025-02-15 23:07:27,776 - INFO - Accuracy: 0.6513
2025-02-15 23:07:27,777 - INFO - F1_score: 0.5473
2025-02-15 23:07:27,777 - INFO - Precision: 0.3819
2025-02-15 23:07:27,778 - INFO - Recall: 0.9649
2025-02-15 23:07:27,784 - INFO - 
Metrics for Komedi:
2025-02-15 23:07:27,784 - INFO - Accuracy: 0.7663
2025-02-15 23:07:27,785 - INFO - F1_score: 0.4874
2025-02-15 23:07:27,785 - INFO - Precision: 0.4754
2025-02-15 23:07:27,786 - INFO - Recall: 0.5000





2025-02-15 23:07:27,793 - INFO - 
Metrics for Laga:
2025-02-15 23:07:27,794 - INFO - Accuracy: 0.7471
2025-02-15 23:07:27,794 - INFO - F1_score: 0.4000
2025-02-15 23:07:27,795 - INFO - Precision: 0.3056
2025-02-15 23:07:27,796 - INFO - Recall: 0.5789
2025-02-15 23:07:27,802 - INFO - 
Metrics for Romantis:
2025-02-15 23:07:27,803 - INFO - Accuracy: 0.7203
2025-02-15 23:07:27,803 - INFO - F1_score: 0.3652
2025-02-15 23:07:27,804 - INFO - Precision: 0.2593
2025-02-15 23:07:27,804 - INFO - Recall: 0.6176
2025-02-15 23:07:27,807 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:07:31,667 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:07:31,669 - INFO - Memory usage after evaluation end: 2923.30 MB
2025-02-15 23:07:31,669 - INFO - Trial 11, Epoch 1: Loss = 1.4508, F1 = 0.4613


                                                                                        

2025-02-15 23:10:07,418 - INFO - Starting model evaluation...
2025-02-15 23:10:07,421 - INFO - Memory usage after evaluation start: 2923.43 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:10:16,979 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:10:16,984 - INFO - 
Metrics for Drama:
2025-02-15 23:10:16,985 - INFO - Accuracy: 0.5402
2025-02-15 23:10:16,985 - INFO - F1_score: 0.5122
2025-02-15 23:10:16,986 - INFO - Precision: 0.3663
2025-02-15 23:10:16,987 - INFO - Recall: 0.8514
2025-02-15 23:10:16,993 - INFO - 
Metrics for Horor:
2025-02-15 23:10:16,994 - INFO - Accuracy: 0.8238
2025-02-15 23:10:16,994 - INFO - F1_score: 0.6974
2025-02-15 23:10:16,995 - INFO - Precision: 0.5579
2025-02-15 23:10:16,995 - INFO - Recall: 0.9298
2025-02-15 23:10:17,002 - INFO - 
Metrics for Komedi:
2025-02-15 23:10:17,002 - INFO - Accuracy: 0.6054
2025-02-15 23:10:17,003 - INFO - F1_score: 0.4824
2025-02-15 23:10:17,004 - INFO - Precision: 0.3404
2025-02-15 23:10:17,005 - INFO - Recall: 0.8276





2025-02-15 23:10:17,011 - INFO - 
Metrics for Laga:
2025-02-15 23:10:17,011 - INFO - Accuracy: 0.7586
2025-02-15 23:10:17,012 - INFO - F1_score: 0.3368
2025-02-15 23:10:17,013 - INFO - Precision: 0.2807
2025-02-15 23:10:17,014 - INFO - Recall: 0.4211
2025-02-15 23:10:17,020 - INFO - 
Metrics for Romantis:
2025-02-15 23:10:17,020 - INFO - Accuracy: 0.7395
2025-02-15 23:10:17,021 - INFO - F1_score: 0.4237
2025-02-15 23:10:17,022 - INFO - Precision: 0.2976
2025-02-15 23:10:17,023 - INFO - Recall: 0.7353
2025-02-15 23:10:17,024 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:10:20,828 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:10:20,830 - INFO - Memory usage after evaluation end: 2929.18 MB
2025-02-15 23:10:20,831 - INFO - Trial 11, Epoch 2: Loss = 1.2584, F1 = 0.4905


                                                                                        

2025-02-15 23:12:56,686 - INFO - Starting model evaluation...
2025-02-15 23:12:56,689 - INFO - Memory usage after evaluation start: 2929.18 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:13:06,242 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:13:06,247 - INFO - 
Metrics for Drama:
2025-02-15 23:13:06,248 - INFO - Accuracy: 0.6322
2025-02-15 23:13:06,248 - INFO - F1_score: 0.5152
2025-02-15 23:13:06,250 - INFO - Precision: 0.4113
2025-02-15 23:13:06,251 - INFO - Recall: 0.6892
2025-02-15 23:13:06,256 - INFO - 
Metrics for Horor:
2025-02-15 23:13:06,257 - INFO - Accuracy: 0.7586
2025-02-15 23:13:06,257 - INFO - F1_score: 0.6228
2025-02-15 23:13:06,258 - INFO - Precision: 0.4727
2025-02-15 23:13:06,259 - INFO - Recall: 0.9123
2025-02-15 23:13:06,265 - INFO - 
Metrics for Komedi:
2025-02-15 23:13:06,266 - INFO - Accuracy: 0.5134
2025-02-15 23:13:06,266 - INFO - F1_score: 0.4730





2025-02-15 23:13:06,267 - INFO - Precision: 0.3115
2025-02-15 23:13:06,269 - INFO - Recall: 0.9828
2025-02-15 23:13:06,275 - INFO - 
Metrics for Laga:
2025-02-15 23:13:06,275 - INFO - Accuracy: 0.6935
2025-02-15 23:13:06,276 - INFO - F1_score: 0.3651
2025-02-15 23:13:06,276 - INFO - Precision: 0.2614
2025-02-15 23:13:06,277 - INFO - Recall: 0.6053
2025-02-15 23:13:06,283 - INFO - 
Metrics for Romantis:
2025-02-15 23:13:06,283 - INFO - Accuracy: 0.8774
2025-02-15 23:13:06,284 - INFO - F1_score: 0.5556
2025-02-15 23:13:06,285 - INFO - Precision: 0.5263
2025-02-15 23:13:06,285 - INFO - Recall: 0.5882
2025-02-15 23:13:06,287 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:13:10,054 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:13:10,056 - INFO - Memory usage after evaluation end: 2934.80 MB
2025-02-15 23:13:10,057 - INFO - Trial 11, Epoch 3: Loss = 1.0463, F1 = 0.5063


[I 2025-02-15 23:13:12,614] Trial 11 finished with value: 0.5063139944895106 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 6 with value: 0.5081454955309341.


2025-02-15 23:13:13,414 - INFO - Trial parameter set: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:13:13,419 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:13:14,509 - INFO - Model and tokenizer setup completed
2025-02-15 23:13:14,510 - INFO - Setting up data loaders...
2025-02-15 23:13:14,511 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:13:14,513 - INFO - Created sampler with 1477 weights
2025-02-15 23:13:14,514 - INFO - Created data loaders with batch size 32


                                                                                        

2025-02-15 23:15:49,681 - INFO - Starting model evaluation...
2025-02-15 23:15:49,683 - INFO - Memory usage after evaluation start: 3157.71 MB


Evaluating: 100%|██████████| 9/9 [00:09<00:00,  1.06s/it]

2025-02-15 23:15:59,215 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:15:59,222 - INFO - 
Metrics for Drama:
2025-02-15 23:15:59,222 - INFO - Accuracy: 0.5785
2025-02-15 23:15:59,223 - INFO - F1_score: 0.5175
2025-02-15 23:15:59,224 - INFO - Precision: 0.3831
2025-02-15 23:15:59,225 - INFO - Recall: 0.7973
2025-02-15 23:15:59,230 - INFO - 
Metrics for Horor:
2025-02-15 23:15:59,231 - INFO - Accuracy: 0.5249
2025-02-15 23:15:59,232 - INFO - F1_score: 0.4746
2025-02-15 23:15:59,233 - INFO - Precision: 0.3128
2025-02-15 23:15:59,234 - INFO - Recall: 0.9825
2025-02-15 23:15:59,239 - INFO - 
Metrics for Komedi:
2025-02-15 23:15:59,240 - INFO - Accuracy: 0.4598
2025-02-15 23:15:59,240 - INFO - F1_score: 0.4245
2025-02-15 23:15:59,241 - INFO - Precision: 0.2781





2025-02-15 23:15:59,242 - INFO - Recall: 0.8966
2025-02-15 23:15:59,248 - INFO - 
Metrics for Laga:
2025-02-15 23:15:59,249 - INFO - Accuracy: 0.6667
2025-02-15 23:15:59,249 - INFO - F1_score: 0.3650
2025-02-15 23:15:59,250 - INFO - Precision: 0.2525
2025-02-15 23:15:59,251 - INFO - Recall: 0.6579
2025-02-15 23:15:59,257 - INFO - 
Metrics for Romantis:
2025-02-15 23:15:59,258 - INFO - Accuracy: 0.7356
2025-02-15 23:15:59,258 - INFO - F1_score: 0.4202
2025-02-15 23:15:59,259 - INFO - Precision: 0.2941
2025-02-15 23:15:59,259 - INFO - Recall: 0.7353
2025-02-15 23:15:59,262 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:16:03,055 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:16:03,057 - INFO - Memory usage after evaluation end: 3161.34 MB
2025-02-15 23:16:03,058 - INFO - Trial 12, Epoch 1: Loss = 1.4936, F1 = 0.4403
2025-02-15 23:16:03,060 - ERROR - Error in trial training: 
20

[I 2025-02-15 23:16:04,090] Trial 12 pruned. 


2025-02-15 23:16:04,904 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:16:04,929 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:16:06,084 - INFO - Model and tokenizer setup completed
2025-02-15 23:16:06,085 - INFO - Setting up data loaders...
2025-02-15 23:16:06,085 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:16:06,088 - INFO - Created sampler with 1477 weights
2025-02-15 23:16:06,089 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 23:18:41,240 - INFO - Starting model evaluation...
2025-02-15 23:18:41,242 - INFO - Memory usage after evaluation start: 3176.10 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:18:50,785 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:18:50,791 - INFO - 
Metrics for Drama:
2025-02-15 23:18:50,792 - INFO - Accuracy: 0.6858
2025-02-15 23:18:50,792 - INFO - F1_score: 0.5495
2025-02-15 23:18:50,793 - INFO - Precision: 0.4630
2025-02-15 23:18:50,795 - INFO - Recall: 0.6757
2025-02-15 23:18:50,800 - INFO - 
Metrics for Horor:
2025-02-15 23:18:50,801 - INFO - Accuracy: 0.7318
2025-02-15 23:18:50,801 - INFO - F1_score: 0.6111
2025-02-15 23:18:50,802 - INFO - Precision: 0.4472
2025-02-15 23:18:50,803 - INFO - Recall: 0.9649
2025-02-15 23:18:50,808 - INFO - 
Metrics for Komedi:
2025-02-15 23:18:50,809 - INFO - Accuracy: 0.6782
2025-02-15 23:18:50,810 - INFO - F1_score: 0.4615
2025-02-15 23:18:50,810 - INFO - Precision: 0.3673





2025-02-15 23:18:50,811 - INFO - Recall: 0.6207
2025-02-15 23:18:50,818 - INFO - 
Metrics for Laga:
2025-02-15 23:18:50,819 - INFO - Accuracy: 0.4215
2025-02-15 23:18:50,819 - INFO - F1_score: 0.2977
2025-02-15 23:18:50,820 - INFO - Precision: 0.1808
2025-02-15 23:18:50,822 - INFO - Recall: 0.8421
2025-02-15 23:18:50,826 - INFO - 
Metrics for Romantis:
2025-02-15 23:18:50,827 - INFO - Accuracy: 0.8467
2025-02-15 23:18:50,828 - INFO - F1_score: 0.4118
2025-02-15 23:18:50,828 - INFO - Precision: 0.4118
2025-02-15 23:18:50,829 - INFO - Recall: 0.4118
2025-02-15 23:18:50,832 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:18:54,621 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:18:54,622 - INFO - Memory usage after evaluation end: 3180.73 MB
2025-02-15 23:18:54,624 - INFO - Trial 13, Epoch 1: Loss = 1.4764, F1 = 0.4663


                                                                                        

2025-02-15 23:21:30,474 - INFO - Starting model evaluation...
2025-02-15 23:21:30,476 - INFO - Memory usage after evaluation start: 3180.85 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:21:40,026 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:21:40,032 - INFO - 
Metrics for Drama:
2025-02-15 23:21:40,033 - INFO - Accuracy: 0.4176
2025-02-15 23:21:40,034 - INFO - F1_score: 0.4648
2025-02-15 23:21:40,034 - INFO - Precision: 0.3143
2025-02-15 23:21:40,035 - INFO - Recall: 0.8919
2025-02-15 23:21:40,042 - INFO - 
Metrics for Horor:
2025-02-15 23:21:40,042 - INFO - Accuracy: 0.7893
2025-02-15 23:21:40,043 - INFO - F1_score: 0.6541
2025-02-15 23:21:40,044 - INFO - Precision: 0.5098
2025-02-15 23:21:40,045 - INFO - Recall: 0.9123
2025-02-15 23:21:40,051 - INFO - 
Metrics for Komedi:
2025-02-15 23:21:40,052 - INFO - Accuracy: 0.7165
2025-02-15 23:21:40,052 - INFO - F1_score: 0.5316





2025-02-15 23:21:40,053 - INFO - Precision: 0.4200
2025-02-15 23:21:40,054 - INFO - Recall: 0.7241
2025-02-15 23:21:40,060 - INFO - 
Metrics for Laga:
2025-02-15 23:21:40,061 - INFO - Accuracy: 0.7778
2025-02-15 23:21:40,062 - INFO - F1_score: 0.4200
2025-02-15 23:21:40,063 - INFO - Precision: 0.3387
2025-02-15 23:21:40,064 - INFO - Recall: 0.5526
2025-02-15 23:21:40,070 - INFO - 
Metrics for Romantis:
2025-02-15 23:21:40,070 - INFO - Accuracy: 0.8506
2025-02-15 23:21:40,071 - INFO - F1_score: 0.5063
2025-02-15 23:21:40,072 - INFO - Precision: 0.4444
2025-02-15 23:21:40,072 - INFO - Recall: 0.5882
2025-02-15 23:21:40,075 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:21:43,865 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:21:43,866 - INFO - Memory usage after evaluation end: 3186.60 MB
2025-02-15 23:21:43,867 - INFO - Trial 13, Epoch 2: Loss = 1.2556, F1 = 0.5154


                                                                                        

2025-02-15 23:24:19,760 - INFO - Starting model evaluation...
2025-02-15 23:24:19,761 - INFO - Memory usage after evaluation start: 3186.60 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:24:29,312 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:24:29,317 - INFO - 
Metrics for Drama:
2025-02-15 23:24:29,318 - INFO - Accuracy: 0.5594
2025-02-15 23:24:29,318 - INFO - F1_score: 0.5148
2025-02-15 23:24:29,320 - INFO - Precision: 0.3742
2025-02-15 23:24:29,321 - INFO - Recall: 0.8243
2025-02-15 23:24:29,326 - INFO - 
Metrics for Horor:
2025-02-15 23:24:29,327 - INFO - Accuracy: 0.8046
2025-02-15 23:24:29,327 - INFO - F1_score: 0.6710
2025-02-15 23:24:29,328 - INFO - Precision: 0.5306
2025-02-15 23:24:29,329 - INFO - Recall: 0.9123
2025-02-15 23:24:29,335 - INFO - 
Metrics for Komedi:
2025-02-15 23:24:29,335 - INFO - Accuracy: 0.6935





2025-02-15 23:24:29,336 - INFO - F1_score: 0.5000
2025-02-15 23:24:29,337 - INFO - Precision: 0.3922
2025-02-15 23:24:29,338 - INFO - Recall: 0.6897
2025-02-15 23:24:29,344 - INFO - 
Metrics for Laga:
2025-02-15 23:24:29,345 - INFO - Accuracy: 0.7088
2025-02-15 23:24:29,345 - INFO - F1_score: 0.3770
2025-02-15 23:24:29,346 - INFO - Precision: 0.2738
2025-02-15 23:24:29,346 - INFO - Recall: 0.6053
2025-02-15 23:24:29,353 - INFO - 
Metrics for Romantis:
2025-02-15 23:24:29,353 - INFO - Accuracy: 0.7241
2025-02-15 23:24:29,354 - INFO - F1_score: 0.4098
2025-02-15 23:24:29,354 - INFO - Precision: 0.2841
2025-02-15 23:24:29,355 - INFO - Recall: 0.7353
2025-02-15 23:24:29,357 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:24:33,153 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:24:33,154 - INFO - Memory usage after evaluation end: 3192.23 MB
2025-02-15 23:24:33,155 - INFO - Trial 1

[I 2025-02-15 23:24:35,831] Trial 13 finished with value: 0.515370293250627 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-15 23:24:36,675 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:24:36,678 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:24:38,220 - INFO - Model and tokenizer setup completed
2025-02-15 23:24:38,221 - INFO - Setting up data loaders...
2025-02-15 23:24:38,222 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:24:38,225 - INFO - Created sampler with 1477 weights
2025-02-15 23:24:38,226 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 23:27:13,401 - INFO - Starting model evaluation...
2025-02-15 23:27:13,403 - INFO - Memory usage after evaluation start: 3261.97 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:27:22,950 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:27:22,957 - INFO - 
Metrics for Drama:
2025-02-15 23:27:22,958 - INFO - Accuracy: 0.5824
2025-02-15 23:27:22,958 - INFO - F1_score: 0.5281
2025-02-15 23:27:22,960 - INFO - Precision: 0.3885
2025-02-15 23:27:22,961 - INFO - Recall: 0.8243
2025-02-15 23:27:22,966 - INFO - 
Metrics for Horor:
2025-02-15 23:27:22,966 - INFO - Accuracy: 0.8736
2025-02-15 23:27:22,967 - INFO - F1_score: 0.7130
2025-02-15 23:27:22,969 - INFO - Precision: 0.7069
2025-02-15 23:27:22,969 - INFO - Recall: 0.7193





2025-02-15 23:27:22,976 - INFO - 
Metrics for Komedi:
2025-02-15 23:27:22,979 - INFO - Accuracy: 0.4636
2025-02-15 23:27:22,982 - INFO - F1_score: 0.4355
2025-02-15 23:27:22,984 - INFO - Precision: 0.2842
2025-02-15 23:27:22,985 - INFO - Recall: 0.9310
2025-02-15 23:27:22,993 - INFO - 
Metrics for Laga:
2025-02-15 23:27:22,993 - INFO - Accuracy: 0.6322
2025-02-15 23:27:22,994 - INFO - F1_score: 0.3514
2025-02-15 23:27:22,995 - INFO - Precision: 0.2364
2025-02-15 23:27:22,996 - INFO - Recall: 0.6842
2025-02-15 23:27:23,002 - INFO - 
Metrics for Romantis:
2025-02-15 23:27:23,002 - INFO - Accuracy: 0.6820
2025-02-15 23:27:23,003 - INFO - F1_score: 0.3566
2025-02-15 23:27:23,004 - INFO - Precision: 0.2421
2025-02-15 23:27:23,005 - INFO - Recall: 0.6765
2025-02-15 23:27:23,007 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:27:26,823 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:2

                                                                                        

2025-02-15 23:30:02,545 - INFO - Starting model evaluation...
2025-02-15 23:30:02,547 - INFO - Memory usage after evaluation start: 3265.84 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:30:12,120 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:30:12,126 - INFO - 
Metrics for Drama:
2025-02-15 23:30:12,127 - INFO - Accuracy: 0.6284
2025-02-15 23:30:12,128 - INFO - F1_score: 0.5530
2025-02-15 23:30:12,128 - INFO - Precision: 0.4196
2025-02-15 23:30:12,129 - INFO - Recall: 0.8108
2025-02-15 23:30:12,135 - INFO - 
Metrics for Horor:
2025-02-15 23:30:12,135 - INFO - Accuracy: 0.7778
2025-02-15 23:30:12,136 - INFO - F1_score: 0.6506
2025-02-15 23:30:12,136 - INFO - Precision: 0.4954
2025-02-15 23:30:12,137 - INFO - Recall: 0.9474
2025-02-15 23:30:12,143 - INFO - 
Metrics for Komedi:
2025-02-15 23:30:12,144 - INFO - Accuracy: 0.5517





2025-02-15 23:30:12,145 - INFO - F1_score: 0.4706
2025-02-15 23:30:12,146 - INFO - Precision: 0.3190
2025-02-15 23:30:12,146 - INFO - Recall: 0.8966
2025-02-15 23:30:12,152 - INFO - 
Metrics for Laga:
2025-02-15 23:30:12,153 - INFO - Accuracy: 0.7778
2025-02-15 23:30:12,154 - INFO - F1_score: 0.4423
2025-02-15 23:30:12,154 - INFO - Precision: 0.3485
2025-02-15 23:30:12,155 - INFO - Recall: 0.6053
2025-02-15 23:30:12,161 - INFO - 
Metrics for Romantis:
2025-02-15 23:30:12,162 - INFO - Accuracy: 0.7126
2025-02-15 23:30:12,162 - INFO - F1_score: 0.4186
2025-02-15 23:30:12,163 - INFO - Precision: 0.2842
2025-02-15 23:30:12,164 - INFO - Recall: 0.7941
2025-02-15 23:30:12,166 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:30:15,916 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:30:15,917 - INFO - Memory usage after evaluation end: 3271.47 MB
2025-02-15 23:30:15,918 - INFO - Trial 1

                                                                                        

2025-02-15 23:32:52,142 - INFO - Starting model evaluation...
2025-02-15 23:32:52,143 - INFO - Memory usage after evaluation start: 3271.59 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:33:01,698 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:33:01,704 - INFO - 
Metrics for Drama:
2025-02-15 23:33:01,705 - INFO - Accuracy: 0.4483
2025-02-15 23:33:01,705 - INFO - F1_score: 0.4820
2025-02-15 23:33:01,706 - INFO - Precision: 0.3284
2025-02-15 23:33:01,708 - INFO - Recall: 0.9054
2025-02-15 23:33:01,713 - INFO - 
Metrics for Horor:
2025-02-15 23:33:01,714 - INFO - Accuracy: 0.8467
2025-02-15 23:33:01,714 - INFO - F1_score: 0.7222
2025-02-15 23:33:01,715 - INFO - Precision: 0.5977
2025-02-15 23:33:01,716 - INFO - Recall: 0.9123





2025-02-15 23:33:01,723 - INFO - 
Metrics for Komedi:
2025-02-15 23:33:01,724 - INFO - Accuracy: 0.5211
2025-02-15 23:33:01,724 - INFO - F1_score: 0.4589
2025-02-15 23:33:01,725 - INFO - Precision: 0.3064
2025-02-15 23:33:01,727 - INFO - Recall: 0.9138
2025-02-15 23:33:01,732 - INFO - 
Metrics for Laga:
2025-02-15 23:33:01,733 - INFO - Accuracy: 0.7854
2025-02-15 23:33:01,733 - INFO - F1_score: 0.4043
2025-02-15 23:33:01,734 - INFO - Precision: 0.3393
2025-02-15 23:33:01,735 - INFO - Recall: 0.5000
2025-02-15 23:33:01,741 - INFO - 
Metrics for Romantis:
2025-02-15 23:33:01,742 - INFO - Accuracy: 0.7241
2025-02-15 23:33:01,742 - INFO - F1_score: 0.4098
2025-02-15 23:33:01,743 - INFO - Precision: 0.2841
2025-02-15 23:33:01,744 - INFO - Recall: 0.7353
2025-02-15 23:33:01,746 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:33:05,556 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:3

[I 2025-02-15 23:33:08,458] Trial 14 finished with value: 0.5070196760216448 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-15 23:33:09,356 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:33:09,360 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:33:10,543 - INFO - Model and tokenizer setup completed
2025-02-15 23:33:10,544 - INFO - Setting up data loaders...
2025-02-15 23:33:10,547 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:33:10,549 - INFO - Created sampler with 1477 weights
2025-02-15 23:33:10,550 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 23:35:45,782 - INFO - Starting model evaluation...
2025-02-15 23:35:45,784 - INFO - Memory usage after evaluation start: 3342.07 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.80it/s]

2025-02-15 23:35:55,242 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:35:55,248 - INFO - 
Metrics for Drama:
2025-02-15 23:35:55,248 - INFO - Accuracy: 0.6207
2025-02-15 23:35:55,249 - INFO - F1_score: 0.5263
2025-02-15 23:35:55,250 - INFO - Precision: 0.4074
2025-02-15 23:35:55,251 - INFO - Recall: 0.7432
2025-02-15 23:35:55,257 - INFO - 
Metrics for Horor:
2025-02-15 23:35:55,257 - INFO - Accuracy: 0.6552
2025-02-15 23:35:55,258 - INFO - F1_score: 0.5500
2025-02-15 23:35:55,259 - INFO - Precision: 0.3846
2025-02-15 23:35:55,259 - INFO - Recall: 0.9649





2025-02-15 23:35:55,267 - INFO - 
Metrics for Komedi:
2025-02-15 23:35:55,268 - INFO - Accuracy: 0.5249
2025-02-15 23:35:55,269 - INFO - F1_score: 0.4464
2025-02-15 23:35:55,269 - INFO - Precision: 0.3012
2025-02-15 23:35:55,270 - INFO - Recall: 0.8621
2025-02-15 23:35:55,276 - INFO - 
Metrics for Laga:
2025-02-15 23:35:55,276 - INFO - Accuracy: 0.7395
2025-02-15 23:35:55,277 - INFO - F1_score: 0.4237
2025-02-15 23:35:55,278 - INFO - Precision: 0.3125
2025-02-15 23:35:55,279 - INFO - Recall: 0.6579
2025-02-15 23:35:55,284 - INFO - 
Metrics for Romantis:
2025-02-15 23:35:55,285 - INFO - Accuracy: 0.8238
2025-02-15 23:35:55,285 - INFO - F1_score: 0.4103
2025-02-15 23:35:55,286 - INFO - Precision: 0.3636
2025-02-15 23:35:55,287 - INFO - Recall: 0.4706
2025-02-15 23:35:55,289 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:35:59,085 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:3

                                                                                        

2025-02-15 23:38:35,139 - INFO - Starting model evaluation...
2025-02-15 23:38:35,141 - INFO - Memory usage after evaluation start: 3346.95 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:38:44,683 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:38:44,689 - INFO - 
Metrics for Drama:
2025-02-15 23:38:44,690 - INFO - Accuracy: 0.4521
2025-02-15 23:38:44,690 - INFO - F1_score: 0.4838
2025-02-15 23:38:44,691 - INFO - Precision: 0.3300
2025-02-15 23:38:44,692 - INFO - Recall: 0.9054
2025-02-15 23:38:44,698 - INFO - 
Metrics for Horor:
2025-02-15 23:38:44,699 - INFO - Accuracy: 0.8429
2025-02-15 23:38:44,700 - INFO - F1_score: 0.7172
2025-02-15 23:38:44,701 - INFO - Precision: 0.5909
2025-02-15 23:38:44,701 - INFO - Recall: 0.9123





2025-02-15 23:38:44,708 - INFO - 
Metrics for Komedi:
2025-02-15 23:38:44,709 - INFO - Accuracy: 0.6169
2025-02-15 23:38:44,709 - INFO - F1_score: 0.4898
2025-02-15 23:38:44,710 - INFO - Precision: 0.3478
2025-02-15 23:38:44,711 - INFO - Recall: 0.8276
2025-02-15 23:38:44,717 - INFO - 
Metrics for Laga:
2025-02-15 23:38:44,717 - INFO - Accuracy: 0.6705
2025-02-15 23:38:44,718 - INFO - F1_score: 0.3676
2025-02-15 23:38:44,718 - INFO - Precision: 0.2551
2025-02-15 23:38:44,720 - INFO - Recall: 0.6579
2025-02-15 23:38:44,725 - INFO - 
Metrics for Romantis:
2025-02-15 23:38:44,726 - INFO - Accuracy: 0.7893
2025-02-15 23:38:44,726 - INFO - F1_score: 0.4086
2025-02-15 23:38:44,727 - INFO - Precision: 0.3220
2025-02-15 23:38:44,728 - INFO - Recall: 0.5588
2025-02-15 23:38:44,730 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:38:48,534 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:3

                                                                                        

2025-02-15 23:41:24,584 - INFO - Starting model evaluation...
2025-02-15 23:41:24,586 - INFO - Memory usage after evaluation start: 3352.57 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:41:34,119 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:41:34,125 - INFO - 
Metrics for Drama:
2025-02-15 23:41:34,126 - INFO - Accuracy: 0.5785
2025-02-15 23:41:34,127 - INFO - F1_score: 0.5299
2025-02-15 23:41:34,128 - INFO - Precision: 0.3875
2025-02-15 23:41:34,129 - INFO - Recall: 0.8378
2025-02-15 23:41:34,134 - INFO - 
Metrics for Horor:
2025-02-15 23:41:34,135 - INFO - Accuracy: 0.8123
2025-02-15 23:41:34,135 - INFO - F1_score: 0.6755
2025-02-15 23:41:34,137 - INFO - Precision: 0.5426
2025-02-15 23:41:34,137 - INFO - Recall: 0.8947
2025-02-15 23:41:34,143 - INFO - 
Metrics for Komedi:
2025-02-15 23:41:34,144 - INFO - Accuracy: 0.4981
2025-02-15 23:41:34,144 - INFO - F1_score: 0.4426
2025-02-15 23:41:34,146 - INFO - Precision: 0.2938
2025-02-15 23:41:34,146 - INFO - Recall: 0.8966





2025-02-15 23:41:34,153 - INFO - 
Metrics for Laga:
2025-02-15 23:41:34,154 - INFO - Accuracy: 0.7893
2025-02-15 23:41:34,154 - INFO - F1_score: 0.4086
2025-02-15 23:41:34,155 - INFO - Precision: 0.3455
2025-02-15 23:41:34,156 - INFO - Recall: 0.5000
2025-02-15 23:41:34,162 - INFO - 
Metrics for Romantis:
2025-02-15 23:41:34,162 - INFO - Accuracy: 0.7318
2025-02-15 23:41:34,163 - INFO - F1_score: 0.4167
2025-02-15 23:41:34,165 - INFO - Precision: 0.2907
2025-02-15 23:41:34,165 - INFO - Recall: 0.7353
2025-02-15 23:41:34,167 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:41:38,065 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:41:38,066 - INFO - Memory usage after evaluation end: 3358.32 MB
2025-02-15 23:41:38,068 - INFO - Trial 15, Epoch 3: Loss = 1.0570, F1 = 0.4946


[I 2025-02-15 23:41:41,116] Trial 15 finished with value: 0.4946466454699829 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-15 23:41:42,054 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 1e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:41:42,058 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:41:43,211 - INFO - Model and tokenizer setup completed
2025-02-15 23:41:43,212 - INFO - Setting up data loaders...
2025-02-15 23:41:43,214 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:41:43,215 - INFO - Created sampler with 1477 weights
2025-02-15 23:41:43,217 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 23:44:18,190 - INFO - Starting model evaluation...
2025-02-15 23:44:18,192 - INFO - Memory usage after evaluation start: 3428.14 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.81it/s]

2025-02-15 23:44:27,607 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:44:27,614 - INFO - 
Metrics for Drama:
2025-02-15 23:44:27,615 - INFO - Accuracy: 0.3333
2025-02-15 23:44:27,615 - INFO - F1_score: 0.4563
2025-02-15 23:44:27,616 - INFO - Precision: 0.2967
2025-02-15 23:44:27,617 - INFO - Recall: 0.9865
2025-02-15 23:44:27,623 - INFO - 
Metrics for Horor:
2025-02-15 23:44:27,623 - INFO - Accuracy: 0.6590
2025-02-15 23:44:27,625 - INFO - F1_score: 0.5389
2025-02-15 23:44:27,625 - INFO - Precision: 0.3824
2025-02-15 23:44:27,626 - INFO - Recall: 0.9123
2025-02-15 23:44:27,632 - INFO - 
Metrics for Komedi:
2025-02-15 23:44:27,633 - INFO - Accuracy: 0.6130
2025-02-15 23:44:27,634 - INFO - F1_score: 0.4599
2025-02-15 23:44:27,635 - INFO - Precision: 0.3333





2025-02-15 23:44:27,635 - INFO - Recall: 0.7414
2025-02-15 23:44:27,642 - INFO - 
Metrics for Laga:
2025-02-15 23:44:27,643 - INFO - Accuracy: 0.5019
2025-02-15 23:44:27,643 - INFO - F1_score: 0.3011
2025-02-15 23:44:27,644 - INFO - Precision: 0.1892
2025-02-15 23:44:27,646 - INFO - Recall: 0.7368
2025-02-15 23:44:27,651 - INFO - 
Metrics for Romantis:
2025-02-15 23:44:27,651 - INFO - Accuracy: 0.5785
2025-02-15 23:44:27,652 - INFO - F1_score: 0.3373
2025-02-15 23:44:27,653 - INFO - Precision: 0.2121
2025-02-15 23:44:27,653 - INFO - Recall: 0.8235
2025-02-15 23:44:27,656 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:44:31,457 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:44:31,458 - INFO - Memory usage after evaluation end: 3431.77 MB
2025-02-15 23:44:31,459 - INFO - Trial 16, Epoch 1: Loss = 1.5209, F1 = 0.4187
2025-02-15 23:44:31,461 - ERROR - Error in trial training: 
20

[I 2025-02-15 23:44:32,643] Trial 16 pruned. 


2025-02-15 23:44:33,605 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:44:33,646 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:44:34,606 - INFO - Model and tokenizer setup completed
2025-02-15 23:44:34,607 - INFO - Setting up data loaders...
2025-02-15 23:44:34,609 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:44:34,610 - INFO - Created sampler with 1477 weights
2025-02-15 23:44:34,612 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-15 23:47:09,407 - INFO - Starting model evaluation...
2025-02-15 23:47:09,409 - INFO - Memory usage after evaluation start: 3305.88 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.81it/s]

2025-02-15 23:47:18,814 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:47:18,821 - INFO - 
Metrics for Drama:
2025-02-15 23:47:18,822 - INFO - Accuracy: 0.6284
2025-02-15 23:47:18,823 - INFO - F1_score: 0.5126
2025-02-15 23:47:18,824 - INFO - Precision: 0.4080
2025-02-15 23:47:18,824 - INFO - Recall: 0.6892
2025-02-15 23:47:18,832 - INFO - 
Metrics for Horor:
2025-02-15 23:47:18,832 - INFO - Accuracy: 0.8238
2025-02-15 23:47:18,833 - INFO - F1_score: 0.6933
2025-02-15 23:47:18,833 - INFO - Precision: 0.5591
2025-02-15 23:47:18,834 - INFO - Recall: 0.9123





2025-02-15 23:47:18,843 - INFO - 
Metrics for Komedi:
2025-02-15 23:47:18,843 - INFO - Accuracy: 0.6207
2025-02-15 23:47:18,844 - INFO - F1_score: 0.4817
2025-02-15 23:47:18,845 - INFO - Precision: 0.3459
2025-02-15 23:47:18,845 - INFO - Recall: 0.7931
2025-02-15 23:47:18,853 - INFO - 
Metrics for Laga:
2025-02-15 23:47:18,854 - INFO - Accuracy: 0.6130
2025-02-15 23:47:18,854 - INFO - F1_score: 0.3804
2025-02-15 23:47:18,855 - INFO - Precision: 0.2480
2025-02-15 23:47:18,857 - INFO - Recall: 0.8158
2025-02-15 23:47:18,864 - INFO - 
Metrics for Romantis:
2025-02-15 23:47:18,864 - INFO - Accuracy: 0.5632
2025-02-15 23:47:18,865 - INFO - F1_score: 0.3294
2025-02-15 23:47:18,867 - INFO - Precision: 0.2059
2025-02-15 23:47:18,868 - INFO - Recall: 0.8235
2025-02-15 23:47:18,870 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:47:22,772 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:4

                                                                                        

2025-02-15 23:49:58,802 - INFO - Starting model evaluation...
2025-02-15 23:49:58,804 - INFO - Memory usage after evaluation start: 3310.50 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.79it/s]

2025-02-15 23:50:08,289 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:50:08,295 - INFO - 
Metrics for Drama:
2025-02-15 23:50:08,296 - INFO - Accuracy: 0.4789
2025-02-15 23:50:08,297 - INFO - F1_score: 0.5000
2025-02-15 23:50:08,297 - INFO - Precision: 0.3434
2025-02-15 23:50:08,298 - INFO - Recall: 0.9189
2025-02-15 23:50:08,304 - INFO - 
Metrics for Horor:
2025-02-15 23:50:08,304 - INFO - Accuracy: 0.8008
2025-02-15 23:50:08,305 - INFO - F1_score: 0.6750
2025-02-15 23:50:08,305 - INFO - Precision: 0.5243
2025-02-15 23:50:08,306 - INFO - Recall: 0.9474





2025-02-15 23:50:08,313 - INFO - 
Metrics for Komedi:
2025-02-15 23:50:08,314 - INFO - Accuracy: 0.5556
2025-02-15 23:50:08,315 - INFO - F1_score: 0.4821
2025-02-15 23:50:08,316 - INFO - Precision: 0.3253
2025-02-15 23:50:08,317 - INFO - Recall: 0.9310
2025-02-15 23:50:08,322 - INFO - 
Metrics for Laga:
2025-02-15 23:50:08,323 - INFO - Accuracy: 0.7280
2025-02-15 23:50:08,323 - INFO - F1_score: 0.3932
2025-02-15 23:50:08,325 - INFO - Precision: 0.2911
2025-02-15 23:50:08,325 - INFO - Recall: 0.6053
2025-02-15 23:50:08,331 - INFO - 
Metrics for Romantis:
2025-02-15 23:50:08,331 - INFO - Accuracy: 0.7854
2025-02-15 23:50:08,332 - INFO - F1_score: 0.4286
2025-02-15 23:50:08,334 - INFO - Precision: 0.3281
2025-02-15 23:50:08,334 - INFO - Recall: 0.6176
2025-02-15 23:50:08,336 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:50:12,150 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:5

                                                                                        

2025-02-15 23:52:48,205 - INFO - Starting model evaluation...
2025-02-15 23:52:48,208 - INFO - Memory usage after evaluation start: 3316.13 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.78it/s]

2025-02-15 23:52:57,769 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:52:57,774 - INFO - 
Metrics for Drama:
2025-02-15 23:52:57,775 - INFO - Accuracy: 0.4253
2025-02-15 23:52:57,776 - INFO - F1_score: 0.4718
2025-02-15 23:52:57,777 - INFO - Precision: 0.3190
2025-02-15 23:52:57,777 - INFO - Recall: 0.9054
2025-02-15 23:52:57,784 - INFO - 
Metrics for Horor:
2025-02-15 23:52:57,784 - INFO - Accuracy: 0.8582
2025-02-15 23:52:57,785 - INFO - F1_score: 0.7338
2025-02-15 23:52:57,786 - INFO - Precision: 0.6220
2025-02-15 23:52:57,786 - INFO - Recall: 0.8947
2025-02-15 23:52:57,792 - INFO - 
Metrics for Komedi:
2025-02-15 23:52:57,793 - INFO - Accuracy: 0.7050
2025-02-15 23:52:57,794 - INFO - F1_score: 0.5600
2025-02-15 23:52:57,794 - INFO - Precision: 0.4188
2025-02-15 23:52:57,796 - INFO - Recall: 0.8448





2025-02-15 23:52:57,803 - INFO - 
Metrics for Laga:
2025-02-15 23:52:57,803 - INFO - Accuracy: 0.6130
2025-02-15 23:52:57,804 - INFO - F1_score: 0.3567
2025-02-15 23:52:57,805 - INFO - Precision: 0.2353
2025-02-15 23:52:57,806 - INFO - Recall: 0.7368
2025-02-15 23:52:57,812 - INFO - 
Metrics for Romantis:
2025-02-15 23:52:57,813 - INFO - Accuracy: 0.7241
2025-02-15 23:52:57,814 - INFO - F1_score: 0.4098
2025-02-15 23:52:57,815 - INFO - Precision: 0.2841
2025-02-15 23:52:57,816 - INFO - Recall: 0.7353
2025-02-15 23:52:57,818 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:53:01,612 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:53:01,614 - INFO - Memory usage after evaluation end: 3321.88 MB
2025-02-15 23:53:01,615 - INFO - Trial 17, Epoch 3: Loss = 1.1103, F1 = 0.5064


[I 2025-02-15 23:53:04,827] Trial 17 finished with value: 0.5064335798437446 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-15 23:53:05,830 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:53:05,834 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:53:07,002 - INFO - Model and tokenizer setup completed
2025-02-15 23:53:07,003 - INFO - Setting up data loaders...
2025-02-15 23:53:07,004 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:53:07,006 - INFO - Created sampler with 1477 weights
2025-02-15 23:53:07,008 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-15 23:55:43,408 - INFO - Starting model evaluation...
2025-02-15 23:55:43,410 - INFO - Memory usage after evaluation start: 3545.16 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.70it/s]

2025-02-15 23:55:52,333 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:55:52,339 - INFO - 
Metrics for Drama:
2025-02-15 23:55:52,340 - INFO - Accuracy: 0.3870
2025-02-15 23:55:52,341 - INFO - F1_score: 0.4737
2025-02-15 23:55:52,341 - INFO - Precision: 0.3130
2025-02-15 23:55:52,343 - INFO - Recall: 0.9730
2025-02-15 23:55:52,349 - INFO - 
Metrics for Horor:
2025-02-15 23:55:52,349 - INFO - Accuracy: 0.8008
2025-02-15 23:55:52,350 - INFO - F1_score: 0.6709
2025-02-15 23:55:52,351 - INFO - Precision: 0.5248
2025-02-15 23:55:52,352 - INFO - Recall: 0.9298
2025-02-15 23:55:52,357 - INFO - 
Metrics for Komedi:
2025-02-15 23:55:52,358 - INFO - Accuracy: 0.4330
2025-02-15 23:55:52,359 - INFO - F1_score: 0.4127
2025-02-15 23:55:52,359 - INFO - Precision: 0.2680
2025-02-15 23:55:52,360 - INFO - Recall: 0.8966





2025-02-15 23:55:52,367 - INFO - 
Metrics for Laga:
2025-02-15 23:55:52,368 - INFO - Accuracy: 0.6897
2025-02-15 23:55:52,368 - INFO - F1_score: 0.3910
2025-02-15 23:55:52,370 - INFO - Precision: 0.2737
2025-02-15 23:55:52,370 - INFO - Recall: 0.6842
2025-02-15 23:55:52,376 - INFO - 
Metrics for Romantis:
2025-02-15 23:55:52,377 - INFO - Accuracy: 0.7126
2025-02-15 23:55:52,377 - INFO - F1_score: 0.4000
2025-02-15 23:55:52,378 - INFO - Precision: 0.2747
2025-02-15 23:55:52,380 - INFO - Recall: 0.7353
2025-02-15 23:55:52,381 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:55:56,153 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:55:56,155 - INFO - Memory usage after evaluation end: 3548.79 MB
2025-02-15 23:55:56,156 - INFO - Trial 18, Epoch 1: Loss = 1.4260, F1 = 0.4696


                                                                                          

2025-02-15 23:58:33,237 - INFO - Starting model evaluation...
2025-02-15 23:58:33,240 - INFO - Memory usage after evaluation start: 3548.91 MB


Evaluating: 100%|██████████| 33/33 [00:08<00:00,  3.72it/s]

2025-02-15 23:58:42,109 - INFO - 
Per-genre Performance Metrics:
2025-02-15 23:58:42,114 - INFO - 
Metrics for Drama:
2025-02-15 23:58:42,115 - INFO - Accuracy: 0.5670
2025-02-15 23:58:42,116 - INFO - F1_score: 0.5311
2025-02-15 23:58:42,117 - INFO - Precision: 0.3832
2025-02-15 23:58:42,118 - INFO - Recall: 0.8649
2025-02-15 23:58:42,124 - INFO - 
Metrics for Horor:
2025-02-15 23:58:42,124 - INFO - Accuracy: 0.7893
2025-02-15 23:58:42,125 - INFO - F1_score: 0.6584
2025-02-15 23:58:42,126 - INFO - Precision: 0.5096
2025-02-15 23:58:42,127 - INFO - Recall: 0.9298
2025-02-15 23:58:42,133 - INFO - 
Metrics for Komedi:
2025-02-15 23:58:42,133 - INFO - Accuracy: 0.4598





2025-02-15 23:58:42,134 - INFO - F1_score: 0.4337
2025-02-15 23:58:42,135 - INFO - Precision: 0.2827
2025-02-15 23:58:42,136 - INFO - Recall: 0.9310
2025-02-15 23:58:42,142 - INFO - 
Metrics for Laga:
2025-02-15 23:58:42,143 - INFO - Accuracy: 0.6973
2025-02-15 23:58:42,144 - INFO - F1_score: 0.3780
2025-02-15 23:58:42,144 - INFO - Precision: 0.2697
2025-02-15 23:58:42,145 - INFO - Recall: 0.6316
2025-02-15 23:58:42,152 - INFO - 
Metrics for Romantis:
2025-02-15 23:58:42,152 - INFO - Accuracy: 0.7969
2025-02-15 23:58:42,153 - INFO - F1_score: 0.4301
2025-02-15 23:58:42,154 - INFO - Precision: 0.3390
2025-02-15 23:58:42,155 - INFO - Recall: 0.5882
2025-02-15 23:58:42,157 - INFO - Generating detailed confusion matrices for each genre...
2025-02-15 23:58:45,905 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-15 23:58:45,906 - INFO - Memory usage after evaluation end: 3554.79 MB
2025-02-15 23:58:45,907 - INFO - Trial 1

[I 2025-02-15 23:58:46,983] Trial 18 pruned. 


2025-02-15 23:58:48,064 - INFO - Trial parameter set: {'batch_size': 16, 'learning_rate': 1e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-15 23:58:48,068 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-15 23:58:49,222 - INFO - Model and tokenizer setup completed
2025-02-15 23:58:49,223 - INFO - Setting up data loaders...
2025-02-15 23:58:49,224 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-15 23:58:49,226 - INFO - Created sampler with 1477 weights
2025-02-15 23:58:49,227 - INFO - Created data loaders with batch size 16


                                                                                        

2025-02-16 00:01:24,368 - INFO - Starting model evaluation...
2025-02-16 00:01:24,370 - INFO - Memory usage after evaluation start: 3604.32 MB


Evaluating: 100%|██████████| 17/17 [00:09<00:00,  1.80it/s]

2025-02-16 00:01:33,825 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:01:33,831 - INFO - 
Metrics for Drama:
2025-02-16 00:01:33,832 - INFO - Accuracy: 0.6973
2025-02-16 00:01:33,833 - INFO - F1_score: 0.4903
2025-02-16 00:01:33,834 - INFO - Precision: 0.4691
2025-02-16 00:01:33,835 - INFO - Recall: 0.5135
2025-02-16 00:01:33,841 - INFO - 
Metrics for Horor:
2025-02-16 00:01:33,841 - INFO - Accuracy: 0.6590
2025-02-16 00:01:33,842 - INFO - F1_score: 0.5137
2025-02-16 00:01:33,843 - INFO - Precision: 0.3730
2025-02-16 00:01:33,844 - INFO - Recall: 0.8246





2025-02-16 00:01:33,851 - INFO - 
Metrics for Komedi:
2025-02-16 00:01:33,852 - INFO - Accuracy: 0.6245
2025-02-16 00:01:33,852 - INFO - F1_score: 0.4235
2025-02-16 00:01:33,853 - INFO - Precision: 0.3214
2025-02-16 00:01:33,854 - INFO - Recall: 0.6207
2025-02-16 00:01:33,861 - INFO - 
Metrics for Laga:
2025-02-16 00:01:33,861 - INFO - Accuracy: 0.7280
2025-02-16 00:01:33,863 - INFO - F1_score: 0.3486
2025-02-16 00:01:33,863 - INFO - Precision: 0.2676
2025-02-16 00:01:33,864 - INFO - Recall: 0.5000
2025-02-16 00:01:33,870 - INFO - 
Metrics for Romantis:
2025-02-16 00:01:33,871 - INFO - Accuracy: 0.6284
2025-02-16 00:01:33,871 - INFO - F1_score: 0.3490
2025-02-16 00:01:33,880 - INFO - Precision: 0.2261
2025-02-16 00:01:33,881 - INFO - Recall: 0.7647
2025-02-16 00:01:33,882 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 00:01:37,657 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:0

[I 2025-02-16 00:01:38,953] Trial 19 pruned. 


2025-02-16 00:01:40,022 - INFO - 
Hyperparameter Optimization Results:
2025-02-16 00:01:40,027 - INFO - Best trial number: 13
2025-02-16 00:01:40,028 - INFO - Best F1-score: 0.5154
2025-02-16 00:01:40,029 - INFO - 
Best hyperparameters:
2025-02-16 00:01:40,030 - INFO - batch_size: 16
2025-02-16 00:01:40,031 - INFO - learning_rate: 2e-05
2025-02-16 00:01:40,031 - INFO - weight_decay: 0.01
2025-02-16 00:01:40,032 - INFO - mixup_prob: 0.2
2025-02-16 00:01:40,033 - INFO - smoothing: 0.1
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido

2025-02-16 00:01:42,089 - INFO - 
Best Hyperparameters found:
2025-02-16 00:01:42,091 - INFO - batch_size: 16
2025-02-16 00:01:42,092 - INFO - learning_rate: 2e-05
2025-02-16 00:01:42,093 - INFO - weight_decay: 0.01
2025-02-16 00:01:42,094 - INFO - mixup_prob: 0.2
2025-02-16 00:01:42,094 - INFO - smoothing: 0.1
2025-02-16 00:01:42,095 - INFO - 
Training final model with optim

100%|██████████| 1738/1738 [00:00<00:00, 14551.37it/s]

2025-02-16 00:01:42,272 - INFO - Memory usage after preprocessing: 3594.89 MB
2025-02-16 00:01:42,273 - INFO - 
Dataset statistics:
2025-02-16 00:01:42,274 - INFO - Total samples after preprocessing: 1738
2025-02-16 00:01:42,279 - INFO - Genre 'Drama': 510 samples
2025-02-16 00:01:42,279 - INFO - Genre 'Horor': 349 samples
2025-02-16 00:01:42,280 - INFO - Genre 'Komedi': 374 samples
2025-02-16 00:01:42,280 - INFO - Genre 'Laga': 297 samples
2025-02-16 00:01:42,282 - INFO - Genre 'Romantis': 208 samples
2025-02-16 00:01:42,283 - INFO - 
Training set size: 1477
2025-02-16 00:01:42,284 - INFO - Testing set size: 261
2025-02-16 00:01:42,284 - INFO - Setting up model and tokenizer...



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 00:01:43,407 - INFO - Model and tokenizer setup completed
2025-02-16 00:01:43,408 - INFO - Setting up data loaders...
2025-02-16 00:01:43,409 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 00:01:43,412 - INFO - Created sampler with 1477 weights
2025-02-16 00:01:43,413 - INFO - Created data loaders with batch size 10


Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]
Epoch 1:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 1:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.6268][A
Epoch 1:   1%|          | 1/148 [00:01<02:55,  1.20s/it, training_loss=1.6268][A
Epoch 1:   1%|          | 1/148 [00:02<02:55,  1.20s/it, training_loss=1.6441][A
Epoch 1:   1%|▏         | 2/148 [00:02<02:38,  1.09s/it, training_loss=1.6441][A
Epoch 1:   1%|▏         | 2/148 [00:03<02:38,  1.09s/it, training_loss=1.7695][A
Epoch 1:   2%|▏         | 3/148 [00:03<02:32,  1.05s/it, training_loss=1.7695][A
Epoch 1:   2%|▏         | 3/148 [00:04<02:32,  1.05s/it, training_loss=1.7077][A
Epoch 1:   3%|▎         | 4/148 [00:04<02:29,  1.04s/it, training_loss=1.7077][A
Epoch 1:   3%|▎         | 4/148 [00:05<02:29,  1.04s/it, training_loss=1.5704][A
Epoch 1:   3%|▎         | 5/148 [00:05<02:28,  1.04s/it, training_loss=1.5704][A
Epoch 1:   3%|▎         | 5/148 [00:06<02:28,  1.04s/it, training_loss=1.5620

2025-02-16 00:04:18,942 - INFO - Starting model evaluation...
2025-02-16 00:04:18,944 - INFO - Memory usage after evaluation start: 3653.98 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.07it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  3.01it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.98it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.95it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.95it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.94it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.93it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.94it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.95it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.95it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.94it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.93it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.93it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.93it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.92it/s][A
Evaluatin

2025-02-16 00:04:27,874 - INFO - Optimizing classification thresholds...
2025-02-16 00:04:27,892 - INFO - Class 'Drama': Optimal threshold = 0.650, F1 Score = 0.530
2025-02-16 00:04:27,908 - INFO - Class 'Horor': Optimal threshold = 0.600, F1 Score = 0.632
2025-02-16 00:04:27,922 - INFO - Class 'Komedi': Optimal threshold = 0.450, F1 Score = 0.415
2025-02-16 00:04:27,938 - INFO - Class 'Laga': Optimal threshold = 0.650, F1 Score = 0.337
2025-02-16 00:04:27,952 - INFO - Class 'Romantis': Optimal threshold = 0.500, F1 Score = 0.385
2025-02-16 00:04:27,979 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:04:27,987 - INFO - 
Metrics for Drama:
2025-02-16 00:04:27,988 - INFO - Accuracy: 0.6743
2025-02-16 00:04:27,989 - INFO - F1_score: 0.5304
2025-02-16 00:04:27,990 - INFO - Precision: 0.4486
2025-02-16 00:04:27,992 - INFO - Recall: 0.6486
2025-02-16 00:04:27,998 - INFO - 
Metrics for Horor:
2025-02-16 00:04:27,999 - INFO - Accuracy: 0.8352
2025-02-16 00:04:28,000 - INFO - F1_score: 0




2025-02-16 00:04:28,028 - INFO - 
Metrics for Laga:
2025-02-16 00:04:28,029 - INFO - Accuracy: 0.7433
2025-02-16 00:04:28,030 - INFO - F1_score: 0.3366
2025-02-16 00:04:28,031 - INFO - Precision: 0.2698
2025-02-16 00:04:28,032 - INFO - Recall: 0.4474
2025-02-16 00:04:28,040 - INFO - 
Metrics for Romantis:
2025-02-16 00:04:28,040 - INFO - Accuracy: 0.6820
2025-02-16 00:04:28,042 - INFO - F1_score: 0.3852
2025-02-16 00:04:28,042 - INFO - Precision: 0.2574
2025-02-16 00:04:28,043 - INFO - Recall: 0.7647
2025-02-16 00:04:28,045 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 00:04:31,844 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:04:31,845 - INFO - Memory usage after evaluation end: 3658.73 MB


Training Progress:   0%|          | 0/100 [02:57<?, ?it/s, Train Loss=1.5752, Val Loss=0.0573, Accuracy=0.6659]

2025-02-16 00:04:40,699 - INFO - New best accuracy: 0.6659
2025-02-16 00:04:41,630 - INFO - New best loss: 0.0573
2025-02-16 00:04:42,547 - INFO - Learning rate: 1e-05


Training Progress:   1%|          | 1/100 [02:59<4:55:33, 179.13s/it, Train Loss=1.5752, Val Loss=0.0573, Accuracy=0.6659]
Epoch 2:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 2:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.6912][A
Epoch 2:   1%|          | 1/148 [00:01<02:32,  1.04s/it, training_loss=1.6912][A
Epoch 2:   1%|          | 1/148 [00:02<02:32,  1.04s/it, training_loss=1.7170][A
Epoch 2:   1%|▏         | 2/148 [00:02<02:30,  1.03s/it, training_loss=1.7170][A
Epoch 2:   1%|▏         | 2/148 [00:03<02:30,  1.03s/it, training_loss=1.5352][A
Epoch 2:   2%|▏         | 3/148 [00:03<02:30,  1.04s/it, training_loss=1.5352][A
Epoch 2:   2%|▏         | 3/148 [00:04<02:30,  1.04s/it, training_loss=1.4469][A
Epoch 2:   3%|▎         | 4/148 [00:04<02:30,  1.04s/it, training_loss=1.4469][A
Epoch 2:   3%|▎         | 4/148 [00:05<02:30,  1.04s/it, training_loss=1.4622][A
Epoch 2:   3%|▎         | 5/148 [00:05<02:29,  1.04s/it, training_loss=1.4622][A
Epoch 2:   3%

2025-02-16 00:07:17,968 - INFO - Starting model evaluation...
2025-02-16 00:07:17,969 - INFO - Memory usage after evaluation start: 3661.33 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  2.98it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.94it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.94it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.95it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.95it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.94it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.93it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.93it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.92it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.91it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.92it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.92it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.92it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.92it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.93it/s][A
Evaluatin

2025-02-16 00:07:26,887 - INFO - Optimizing classification thresholds...
2025-02-16 00:07:26,901 - INFO - Class 'Drama': Optimal threshold = 0.500, F1 Score = 0.541
2025-02-16 00:07:26,916 - INFO - Class 'Horor': Optimal threshold = 0.550, F1 Score = 0.748
2025-02-16 00:07:26,930 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.497
2025-02-16 00:07:26,944 - INFO - Class 'Laga': Optimal threshold = 0.750, F1 Score = 0.395
2025-02-16 00:07:26,958 - INFO - Class 'Romantis': Optimal threshold = 0.750, F1 Score = 0.431
2025-02-16 00:07:26,979 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:07:26,985 - INFO - 
Metrics for Drama:
2025-02-16 00:07:26,985 - INFO - Accuracy: 0.6552
2025-02-16 00:07:26,986 - INFO - F1_score: 0.5408
2025-02-16 00:07:26,988 - INFO - Precision: 0.4344
2025-02-16 00:07:26,988 - INFO - Recall: 0.7162
2025-02-16 00:07:26,994 - INFO - 
Metrics for Horor:
2025-02-16 00:07:26,994 - INFO - Accuracy: 0.8659
2025-02-16 00:07:26,995 - INFO - F1_score: 0




2025-02-16 00:07:30,858 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:07:30,860 - INFO - Memory usage after evaluation end: 3682.70 MB


Training Progress:   1%|          | 1/100 [05:56<4:55:33, 179.13s/it, Train Loss=1.4812, Val Loss=0.0531, Accuracy=0.7762]

2025-02-16 00:07:39,590 - INFO - New best accuracy: 0.7762
2025-02-16 00:07:40,981 - INFO - New best loss: 0.0531
2025-02-16 00:07:42,395 - INFO - Learning rate: 1e-05


Training Progress:   2%|▏         | 2/100 [05:58<4:53:16, 179.55s/it, Train Loss=1.4812, Val Loss=0.0531, Accuracy=0.7762]
Epoch 3:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 3:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.5329][A
Epoch 3:   1%|          | 1/148 [00:01<02:36,  1.06s/it, training_loss=1.5329][A
Epoch 3:   1%|          | 1/148 [00:02<02:36,  1.06s/it, training_loss=1.4829][A
Epoch 3:   1%|▏         | 2/148 [00:02<02:32,  1.04s/it, training_loss=1.4829][A
Epoch 3:   1%|▏         | 2/148 [00:03<02:32,  1.04s/it, training_loss=1.6148][A
Epoch 3:   2%|▏         | 3/148 [00:03<02:30,  1.04s/it, training_loss=1.6148][A
Epoch 3:   2%|▏         | 3/148 [00:04<02:30,  1.04s/it, training_loss=1.6529][A
Epoch 3:   3%|▎         | 4/148 [00:04<02:28,  1.03s/it, training_loss=1.6529][A
Epoch 3:   3%|▎         | 4/148 [00:05<02:28,  1.03s/it, training_loss=1.6230][A
Epoch 3:   3%|▎         | 5/148 [00:05<02:27,  1.03s/it, training_loss=1.6230][A
Epoch 3:   3%

2025-02-16 00:10:17,870 - INFO - Starting model evaluation...
2025-02-16 00:10:17,872 - INFO - Memory usage after evaluation start: 3667.06 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.04it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.97it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.94it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.92it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.92it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.91it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.91it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.92it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.92it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.91it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.91it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.91it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.91it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.91it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.92it/s][A
Evaluatin

2025-02-16 00:10:26,831 - INFO - Optimizing classification thresholds...
2025-02-16 00:10:26,846 - INFO - Class 'Drama': Optimal threshold = 0.550, F1 Score = 0.528
2025-02-16 00:10:26,860 - INFO - Class 'Horor': Optimal threshold = 0.750, F1 Score = 0.773
2025-02-16 00:10:26,874 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.480
2025-02-16 00:10:26,888 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.400
2025-02-16 00:10:26,901 - INFO - Class 'Romantis': Optimal threshold = 0.550, F1 Score = 0.500
2025-02-16 00:10:26,924 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:10:26,930 - INFO - 
Metrics for Drama:
2025-02-16 00:10:26,930 - INFO - Accuracy: 0.6437
2025-02-16 00:10:26,931 - INFO - F1_score: 0.5279
2025-02-16 00:10:26,932 - INFO - Precision: 0.4228
2025-02-16 00:10:26,932 - INFO - Recall: 0.7027
2025-02-16 00:10:26,939 - INFO - 
Metrics for Horor:
2025-02-16 00:10:26,939 - INFO - Accuracy: 0.8966
2025-02-16 00:10:26,940 - INFO - F1_score: 0




2025-02-16 00:10:30,913 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:10:30,915 - INFO - Memory usage after evaluation end: 3688.31 MB


Training Progress:   2%|▏         | 2/100 [08:56<4:53:16, 179.55s/it, Train Loss=1.4309, Val Loss=0.0533, Accuracy=0.7395]

2025-02-16 00:10:39,773 - INFO - Learning rate: 1e-05


Training Progress:   3%|▎         | 3/100 [08:56<4:48:40, 178.56s/it, Train Loss=1.4309, Val Loss=0.0533, Accuracy=0.7395]
Epoch 4:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 4:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.6027][A
Epoch 4:   1%|          | 1/148 [00:01<02:32,  1.04s/it, training_loss=1.6027][A
Epoch 4:   1%|          | 1/148 [00:02<02:32,  1.04s/it, training_loss=1.1488][A
Epoch 4:   1%|▏         | 2/148 [00:02<02:32,  1.05s/it, training_loss=1.1488][A
Epoch 4:   1%|▏         | 2/148 [00:03<02:32,  1.05s/it, training_loss=1.1876][A
Epoch 4:   2%|▏         | 3/148 [00:03<02:31,  1.05s/it, training_loss=1.1876][A
Epoch 4:   2%|▏         | 3/148 [00:04<02:31,  1.05s/it, training_loss=1.1445][A
Epoch 4:   3%|▎         | 4/148 [00:04<02:30,  1.05s/it, training_loss=1.1445][A
Epoch 4:   3%|▎         | 4/148 [00:05<02:30,  1.05s/it, training_loss=1.7833][A
Epoch 4:   3%|▎         | 5/148 [00:05<02:29,  1.05s/it, training_loss=1.7833][A
Epoch 4:   3%

2025-02-16 00:13:15,102 - INFO - Starting model evaluation...
2025-02-16 00:13:15,105 - INFO - Memory usage after evaluation start: 3688.43 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.00it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.94it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.93it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.92it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.92it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.92it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.91it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.92it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.91it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.92it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.91it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.91it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.91it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.91it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.92it/s][A
Evaluatin

2025-02-16 00:13:24,071 - INFO - Optimizing classification thresholds...
2025-02-16 00:13:24,086 - INFO - Class 'Drama': Optimal threshold = 0.650, F1 Score = 0.533
2025-02-16 00:13:24,100 - INFO - Class 'Horor': Optimal threshold = 0.650, F1 Score = 0.785
2025-02-16 00:13:24,113 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.595
2025-02-16 00:13:24,127 - INFO - Class 'Laga': Optimal threshold = 0.650, F1 Score = 0.396
2025-02-16 00:13:24,140 - INFO - Class 'Romantis': Optimal threshold = 0.750, F1 Score = 0.535
2025-02-16 00:13:24,161 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:13:24,166 - INFO - 
Metrics for Drama:
2025-02-16 00:13:24,167 - INFO - Accuracy: 0.7586
2025-02-16 00:13:24,168 - INFO - F1_score: 0.5333
2025-02-16 00:13:24,169 - INFO - Precision: 0.5902
2025-02-16 00:13:24,170 - INFO - Recall: 0.4865
2025-02-16 00:13:24,176 - INFO - 
Metrics for Horor:
2025-02-16 00:13:24,177 - INFO - Accuracy: 0.8889
2025-02-16 00:13:24,178 - INFO - F1_score: 0




2025-02-16 00:13:28,032 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:13:28,034 - INFO - Memory usage after evaluation end: 3694.18 MB


Training Progress:   3%|▎         | 3/100 [11:53<4:48:40, 178.56s/it, Train Loss=1.4083, Val Loss=0.0527, Accuracy=0.8138]

2025-02-16 00:13:36,876 - INFO - New best accuracy: 0.8138
2025-02-16 00:13:38,308 - INFO - New best loss: 0.0527
2025-02-16 00:13:39,794 - INFO - Learning rate: 1e-05


Training Progress:   4%|▍         | 4/100 [11:56<4:46:37, 179.14s/it, Train Loss=1.4083, Val Loss=0.0527, Accuracy=0.8138]
Epoch 5:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 5:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.5687][A
Epoch 5:   1%|          | 1/148 [00:01<02:38,  1.08s/it, training_loss=1.5687][A
Epoch 5:   1%|          | 1/148 [00:02<02:38,  1.08s/it, training_loss=1.5795][A
Epoch 5:   1%|▏         | 2/148 [00:02<02:33,  1.05s/it, training_loss=1.5795][A
Epoch 5:   1%|▏         | 2/148 [00:03<02:33,  1.05s/it, training_loss=1.2761][A
Epoch 5:   2%|▏         | 3/148 [00:03<02:32,  1.05s/it, training_loss=1.2761][A
Epoch 5:   2%|▏         | 3/148 [00:04<02:32,  1.05s/it, training_loss=1.1138][A
Epoch 5:   3%|▎         | 4/148 [00:04<02:32,  1.06s/it, training_loss=1.1138][A
Epoch 5:   3%|▎         | 4/148 [00:05<02:32,  1.06s/it, training_loss=1.3345][A
Epoch 5:   3%|▎         | 5/148 [00:05<02:31,  1.06s/it, training_loss=1.3345][A
Epoch 5:   3%

2025-02-16 00:16:15,085 - INFO - Starting model evaluation...
2025-02-16 00:16:15,087 - INFO - Memory usage after evaluation start: 3678.82 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.06it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.98it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.95it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.93it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.93it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.92it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.91it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.91it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.91it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.92it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.91it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.92it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.91it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.92it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.91it/s][A
Evaluatin

2025-02-16 00:16:24,049 - INFO - Optimizing classification thresholds...
2025-02-16 00:16:24,064 - INFO - Class 'Drama': Optimal threshold = 0.550, F1 Score = 0.533
2025-02-16 00:16:24,077 - INFO - Class 'Horor': Optimal threshold = 0.600, F1 Score = 0.788
2025-02-16 00:16:24,091 - INFO - Class 'Komedi': Optimal threshold = 0.700, F1 Score = 0.576
2025-02-16 00:16:24,104 - INFO - Class 'Laga': Optimal threshold = 0.650, F1 Score = 0.415
2025-02-16 00:16:24,117 - INFO - Class 'Romantis': Optimal threshold = 0.700, F1 Score = 0.528
2025-02-16 00:16:24,138 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:16:24,143 - INFO - 
Metrics for Drama:
2025-02-16 00:16:24,144 - INFO - Accuracy: 0.7050
2025-02-16 00:16:24,144 - INFO - F1_score: 0.5333
2025-02-16 00:16:24,145 - INFO - Precision: 0.4835
2025-02-16 00:16:24,146 - INFO - Recall: 0.5946
2025-02-16 00:16:24,152 - INFO - 
Metrics for Horor:
2025-02-16 00:16:24,153 - INFO - Accuracy: 0.8927
2025-02-16 00:16:24,153 - INFO - F1_score: 0




2025-02-16 00:16:27,980 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:16:27,982 - INFO - Memory usage after evaluation end: 3700.20 MB


Training Progress:   4%|▍         | 4/100 [14:53<4:46:37, 179.14s/it, Train Loss=1.3699, Val Loss=0.0521, Accuracy=0.8054]

2025-02-16 00:16:36,823 - INFO - New best loss: 0.0521
2025-02-16 00:16:38,251 - INFO - Learning rate: 1e-05


Training Progress:   5%|▌         | 5/100 [14:56<4:44:00, 179.37s/it, Train Loss=1.3699, Val Loss=0.0521, Accuracy=0.8054]
Epoch 6:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 6:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.6178][A
Epoch 6:   1%|          | 1/148 [00:01<02:33,  1.04s/it, training_loss=1.6178][A
Epoch 6:   1%|          | 1/148 [00:02<02:33,  1.04s/it, training_loss=1.6454][A
Epoch 6:   1%|▏         | 2/148 [00:02<02:32,  1.04s/it, training_loss=1.6454][A
Epoch 6:   1%|▏         | 2/148 [00:03<02:32,  1.04s/it, training_loss=1.2963][A
Epoch 6:   2%|▏         | 3/148 [00:03<02:30,  1.04s/it, training_loss=1.2963][A
Epoch 6:   2%|▏         | 3/148 [00:04<02:30,  1.04s/it, training_loss=1.2091][A
Epoch 6:   3%|▎         | 4/148 [00:04<02:31,  1.05s/it, training_loss=1.2091][A
Epoch 6:   3%|▎         | 4/148 [00:05<02:31,  1.05s/it, training_loss=1.5970][A
Epoch 6:   3%|▎         | 5/148 [00:05<02:30,  1.05s/it, training_loss=1.5970][A
Epoch 6:   3%

2025-02-16 00:19:15,120 - INFO - Starting model evaluation...
2025-02-16 00:19:15,122 - INFO - Memory usage after evaluation start: 3684.64 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.05it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.97it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.95it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.93it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.93it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.92it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.92it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.92it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.92it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.92it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.91it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.92it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.92it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.92it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.92it/s][A
Evaluatin

2025-02-16 00:19:24,079 - INFO - Optimizing classification thresholds...
2025-02-16 00:19:24,095 - INFO - Class 'Drama': Optimal threshold = 0.650, F1 Score = 0.556
2025-02-16 00:19:24,109 - INFO - Class 'Horor': Optimal threshold = 0.700, F1 Score = 0.791
2025-02-16 00:19:24,123 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.556
2025-02-16 00:19:24,137 - INFO - Class 'Laga': Optimal threshold = 0.650, F1 Score = 0.351
2025-02-16 00:19:24,150 - INFO - Class 'Romantis': Optimal threshold = 0.750, F1 Score = 0.535
2025-02-16 00:19:24,172 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:19:24,177 - INFO - 
Metrics for Drama:
2025-02-16 00:19:24,178 - INFO - Accuracy: 0.6935
2025-02-16 00:19:24,179 - INFO - F1_score: 0.5556
2025-02-16 00:19:24,179 - INFO - Precision: 0.4717
2025-02-16 00:19:24,180 - INFO - Recall: 0.6757
2025-02-16 00:19:24,187 - INFO - 
Metrics for Horor:
2025-02-16 00:19:24,187 - INFO - Accuracy: 0.8966
2025-02-16 00:19:24,188 - INFO - F1_score: 0




2025-02-16 00:19:28,160 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:19:28,162 - INFO - Memory usage after evaluation end: 3705.64 MB


Training Progress:   5%|▌         | 5/100 [17:53<4:44:00, 179.37s/it, Train Loss=1.3371, Val Loss=0.0543, Accuracy=0.8130]

2025-02-16 00:19:36,949 - INFO - Learning rate: 1e-05


Training Progress:   6%|▌         | 6/100 [17:53<4:39:56, 178.69s/it, Train Loss=1.3371, Val Loss=0.0543, Accuracy=0.8130]
Epoch 7:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 7:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=0.9535][A
Epoch 7:   1%|          | 1/148 [00:01<02:34,  1.05s/it, training_loss=0.9535][A
Epoch 7:   1%|          | 1/148 [00:02<02:34,  1.05s/it, training_loss=1.0906][A
Epoch 7:   1%|▏         | 2/148 [00:02<02:33,  1.05s/it, training_loss=1.0906][A
Epoch 7:   1%|▏         | 2/148 [00:03<02:33,  1.05s/it, training_loss=1.0887][A
Epoch 7:   2%|▏         | 3/148 [00:03<02:31,  1.05s/it, training_loss=1.0887][A
Epoch 7:   2%|▏         | 3/148 [00:04<02:31,  1.05s/it, training_loss=1.5097][A
Epoch 7:   3%|▎         | 4/148 [00:04<02:30,  1.04s/it, training_loss=1.5097][A
Epoch 7:   3%|▎         | 4/148 [00:05<02:30,  1.04s/it, training_loss=1.1199][A
Epoch 7:   3%|▎         | 5/148 [00:05<02:30,  1.05s/it, training_loss=1.1199][A
Epoch 7:   3%

2025-02-16 00:22:12,315 - INFO - Starting model evaluation...
2025-02-16 00:22:12,316 - INFO - Memory usage after evaluation start: 3705.64 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.06it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  3.00it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.98it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.96it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.95it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.94it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.94it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.94it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.95it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.95it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.93it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.93it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.93it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.93it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.92it/s][A
Evaluatin

2025-02-16 00:22:21,241 - INFO - Optimizing classification thresholds...
2025-02-16 00:22:21,256 - INFO - Class 'Drama': Optimal threshold = 0.600, F1 Score = 0.570
2025-02-16 00:22:21,271 - INFO - Class 'Horor': Optimal threshold = 0.750, F1 Score = 0.761
2025-02-16 00:22:21,285 - INFO - Class 'Komedi': Optimal threshold = 0.650, F1 Score = 0.600
2025-02-16 00:22:21,298 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.386
2025-02-16 00:22:21,311 - INFO - Class 'Romantis': Optimal threshold = 0.600, F1 Score = 0.563
2025-02-16 00:22:21,333 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:22:21,339 - INFO - 
Metrics for Drama:
2025-02-16 00:22:21,339 - INFO - Accuracy: 0.7050
2025-02-16 00:22:21,340 - INFO - F1_score: 0.5698
2025-02-16 00:22:21,341 - INFO - Precision: 0.4857
2025-02-16 00:22:21,342 - INFO - Recall: 0.6892
2025-02-16 00:22:21,348 - INFO - 
Metrics for Horor:
2025-02-16 00:22:21,349 - INFO - Accuracy: 0.8774
2025-02-16 00:22:21,350 - INFO - F1_score: 0




2025-02-16 00:22:25,261 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:22:25,262 - INFO - Memory usage after evaluation end: 3711.39 MB


Training Progress:   6%|▌         | 6/100 [20:50<4:39:56, 178.69s/it, Train Loss=1.2909, Val Loss=0.0540, Accuracy=0.7900]

2025-02-16 00:22:33,999 - INFO - Learning rate: 1e-05


Training Progress:   7%|▋         | 7/100 [20:50<4:36:08, 178.15s/it, Train Loss=1.2909, Val Loss=0.0540, Accuracy=0.7900]
Epoch 8:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 8:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.5242][A
Epoch 8:   1%|          | 1/148 [00:01<02:31,  1.03s/it, training_loss=1.5242][A
Epoch 8:   1%|          | 1/148 [00:02<02:31,  1.03s/it, training_loss=0.9771][A
Epoch 8:   1%|▏         | 2/148 [00:02<02:32,  1.05s/it, training_loss=0.9771][A
Epoch 8:   1%|▏         | 2/148 [00:03<02:32,  1.05s/it, training_loss=1.5400][A
Epoch 8:   2%|▏         | 3/148 [00:03<02:31,  1.04s/it, training_loss=1.5400][A
Epoch 8:   2%|▏         | 3/148 [00:04<02:31,  1.04s/it, training_loss=1.5547][A
Epoch 8:   3%|▎         | 4/148 [00:04<02:29,  1.04s/it, training_loss=1.5547][A
Epoch 8:   3%|▎         | 4/148 [00:05<02:29,  1.04s/it, training_loss=0.8317][A
Epoch 8:   3%|▎         | 5/148 [00:05<02:28,  1.04s/it, training_loss=0.8317][A
Epoch 8:   3%

2025-02-16 00:25:09,314 - INFO - Starting model evaluation...
2025-02-16 00:25:09,316 - INFO - Memory usage after evaluation start: 3711.64 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.04it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.95it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.93it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.93it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.92it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.92it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.91it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.91it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.91it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.91it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.91it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.91it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.91it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.91it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.91it/s][A
Evaluatin

2025-02-16 00:25:18,287 - INFO - Optimizing classification thresholds...
2025-02-16 00:25:18,301 - INFO - Class 'Drama': Optimal threshold = 0.700, F1 Score = 0.590
2025-02-16 00:25:18,315 - INFO - Class 'Horor': Optimal threshold = 0.750, F1 Score = 0.783
2025-02-16 00:25:18,329 - INFO - Class 'Komedi': Optimal threshold = 0.700, F1 Score = 0.583
2025-02-16 00:25:18,342 - INFO - Class 'Laga': Optimal threshold = 0.500, F1 Score = 0.360
2025-02-16 00:25:18,355 - INFO - Class 'Romantis': Optimal threshold = 0.600, F1 Score = 0.507
2025-02-16 00:25:18,376 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:25:18,382 - INFO - 
Metrics for Drama:
2025-02-16 00:25:18,382 - INFO - Accuracy: 0.7280
2025-02-16 00:25:18,383 - INFO - F1_score: 0.5896
2025-02-16 00:25:18,383 - INFO - Precision: 0.5152
2025-02-16 00:25:18,384 - INFO - Recall: 0.6892
2025-02-16 00:25:18,390 - INFO - 
Metrics for Horor:
2025-02-16 00:25:18,391 - INFO - Accuracy: 0.9004
2025-02-16 00:25:18,392 - INFO - F1_score: 0




2025-02-16 00:25:22,234 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:25:22,235 - INFO - Memory usage after evaluation end: 3717.27 MB


Training Progress:   7%|▋         | 7/100 [23:47<4:36:08, 178.15s/it, Train Loss=1.2596, Val Loss=0.0556, Accuracy=0.7923]

2025-02-16 00:25:31,107 - INFO - Learning rate: 5e-06


Training Progress:   8%|▊         | 8/100 [23:47<4:32:39, 177.82s/it, Train Loss=1.2596, Val Loss=0.0556, Accuracy=0.7923]
Epoch 9:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 9:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.5592][A
Epoch 9:   1%|          | 1/148 [00:01<02:33,  1.04s/it, training_loss=1.5592][A
Epoch 9:   1%|          | 1/148 [00:02<02:33,  1.04s/it, training_loss=1.6573][A
Epoch 9:   1%|▏         | 2/148 [00:02<02:32,  1.05s/it, training_loss=1.6573][A
Epoch 9:   1%|▏         | 2/148 [00:03<02:32,  1.05s/it, training_loss=1.5807][A
Epoch 9:   2%|▏         | 3/148 [00:03<02:31,  1.05s/it, training_loss=1.5807][A
Epoch 9:   2%|▏         | 3/148 [00:04<02:31,  1.05s/it, training_loss=0.9757][A
Epoch 9:   3%|▎         | 4/148 [00:04<02:30,  1.05s/it, training_loss=0.9757][A
Epoch 9:   3%|▎         | 4/148 [00:05<02:30,  1.05s/it, training_loss=1.6886][A
Epoch 9:   3%|▎         | 5/148 [00:05<02:30,  1.05s/it, training_loss=1.6886][A
Epoch 9:   3%

2025-02-16 00:28:06,404 - INFO - Starting model evaluation...
2025-02-16 00:28:06,405 - INFO - Memory usage after evaluation start: 3717.39 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.01it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.93it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.93it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.92it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.92it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.91it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.92it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.91it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.91it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.91it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.91it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.92it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.92it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.91it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.91it/s][A
Evaluatin

2025-02-16 00:28:15,372 - INFO - Optimizing classification thresholds...
2025-02-16 00:28:15,385 - INFO - Class 'Drama': Optimal threshold = 0.650, F1 Score = 0.581
2025-02-16 00:28:15,400 - INFO - Class 'Horor': Optimal threshold = 0.700, F1 Score = 0.727
2025-02-16 00:28:15,414 - INFO - Class 'Komedi': Optimal threshold = 0.650, F1 Score = 0.647
2025-02-16 00:28:15,427 - INFO - Class 'Laga': Optimal threshold = 0.600, F1 Score = 0.349
2025-02-16 00:28:15,440 - INFO - Class 'Romantis': Optimal threshold = 0.500, F1 Score = 0.530
2025-02-16 00:28:15,462 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:28:15,467 - INFO - 
Metrics for Drama:
2025-02-16 00:28:15,468 - INFO - Accuracy: 0.7241
2025-02-16 00:28:15,468 - INFO - F1_score: 0.5814
2025-02-16 00:28:15,469 - INFO - Precision: 0.5102
2025-02-16 00:28:15,469 - INFO - Recall: 0.6757
2025-02-16 00:28:15,476 - INFO - 
Metrics for Horor:
2025-02-16 00:28:15,477 - INFO - Accuracy: 0.8621
2025-02-16 00:28:15,477 - INFO - F1_score: 0




2025-02-16 00:28:19,311 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:28:19,313 - INFO - Memory usage after evaluation end: 3723.14 MB


Training Progress:   8%|▊         | 8/100 [26:44<4:32:39, 177.82s/it, Train Loss=1.2695, Val Loss=0.0548, Accuracy=0.8084]

2025-02-16 00:28:28,117 - INFO - Learning rate: 5e-06


Training Progress:   9%|▉         | 9/100 [26:44<4:29:18, 177.57s/it, Train Loss=1.2695, Val Loss=0.0548, Accuracy=0.8084]
Epoch 10:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 10:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=0.8833][A
Epoch 10:   1%|          | 1/148 [00:01<02:32,  1.04s/it, training_loss=0.8833][A
Epoch 10:   1%|          | 1/148 [00:02<02:32,  1.04s/it, training_loss=0.8369][A
Epoch 10:   1%|▏         | 2/148 [00:02<02:32,  1.05s/it, training_loss=0.8369][A
Epoch 10:   1%|▏         | 2/148 [00:03<02:32,  1.05s/it, training_loss=1.0014][A
Epoch 10:   2%|▏         | 3/148 [00:03<02:31,  1.05s/it, training_loss=1.0014][A
Epoch 10:   2%|▏         | 3/148 [00:04<02:31,  1.05s/it, training_loss=0.8578][A
Epoch 10:   3%|▎         | 4/148 [00:04<02:31,  1.05s/it, training_loss=0.8578][A
Epoch 10:   3%|▎         | 4/148 [00:05<02:31,  1.05s/it, training_loss=1.5095][A
Epoch 10:   3%|▎         | 5/148 [00:05<02:30,  1.05s/it, training_loss=1.5095][A
Ep

2025-02-16 00:31:03,334 - INFO - Starting model evaluation...
2025-02-16 00:31:03,335 - INFO - Memory usage after evaluation start: 3723.14 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:08,  3.06it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:08,  2.98it/s][A
Evaluating:  11%|█         | 3/27 [00:01<00:08,  2.94it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:07,  2.93it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:07,  2.93it/s][A
Evaluating:  22%|██▏       | 6/27 [00:02<00:07,  2.92it/s][A
Evaluating:  26%|██▌       | 7/27 [00:02<00:06,  2.92it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:06,  2.91it/s][A
Evaluating:  33%|███▎      | 9/27 [00:03<00:06,  2.91it/s][A
Evaluating:  37%|███▋      | 10/27 [00:03<00:05,  2.91it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:05,  2.92it/s][A
Evaluating:  44%|████▍     | 12/27 [00:04<00:05,  2.91it/s][A
Evaluating:  48%|████▊     | 13/27 [00:04<00:04,  2.92it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:04<00:04,  2.92it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:05<00:04,  2.92it/s][A
Evaluatin

2025-02-16 00:31:12,295 - INFO - Optimizing classification thresholds...
2025-02-16 00:31:12,310 - INFO - Class 'Drama': Optimal threshold = 0.750, F1 Score = 0.564
2025-02-16 00:31:12,325 - INFO - Class 'Horor': Optimal threshold = 0.750, F1 Score = 0.726
2025-02-16 00:31:12,338 - INFO - Class 'Komedi': Optimal threshold = 0.650, F1 Score = 0.636
2025-02-16 00:31:12,351 - INFO - Class 'Laga': Optimal threshold = 0.500, F1 Score = 0.361
2025-02-16 00:31:12,364 - INFO - Class 'Romantis': Optimal threshold = 0.400, F1 Score = 0.396
2025-02-16 00:31:12,385 - INFO - 
Per-genre Performance Metrics:
2025-02-16 00:31:12,391 - INFO - 
Metrics for Drama:
2025-02-16 00:31:12,391 - INFO - Accuracy: 0.7280
2025-02-16 00:31:12,392 - INFO - F1_score: 0.5644
2025-02-16 00:31:12,393 - INFO - Precision: 0.5169
2025-02-16 00:31:12,394 - INFO - Recall: 0.6216
2025-02-16 00:31:12,400 - INFO - 
Metrics for Horor:
2025-02-16 00:31:12,400 - INFO - Accuracy: 0.8582
2025-02-16 00:31:12,401 - INFO - F1_score: 0




2025-02-16 00:31:16,210 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices
2025-02-16 00:31:16,211 - INFO - Memory usage after evaluation end: 3728.89 MB


Training Progress:   9%|▉         | 9/100 [29:41<4:29:18, 177.57s/it, Train Loss=1.1635, Val Loss=0.0588, Accuracy=0.7785]

2025-02-16 00:31:24,956 - INFO - 
Early stopping triggered after 10 epochs


Training Progress:   9%|▉         | 9/100 [29:41<5:00:13, 197.95s/it, Train Loss=1.1635, Val Loss=0.0588, Accuracy=0.7785]


2025-02-16 00:31:26,819 - INFO - Saved training history plots to /kaggle/working/logs/experiments/20250215_214222/plots/training_history.png
2025-02-16 00:31:26,820 - INFO - Training history saved successfully
2025-02-16 00:31:29,405 - INFO - 
Testing model on a sample...
2025-02-16 00:31:29,413 - INFO - Loading and preprocessing data...
2025-02-16 00:31:29,414 - INFO - Memory usage after start: 3732.64 MB
2025-02-16 00:31:29,427 - INFO - Successfully loaded data using utf-8 encoding
2025-02-16 00:31:29,428 - INFO - Memory usage after data loading: 3732.64 MB
2025-02-16 00:31:29,429 - INFO - Taking sample of 1 from 1738 total samples
2025-02-16 00:31:29,430 - INFO - 
Sample data:
2025-02-16 00:31:29,431 - INFO - 
Sample 1:
2025-02-16 00:31:29,432 - INFO - Synopsis: Setelah kematian yang tampak, Siena mampu melihat tanda-tanda bahwa orang-orang akan meninggal. Namu...
2025-02-16 00:31:29,433 - INFO - Genre: Horor
2025-02-16 00:31:29,433 - INFO - 
Preprocessing text data...


100%|██████████| 1/1 [00:00<00:00, 2743.17it/s]

2025-02-16 00:31:29,442 - INFO - Memory usage after preprocessing: 3732.64 MB
2025-02-16 00:31:29,492 - INFO - 
Sample prediction results:
2025-02-16 00:31:29,493 - INFO - Sample text: setelah kematian yang tampak siena mampu melihat tanda tanda bahwa orang orang akan meninggal namun ...
2025-02-16 00:31:29,494 - INFO - Genre: Horor, Probability: 0.9214, Threshold Used: 0.750
2025-02-16 00:31:29,496 - INFO - 
Training completed successfully!
2025-02-16 00:31:29,496 - INFO - All results and models saved in: /kaggle/working/logs/experiments/20250215_214222
2025-02-16 00:31:29,497 - INFO - 
Cleaning up resources...





2025-02-16 00:31:30,785 - INFO - Cleaning up resources...


In [7]:
!zip -r folder.zip /kaggle/working/logs/experiments

  adding: kaggle/working/logs/experiments/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250215_214222/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/accuracies_trends.png (deflated 6%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices/confusion_matrix_Horor.png (deflated 20%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices/confusion_matrix_Laga.png (deflated 19%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices/confusion_matrix_Komedi.png (deflated 19%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices/confusion_matrix_Romantis.png (deflated 18%)
  adding: kaggle/working/logs/experiments/20250215_214222/plots/confusion_matrices/confus

In [8]:
from IPython.display import FileLink
FileLink(r'folder.zip')