In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/datasets-classificationsynopsis/final_combined_movies_5genres.csv


In [2]:
# -*- coding: utf-8 -*-
"""
Movie Genre Classification with IndoBERT
Environment: Kaggle
"""

!pip install optuna




In [3]:
# BAGIAN PERTAMA - Import dan Konfigurasi
import os
import logging
import datetime
import json
import argparse
import gc
import sys
import codecs
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Union

import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, accuracy_score
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
import torch.nn.functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import re
import psutil
import optuna
from optuna.trial import Trial

# Define Base Path for Kaggle
BASE_PATH = Path('/kaggle/working')
DATASETS_PATH = Path('/kaggle/input/datasets-classificationsynopsis')

# Configuration Constants
class Config:
    # Model Parameters
    MODEL_PARAMS = {
        'EPOCHS': 100,
        'BATCH_SIZE': 10,
        'LEARNING_RATE': 1e-5,
        'MAX_LENGTH': 512,
        'TEST_SIZE': 0.15,
        'WEIGHT_DECAY': 0.05,
        'MIXUP_PROB': 0.5,
        'PATIENCE': 5,
        'SMOOTHING': 0.2
    }

    # Optimization Parameters
    OPTIM_PARAMS = {
        'batch_size': [2, 4, 8],           
        'learning_rate': [3E-6, 5E-6, 8E-6], 
        'weight_decay': [0.01, 0.02],
        'mixup_prob': [0.2, 0.3],
        'smoothing': [0.1, 0.15]

    }

    # Paths Configuration untuk Kaggle
    BASE_DIR = BASE_PATH
    DATA_PATH = Path('/kaggle/input/datasets-classificationsynopsis/final_combined_movies_5genres.csv')
    LOG_DIR = BASE_DIR / 'logs'
    BACKUP_DIR = BASE_DIR / 'backups'
    TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    EXPERIMENT_DIR = LOG_DIR / 'experiments' / TIMESTAMP

    # Model and Data Paths
    MODEL_SAVE_DIR = EXPERIMENT_DIR / 'model'
    TOKENIZER_SAVE_DIR = EXPERIMENT_DIR / 'tokenizer'
    METRICS_DIR = EXPERIMENT_DIR / 'metrics'
    PLOTS_DIR = EXPERIMENT_DIR / 'plots'
    CM_DIR = PLOTS_DIR / 'confusion_matrices'

    # Model Files
    MODEL_BEST_ACC = MODEL_SAVE_DIR / "best_accuracy"
    MODEL_BEST_LOSS = MODEL_SAVE_DIR / "best_loss"
    TOKENIZER_BEST_ACC = TOKENIZER_SAVE_DIR / "best_accuracy"
    TOKENIZER_BEST_LOSS = TOKENIZER_SAVE_DIR / "best_loss"
    DATA_PATH = Path('/kaggle/input/datasets-classificationsynopsis/final_combined_movies_5genres.csv')  # Path langsung ke file CSV

    # Device Configuration
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    SAMPLE_SIZE: Optional[int] = None

    @classmethod
    def create_directories(cls) -> None:
        """Create all necessary directories in Kaggle working directory"""
        directories = [
            cls.LOG_DIR, cls.BACKUP_DIR,
            cls.EXPERIMENT_DIR, cls.MODEL_SAVE_DIR, cls.TOKENIZER_SAVE_DIR,
            cls.METRICS_DIR, cls.PLOTS_DIR, cls.CM_DIR
        ]
        for dir_path in directories:
            dir_path.mkdir(parents=True, exist_ok=True)
            print(f"Created directory: {dir_path}")

    @classmethod
    def setup_logging(cls) -> None:
        """Setup logging configuration untuk Kaggle"""
        log_file = cls.EXPERIMENT_DIR / 'training.log'
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file, encoding='utf-8', mode='a'),
                logging.StreamHandler(sys.stdout)
            ]
        )
        logging.info(f"Log file created at: {log_file}")

# Environment Check Function
def check_environment() -> bool:
    """Verify Kaggle environment and paths"""
    try:
        # Check if datasets directory exists
        if not DATASETS_PATH.exists():
            raise RuntimeError(f"Dataset directory tidak ditemukan di: {DATASETS_PATH}")
        
        # List available files in dataset directory
        print("\nFiles in dataset directory:")
        for file in DATASETS_PATH.glob('*'):
            print(f"- {file.name}")

        # Check if dataset exists
        if not Config.DATA_PATH.exists():
            raise RuntimeError(f"Dataset tidak ditemukan di: {Config.DATA_PATH}")

        # Check GPU availability
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9  # Convert to GB
            print(f"GPU tersedia: {gpu_name}")
            print(f"GPU Memory: {gpu_memory:.2f} GB")
        else:
            print("WARNING: GPU tidak tersedia, menggunakan CPU")

        return True
    except Exception as e:
        print(f"Error dalam setup environment: {str(e)}")
        return False
class DynamicThresholdOptimizer:
    """Class untuk mengoptimalkan threshold per-class secara dinamis"""
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.thresholds = [0.5] * num_classes  # Initial thresholds
        self.performance_history = {i: [] for i in range(num_classes)}
        self.best_thresholds = [0.5] * num_classes
        self.best_f1_scores = [0.0] * num_classes

    def optimize_thresholds(self, true_labels, predictions, class_names):
        """Optimize thresholds based on F1 score"""
        logging.info("Optimizing classification thresholds...")
        
        for class_idx in range(self.num_classes):
            best_threshold = 0.5
            best_f1 = 0.0
            
            # Test different thresholds
            for threshold in np.arange(0.3, 0.8, 0.05):
                class_preds = (predictions[:, class_idx] > threshold).astype(int)
                f1 = f1_score(true_labels[:, class_idx], class_preds, zero_division=0)
                
                if f1 > best_f1:
                    best_f1 = f1
                    best_threshold = threshold
            
            self.thresholds[class_idx] = best_threshold
            self.performance_history[class_idx].append({
                'threshold': best_threshold,
                'f1_score': best_f1
            })
            
            if best_f1 > self.best_f1_scores[class_idx]:
                self.best_f1_scores[class_idx] = best_f1
                self.best_thresholds[class_idx] = best_threshold
            
            logging.info(f"Class '{class_names[class_idx]}': Optimal threshold = {best_threshold:.3f}, F1 Score = {best_f1:.3f}")

    def apply_thresholds(self, predictions):
        """Apply optimized thresholds to predictions"""
        thresholded_preds = np.zeros_like(predictions)
        for i in range(self.num_classes):
            thresholded_preds[:, i] = (predictions[:, i] > self.thresholds[i]).astype(int)
        return thresholded_preds

    def save_threshold_history(self, save_path, class_names):
        """Save threshold optimization history"""
        history_data = {
            'class_thresholds': {
                class_names[i]: {
                    'current_threshold': self.thresholds[i],
                    'best_threshold': self.best_thresholds[i],
                    'best_f1_score': self.best_f1_scores[i],
                    'history': self.performance_history[i]
                } for i in range(self.num_classes)
            }
        }
        
        with open(save_path / 'threshold_history.json', 'w') as f:
            json.dump(history_data, f, indent=4)

class PerformanceTracker:
    """Class untuk melacak performa per-class selama training"""
    def __init__(self, num_classes, class_names):
        self.num_classes = num_classes
        self.class_names = class_names
        self.metrics_history = {name: {
            'f1_scores': [],
            'precisions': [],
            'recalls': [],
            'accuracies': []
        } for name in class_names}
        self.best_metrics = {name: {
            'f1_score': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'accuracy': 0.0,
            'epoch': 0
        } for name in class_names}

    def update_metrics(self, true_labels, predictions, epoch):
        """Update performance metrics for each class"""
        for i, class_name in enumerate(self.class_names):
            # Calculate metrics
            accuracy = accuracy_score(true_labels[:, i], predictions[:, i])
            precision = precision_score(true_labels[:, i], predictions[:, i], zero_division=0)
            recall = recall_score(true_labels[:, i], predictions[:, i], zero_division=0)
            f1 = f1_score(true_labels[:, i], predictions[:, i], zero_division=0)
            
            # Update history
            self.metrics_history[class_name]['accuracies'].append(accuracy)
            self.metrics_history[class_name]['precisions'].append(precision)
            self.metrics_history[class_name]['recalls'].append(recall)
            self.metrics_history[class_name]['f1_scores'].append(f1)
            
            # Update best metrics if necessary
            if f1 > self.best_metrics[class_name]['f1_score']:
                self.best_metrics[class_name].update({
                    'f1_score': f1,
                    'precision': precision,
                    'recall': recall,
                    'accuracy': accuracy,
                    'epoch': epoch
                })

    def plot_performance_trends(self, save_path):
        """Plot performance trends untuk setiap class"""
        for metric in ['f1_scores', 'precisions', 'recalls', 'accuracies']:
            plt.figure(figsize=(12, 6))
            for class_name in self.class_names:
                plt.plot(
                    self.metrics_history[class_name][metric],
                    label=class_name
                )
            
            plt.title(f'{metric.replace("_", " ").title()} Trends per Class')
            plt.xlabel('Epoch')
            plt.ylabel('Score')
            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(save_path / f'{metric}_trends.png')
            plt.close()

    def save_performance_history(self, save_path):
        """Save performance history ke file"""
        history_data = {
            'metrics_history': self.metrics_history,
            'best_metrics': self.best_metrics
        }
        
        with open(save_path / 'performance_history.json', 'w') as f:
            json.dump(history_data, f, indent=4)

# Memory Management
class ModelManager:
    """Context manager for model memory management"""
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def __enter__(self):
        return self.model, self.tokenizer

    def __exit__(self, exc_type, exc_val, exc_tb):
        del self.model
        del self.tokenizer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

# Error Handling
def error_handler(func):
    """Decorator for consistent error handling"""
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logging.error(f"Error in {func.__name__}: {str(e)}")
            raise
    return wrapper

# Utility Functions
def get_memory_usage() -> float:
    """Get current memory usage of the program"""
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / 1024 / 1024  # in MB

def log_memory(step_name: str) -> None:
    """Log memory usage with consistent format"""
    memory = get_memory_usage()
    logging.info(f"Memory usage after {step_name}: {memory:.2f} MB")

def log_system_info() -> None:
    """Log system information including GPU details"""
    logging.info("System Information:")
    logging.info(f"Python Version: {sys.version}")
    logging.info(f"CPU Count: {os.cpu_count()}")
    logging.info(f"Initial Memory Usage: {get_memory_usage():.2f} MB")
    if torch.cuda.is_available():
        logging.info(f"GPU Device: {torch.cuda.get_device_name(0)}")
        logging.info(f"GPU Memory Total: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        logging.info(f"CUDA Version: {torch.version.cuda}")

# Create directories and setup logging
if check_environment():
    Config.create_directories()
    Config.setup_logging()
    log_system_info()
else:
    print("Failed to initialize environment. Please check the setup.")
    sys.exit(1)


Files in dataset directory:
- final_combined_movies_5genres.csv
GPU tersedia: Tesla T4
GPU Memory: 15.83 GB
Created directory: /kaggle/working/logs
Created directory: /kaggle/working/backups
Created directory: /kaggle/working/logs/experiments/20250216_171623
Created directory: /kaggle/working/logs/experiments/20250216_171623/model
Created directory: /kaggle/working/logs/experiments/20250216_171623/tokenizer
Created directory: /kaggle/working/logs/experiments/20250216_171623/metrics
Created directory: /kaggle/working/logs/experiments/20250216_171623/plots
Created directory: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:16:23,310 - INFO - Log file created at: /kaggle/working/logs/experiments/20250216_171623/training.log
2025-02-16 17:16:23,311 - INFO - System Information:
2025-02-16 17:16:23,311 - INFO - Python Version: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
2025-02-16 17:16:23,318 - INFO - CPU Count: 4
2025-02-16 17:16:23,319 - INFO

In [4]:
# BAGIAN KEDUA - Dataset dan Data Processing

class MovieDataset(Dataset):
    """Dataset class untuk movie genre classification"""
    def __init__(self, texts: Union[List, np.ndarray],
                 labels: Union[List, np.ndarray],
                 tokenizer,
                 max_length: int = 512):
        # Input validation
        if not isinstance(texts, (list, np.ndarray)):
            raise ValueError("texts must be a list or numpy array")
        if not isinstance(labels, (list, np.ndarray)):
            raise ValueError("labels must be a list or numpy array")
        if len(texts) != len(labels):
            raise ValueError("texts and labels must have the same length")

        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self) -> int:
        return len(self.texts)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        text = str(self.texts[idx])
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten().long(),
            'attention_mask': encoding['attention_mask'].flatten().long(),
            'labels': torch.FloatTensor(self.labels[idx])
        }

class DataProcessor:
    """Class for handling data preprocessing and loading"""
    @staticmethod
    def clean_text(text: str) -> str:
        """Clean and preprocess text data"""
        if isinstance(text, str):
            text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
            text = re.sub(r'\S+@\S+', '', text)
            text = re.sub(r'[^\w\s]', ' ', text)
            text = re.sub(r'\s+', ' ', text)
            return text.strip().lower()
        return ''

    @staticmethod
    @error_handler
    def load_and_preprocess_data(data_path: Path, sample_size: Optional[int] = None) -> pd.DataFrame:
        """Load and preprocess data with proper encoding handling"""
        if not data_path.exists():
            raise FileNotFoundError(f"Data file not found: {data_path}")

        if sample_size is not None and (not isinstance(sample_size, int) or sample_size <= 0):
            raise ValueError("sample_size must be a positive integer")

        logging.info("Loading and preprocessing data...")
        log_memory("start")
        initial_size = None

        # Try different encodings for Google Drive compatibility
        encodings_to_try = ['utf-8', 'utf-8-sig', 'latin1', 'iso-8859-1', 'cp1252']
        df = None

        for encoding in encodings_to_try:
            try:
                df = pd.read_csv(data_path, encoding=encoding)
                logging.info(f"Successfully loaded data using {encoding} encoding")
                initial_size = len(df)
                break
            except (UnicodeDecodeError, UnicodeError):
                continue

        if df is None:
            raise UnicodeError(f"Failed to read file with any of these encodings: {encodings_to_try}")

        log_memory("data loading")

        # Sample data if requested
        if sample_size:
            if sample_size > initial_size:
                logging.warning(f"Requested sample_size ({sample_size}) is larger than dataset size ({initial_size})")
                sample_size = initial_size
            logging.info(f"Taking sample of {sample_size} from {initial_size} total samples")
            df = df.head(sample_size)
        else:
            logging.info(f"Using full dataset with {initial_size} samples")

        # Log sample data
        logging.info("\nSample data:")
        for i, row in df.head(3).iterrows():
            logging.info(f"\nSample {i+1}:")
            logging.info(f"Synopsis: {row['sinopsis'][:100]}...")
            logging.info(f"Genre: {row['genre']}")

        # Preprocess data
        logging.info("\nPreprocessing text data...")
        tqdm.pandas()
        df['sinopsis'] = df['sinopsis'].progress_apply(DataProcessor.clean_text)
        df['genre'] = df['genre'].str.split(',')
        df = df.dropna(subset=['sinopsis', 'genre'])

        log_memory("preprocessing")
        return df

    @staticmethod
    def prepare_data(df: pd.DataFrame, mlb: MultiLabelBinarizer) -> Tuple:
        """Prepare data for training"""
        genre_labels = mlb.fit_transform(df['genre'])
        return train_test_split(
            df['sinopsis'].values,
            genre_labels,
            test_size=Config.MODEL_PARAMS['TEST_SIZE'],
            random_state=42,
            stratify=genre_labels if len(genre_labels.shape) == 1 else None
        )

    @staticmethod
    def create_weighted_sampler(genre_labels: np.ndarray) -> WeightedRandomSampler:
        """Create weighted sampler for balanced batch sampling"""
        logging.info("Creating weighted sampler for balanced batch sampling...")

        sample_weights = np.zeros(len(genre_labels))
        for i in range(genre_labels.shape[1]):
            sample_weights += genre_labels[:, i] * (1.0 / np.sum(genre_labels[:, i]))

        sample_weights = sample_weights / sample_weights.sum()
        sampler = WeightedRandomSampler(
            weights=sample_weights,
            num_samples=len(sample_weights),
            replacement=True
        )

        logging.info(f"Created sampler with {len(sample_weights)} weights")
        return sampler

    @staticmethod
    def calculate_class_weights(genre_labels: np.ndarray, mlb: MultiLabelBinarizer) -> torch.Tensor:
        """Calculate class weights for handling imbalanced data"""
        class_weights = []
        logging.info("\nCalculating class weights for handling imbalanced data...")

        for i in range(genre_labels.shape[1]):
            genre = mlb.classes_[i]
            positive_samples = np.sum(genre_labels[:, i])
            total_samples = len(genre_labels)

            weights = compute_class_weight(
                class_weight='balanced',
                classes=np.array([0, 1]),
                y=genre_labels[:, i]
            )
            class_weights.append(weights[1])

            logging.info(f"{genre}:")
            logging.info(f"  Positive samples: {positive_samples}")
            logging.info(f"  Negative samples: {total_samples - positive_samples}")
            logging.info(f"  Weight: {weights[1]:.2f}")

        return torch.FloatTensor(class_weights).to(Config.DEVICE)

class ModelSetup:
    """Class for handling model setup and data loaders"""
    @staticmethod
    def setup_model_and_tokenizer(num_labels: int) -> Tuple:
        """Setup model dan tokenizer"""
        logging.info("Setting up model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1')
        model = AutoModelForSequenceClassification.from_pretrained(
            'indobenchmark/indobert-base-p1',
            num_labels=num_labels,
            problem_type="multi_label_classification"
        ).to(Config.DEVICE)
        logging.info("Model and tokenizer setup completed")
        return model, tokenizer

    @staticmethod
    def setup_dataloaders(X_train: np.ndarray,
                         X_test: np.ndarray,
                         y_train: np.ndarray,
                         y_test: np.ndarray,
                         tokenizer,
                         batch_size: int) -> Tuple:
        """Setup data loaders"""
        logging.info("Setting up data loaders...")
        train_dataset = MovieDataset(X_train, y_train, tokenizer)
        val_dataset = MovieDataset(X_test, y_test, tokenizer)
        sampler = DataProcessor.create_weighted_sampler(y_train)

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            sampler=sampler,
            num_workers=0,  # Set to 0 for Colab compatibility
            pin_memory=True if torch.cuda.is_available() else False
        )

        val_loader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            num_workers=0,  # Set to 0 for Colab compatibility
            pin_memory=True if torch.cuda.is_available() else False
        )

        logging.info(f"Created data loaders with batch size {batch_size}")
        return train_loader, val_loader

In [5]:
# BAGIAN KETIGA - Loss Functions dan Training

class LossFunctions:
    """Class untuk menangani berbagai loss functions"""

    @staticmethod
    def focal_loss(predictions: torch.Tensor,
                  targets: torch.Tensor,
                  gamma: float = 2.0,
                  alpha: float = 0.25) -> torch.Tensor:
        """Calculate focal loss for multi-label classification"""
        ce_loss = F.binary_cross_entropy_with_logits(predictions, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = alpha * (1-pt)**gamma * ce_loss
        return focal_loss.mean()

    @staticmethod
    def label_smoothing_loss(outputs: torch.Tensor,
                           targets: torch.Tensor,
                           smoothing: float) -> torch.Tensor:
        """Calculate loss with label smoothing"""
        log_probs = F.log_softmax(outputs, dim=-1)
        targets = torch.clamp(targets * (1.0 - smoothing), min=smoothing / (targets.size(-1) - 1))
        return torch.mean(torch.sum(-targets * log_probs, dim=-1))

class DataAugmentation:
    """Class untuk menangani augmentasi data"""

    @staticmethod
    def apply_mixup(batch: Dict[str, torch.Tensor], alpha: float = 0.2) -> Dict[str, torch.Tensor]:
        """Apply mixup augmentation to batch"""
        # Move tensors to device
        input_ids = batch['input_ids'].to(Config.DEVICE)
        attention_mask = batch['attention_mask'].to(Config.DEVICE)
        labels = batch['labels'].to(Config.DEVICE)

        lam = np.random.beta(alpha, alpha)
        mixed_input_ids = lam * input_ids + (1 - lam) * input_ids.flip(0)
        mixed_attention_mask = lam * attention_mask + (1 - lam) * attention_mask.flip(0)
        mixed_labels = lam * labels + (1 - lam) * labels.flip(0)

        return {
            'input_ids': mixed_input_ids.long(),
            'attention_mask': mixed_attention_mask.long(),
            'labels': mixed_labels
        }

class Visualization:
    """Class untuk menangani visualisasi"""

    @staticmethod
    def plot_confusion_matrices(predictions: np.ndarray,
                              labels: np.ndarray,
                              classes: List[str]) -> None:
        """Plot detailed confusion matrices for each genre"""
        logging.info("Generating detailed confusion matrices for each genre...")

        # Pastikan input dalam format yang benar
        predictions = np.array(predictions)
        labels = np.array(labels)

        if len(predictions.shape) == 1:
            predictions = predictions.reshape(-1, 1)
        if len(labels.shape) == 1:
            labels = labels.reshape(-1, 1)

        for i, genre in enumerate(classes):
            try:
                genre_preds = predictions[:, i]
                genre_labels = labels[:, i]

                # Calculate confusion matrix
                cm = confusion_matrix(genre_labels, genre_preds)

                # Extract values
                TN, FP = cm[0]
                FN, TP = cm[1]

                # Calculate metrics
                accuracy = (TP + TN) / (TP + TN + FP + FN)
                precision = TP / (TP + FP) if (TP + FP) > 0 else 0
                recall = TP / (TP + FN) if (TP + FN) > 0 else 0
                f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

                # Create plot
                plt.figure(figsize=(12, 8))

                # Main confusion matrix plot
                main_ax = plt.subplot2grid((3, 3), (0, 0), rowspan=2, colspan=2)

                # Plot heatmap
                plot_labels = [f'Non-{genre}', genre]
                sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                          xticklabels=plot_labels,
                          yticklabels=plot_labels,
                          ax=main_ax)

                main_ax.set_title(f'Confusion Matrix - {genre}')
                main_ax.set_ylabel('True Label')
                main_ax.set_xlabel('Predicted Label')

                # Create text box for detailed metrics
                plt.subplot2grid((3, 3), (0, 2), rowspan=3)
                plt.axis('off')

                metrics_text = [
                    f'Detailed Metrics for {genre}:\n',
                    f'\nConfusion Matrix Values:',
                    f'True Negative (TN): {TN}',
                    f'False Positive (FP): {FP}',
                    f'False Negative (FN): {FN}',
                    f'True Positive (TP): {TP}',
                    f'\nPerformance Metrics:',
                    f'Accuracy: {accuracy:.3f}',
                    f'Precision: {precision:.3f}',
                    f'Recall: {recall:.3f}',
                    f'F1 Score: {f1:.3f}',
                    f'\nAdditional Information:',
                    f'Total Samples: {len(genre_labels)}',
                    f'Positive Samples: {np.sum(genre_labels)}',
                    f'Negative Samples: {len(genre_labels) - np.sum(genre_labels)}'
                ]

                plt.text(0, 0.95, '\n'.join(metrics_text),
                        fontsize=10,
                        verticalalignment='top',
                        bbox=dict(boxstyle='round,pad=1', facecolor='white', alpha=0.8))

                # Add interpretation text
                interpretation_ax = plt.subplot2grid((3, 3), (2, 0), colspan=2)
                interpretation_ax.axis('off')

                interpretation_text = [
                    'Matrix Interpretation:',
                    f'• Model correctly identified {TN} non-{genre} movies (True Negatives)',
                    f'• Model correctly identified {TP} {genre} movies (True Positives)',
                    f'• Model incorrectly classified {FP} non-{genre} movies as {genre} (False Positives)',
                    f'• Model failed to identify {FN} {genre} movies (False Negatives)'
                ]

                interpretation_ax.text(0, 0.5, '\n'.join(interpretation_text),
                                    fontsize=9,
                                    verticalalignment='center',
                                    bbox=dict(boxstyle='round,pad=1', facecolor='lightyellow', alpha=0.3))

                plt.tight_layout()
                plot_path = Config.CM_DIR / f'confusion_matrix_{genre}.png'
                plt.savefig(plot_path, dpi=300, bbox_inches='tight')
                plt.close()

            except Exception as e:
                logging.error(f"Error plotting confusion matrix for genre {genre}: {str(e)}")
                continue

        logging.info(f"Confusion matrices saved in: {Config.CM_DIR}")

    @staticmethod
    def plot_training_history(history_data: Dict) -> None:
        """Plot and save training metrics"""
        plt.figure(figsize=(15, 5))

        # Plot Loss
        plt.subplot(1, 3, 1)
        plt.plot(history_data['epochs'], history_data['training_loss'],
                label='Training Loss', marker='o')
        plt.plot(history_data['epochs'], history_data['validation_loss'],
                label='Validation Loss', marker='o')
        plt.title('Training History - Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True)

        # Plot Accuracy
        plt.subplot(1, 3, 2)
        plt.plot(history_data['epochs'], history_data['accuracy'],
                label='Accuracy', marker='o', color='green')
        plt.title('Training History - Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.grid(True)

        # Plot Loss Difference
        plt.subplot(1, 3, 3)
        loss_diff = np.array(history_data['training_loss']) - np.array(history_data['validation_loss'])
        plt.plot(history_data['epochs'], loss_diff,
                label='Loss Difference', marker='o', color='red')
        plt.title('Learning Curve (Train-Val Loss)')
        plt.xlabel('Epoch')
        plt.ylabel('Difference')
        plt.legend()
        plt.grid(True)

        plt.tight_layout()
        plot_path = Config.PLOTS_DIR / 'training_history.png'
        plt.savefig(plot_path, dpi=300, bbox_inches='tight')
        plt.close()
        logging.info(f"Saved training history plots to {plot_path}")

class ModelEvaluator:
    """Class untuk evaluasi model"""

    @staticmethod
    @error_handler
    def evaluate_model(model: torch.nn.Module,
                      val_loader: DataLoader,
                      mlb: MultiLabelBinarizer,
                      threshold_optimizer: DynamicThresholdOptimizer = None,
                      performance_tracker: PerformanceTracker = None,
                      epoch: int = None) -> Dict:
        """Evaluate model performance with dynamic thresholding"""
        model.eval()
        all_preds = []
        all_labels = []
        raw_predictions = []

        logging.info("Starting model evaluation...")
        log_memory("evaluation start")

        try:
            with torch.no_grad(), tqdm(val_loader, desc="Evaluating") as pbar:
                for batch in pbar:
                    input_ids = batch['input_ids'].to(Config.DEVICE)
                    attention_mask = batch['attention_mask'].to(Config.DEVICE)
                    labels = batch['labels'].cpu().numpy()

                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    logits = outputs.logits
                    probs = torch.sigmoid(logits).cpu().numpy()
                    
                    raw_predictions.extend(probs)
                    all_labels.extend(labels)

            raw_predictions = np.array(raw_predictions)
            all_labels = np.array(all_labels)

            # Apply dynamic thresholding if available
            if threshold_optimizer is not None:
                threshold_optimizer.optimize_thresholds(all_labels, raw_predictions, mlb.classes_)
                all_preds = threshold_optimizer.apply_thresholds(raw_predictions)
            else:
                all_preds = (raw_predictions > 0.5).astype(int)

            # Update performance tracker if available
            if performance_tracker is not None and epoch is not None:
                performance_tracker.update_metrics(all_labels, all_preds, epoch)

            # Calculate all metrics
            correct_predictions = np.sum(all_preds == all_labels)
            total_predictions = all_labels.size
            accuracy = correct_predictions / total_predictions

            # Calculate per-genre metrics
            genre_metrics = ModelEvaluator._calculate_genre_metrics(
                all_preds, all_labels, mlb.classes_
            )

            # Calculate macro metrics
            macro_metrics = ModelEvaluator._calculate_macro_metrics(genre_metrics)

            # Save metrics
            evaluation_metrics = {
                'overall': macro_metrics,
                'per_genre': genre_metrics
            }

            metrics_file = Config.METRICS_DIR / 'evaluation_metrics.json'
            with open(metrics_file, 'w', encoding='utf-8') as f:
                json.dump(evaluation_metrics, f, indent=4, ensure_ascii=False)

            # Plot confusion matrices
            Visualization.plot_confusion_matrices(all_preds, all_labels, mlb.classes_)

            log_memory("evaluation end")

            return {
                'accuracy': float(accuracy),
                'macro_f1': float(macro_metrics['macro_f1']),
                'genre_metrics': genre_metrics,
                'raw_predictions': raw_predictions,
                'true_labels': all_labels
            }

        except Exception as e:
            logging.error(f"Error during model evaluation: {str(e)}")
            raise

    @staticmethod
    def _calculate_macro_metrics(genre_metrics: Dict) -> Dict:
        """Calculate macro-averaged metrics"""
        return {
            'accuracy': np.mean([metrics['accuracy'] for metrics in genre_metrics.values()]),
            'macro_f1': np.mean([metrics['f1_score'] for metrics in genre_metrics.values()]),
            'macro_precision': np.mean([metrics['precision'] for metrics in genre_metrics.values()]),
            'macro_recall': np.mean([metrics['recall'] for metrics in genre_metrics.values()])
        }

    @staticmethod
    def _calculate_genre_metrics(predictions: np.ndarray,
                               labels: np.ndarray,
                               classes: List[str]) -> Dict:
        """Calculate metrics for each genre"""
        genre_metrics = {}
        logging.info("\nPer-genre Performance Metrics:")

        for i, genre in enumerate(classes):
            genre_preds = predictions[:, i]
            genre_labels = labels[:, i]

            metrics = {
                'accuracy': float(np.mean(genre_preds == genre_labels)),
                'f1_score': float(f1_score(genre_labels, genre_preds, zero_division=0)),
                'precision': float(precision_score(genre_labels, genre_preds, zero_division=0)),
                'recall': float(recall_score(genre_labels, genre_preds, zero_division=0))
            }

            genre_metrics[genre] = metrics
            logging.info(f"\nMetrics for {genre}:")
            for metric_name, value in metrics.items():
                logging.info(f"{metric_name.capitalize()}: {value:.4f}")

        return genre_metrics

In [6]:
# BAGIAN KEEMPAT - Training dan Hyperparameter Optimization

class ModelTrainer:
    @staticmethod
    @error_handler
    def train_model(sample_size: Optional[int] = None) -> Tuple:
        Config.SAMPLE_SIZE = sample_size
        logging.info("Starting model training")
        log_memory("training start")

        try:
            # Load dan preprocess data
            df = DataProcessor.load_and_preprocess_data(Config.DATA_PATH, Config.SAMPLE_SIZE)
            logging.info(f"\nDataset statistics:")
            logging.info(f"Total samples after preprocessing: {len(df)}")

            # Prepare data
            mlb = MultiLabelBinarizer()
            X_train, X_test, y_train, y_test = DataProcessor.prepare_data(df, mlb)

            # Initialize threshold optimizer and performance tracker
            threshold_optimizer = DynamicThresholdOptimizer(len(mlb.classes_))
            performance_tracker = PerformanceTracker(len(mlb.classes_), mlb.classes_)

            # Log genre distribution
            genre_labels = mlb.fit_transform(df['genre'])
            genre_counts = genre_labels.sum(axis=0)
            for genre, count in zip(mlb.classes_, genre_counts):
                logging.info(f"Genre '{genre}': {count} samples")

            logging.info(f"\nTraining set size: {len(X_train)}")
            logging.info(f"Testing set size: {len(X_test)}")

            # Setup model dan data loaders
            with ModelManager(*ModelSetup.setup_model_and_tokenizer(len(mlb.classes_))) as (model, tokenizer):
                train_loader, val_loader = ModelSetup.setup_dataloaders(
                    X_train, X_test, y_train, y_test,
                    tokenizer, Config.MODEL_PARAMS['BATCH_SIZE']
                )

                # Training loop
                best_val_loss = float('inf')
                best_accuracy = 0.0
                patience_counter = 0
                training_losses = []
                validation_losses = []
                accuracies = []

                optimizer = torch.optim.AdamW(
                    model.parameters(),
                    lr=Config.MODEL_PARAMS['LEARNING_RATE'],
                    weight_decay=Config.MODEL_PARAMS['WEIGHT_DECAY']
                )

                scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer, mode='min', factor=0.5, patience=2, verbose=True
                )

                epochs_range = tqdm(range(Config.MODEL_PARAMS['EPOCHS']), 
                                  desc="Training Progress",
                                  position=0, leave=True)
                
                for epoch in epochs_range:
                    try:
                        # Training phase
                        model.train()
                        epoch_metrics = ModelTrainer._train_epoch(
                            model, train_loader, optimizer,
                            epoch + 1
                        )
                        training_losses.append(epoch_metrics['train_loss'])

                        # Evaluation phase with dynamic thresholding
                        validation_metrics = ModelEvaluator.evaluate_model(
                            model, val_loader, mlb,
                            threshold_optimizer=threshold_optimizer,
                            performance_tracker=performance_tracker,
                            epoch=epoch
                        )
                        
                        current_accuracy = validation_metrics['accuracy']
                        accuracies.append(current_accuracy)

                        # Validation loss calculation
                        avg_val_loss = ModelTrainer._calculate_validation_loss(
                            model, val_loader
                        )
                        validation_losses.append(avg_val_loss)

                        # Update progress bar
                        epochs_range.set_postfix({
                            'Train Loss': f"{epoch_metrics['train_loss']:.4f}",
                            'Val Loss': f"{avg_val_loss:.4f}",
                            'Accuracy': f"{current_accuracy:.4f}"
                        })

                        # Model improvement check
                        model_improved = ModelTrainer._check_model_improvement(
                            model, tokenizer, current_accuracy, avg_val_loss,
                            best_accuracy, best_val_loss
                        )

                        if model_improved:
                            best_accuracy = max(best_accuracy, current_accuracy)
                            best_val_loss = min(best_val_loss, avg_val_loss)
                            patience_counter = 0
                        else:
                            patience_counter += 1

                        # Early stopping check
                        if patience_counter >= Config.MODEL_PARAMS['PATIENCE']:
                            logging.info(f"\nEarly stopping triggered after {epoch + 1} epochs")
                            break

                        scheduler.step(avg_val_loss)
                        logging.info(f"Learning rate: {optimizer.param_groups[0]['lr']}")

                        # Clear GPU cache periodically
                        if torch.cuda.is_available() and (epoch + 1) % 5 == 0:
                            torch.cuda.empty_cache()
                            gc.collect()

                    except Exception as e:
                        logging.error(f"Error during epoch {epoch + 1}: {str(e)}")
                        raise

                # Save performance history and plots
                performance_tracker.plot_performance_trends(Config.PLOTS_DIR)
                performance_tracker.save_performance_history(Config.METRICS_DIR)
                threshold_optimizer.save_threshold_history(Config.METRICS_DIR, mlb.classes_)

                # Save training history
                ModelTrainer._save_training_history(
                    training_losses, validation_losses, accuracies,
                    best_accuracy, best_val_loss, df, mlb,
                    threshold_optimizer, performance_tracker
                )

                return model, tokenizer, mlb, threshold_optimizer

        except Exception as e:
            logging.error(f"Error in training: {str(e)}")
            raise
        finally:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

    @staticmethod
    def _save_training_history(training_losses: List[float],
                             validation_losses: List[float],
                             accuracies: List[float],
                             best_accuracy: float,
                             best_val_loss: float,
                             df: pd.DataFrame,
                             mlb: MultiLabelBinarizer,
                             threshold_optimizer: DynamicThresholdOptimizer,
                             performance_tracker: PerformanceTracker) -> None:
        """Save training history and plot results with threshold and performance info"""
        try:
            # Initialize genre_labels
            genre_labels = mlb.fit_transform(df['genre'])
            
            history_data = {
                'model_info': {
                    'classes': mlb.classes_.tolist(),
                    'total_samples': len(df),
                    'genre_distribution': {
                        genre: int(count) for genre, count in zip(mlb.classes_, genre_labels.sum(axis=0))
                    }
                },
                'training_config': {
                    'batch_size': Config.MODEL_PARAMS['BATCH_SIZE'],
                    'learning_rate': Config.MODEL_PARAMS['LEARNING_RATE'],
                    'max_length': Config.MODEL_PARAMS['MAX_LENGTH'],
                    'weight_decay': Config.MODEL_PARAMS['WEIGHT_DECAY'],
                    'early_stopping': Config.MODEL_PARAMS['PATIENCE'],
                    'mixup_prob': Config.MODEL_PARAMS['MIXUP_PROB'],
                    'train_split': 1-Config.MODEL_PARAMS['TEST_SIZE'],
                    'test_split': Config.MODEL_PARAMS['TEST_SIZE']
                },
                'training_history': {
                    'epochs': list(range(1, len(training_losses) + 1)),
                    'training_loss': [float(loss) for loss in training_losses],
                    'validation_loss': [float(loss) for loss in validation_losses],
                    'accuracy': [float(acc) for acc in accuracies],
                    'best_accuracy': float(best_accuracy),
                    'best_val_loss': float(best_val_loss)
                },
                'thresholding_info': {
                    'final_thresholds': {
                        mlb.classes_[i]: thresh for i, thresh in enumerate(threshold_optimizer.thresholds)
                    },
                    'best_thresholds': {
                        mlb.classes_[i]: thresh for i, thresh in enumerate(threshold_optimizer.best_thresholds)
                    },
                    'best_f1_scores': {
                        mlb.classes_[i]: score for i, score in enumerate(threshold_optimizer.best_f1_scores)
                    }
                },
                'per_class_performance': performance_tracker.best_metrics
            }
    
            # Save history to JSON
            history_file = Config.METRICS_DIR / 'training_history.json'
            with open(history_file, 'w', encoding='utf-8') as f:
                json.dump(history_data, f, indent=4, ensure_ascii=False)
    
            # Plot training history
            Visualization.plot_training_history(history_data['training_history'])
            logging.info("Training history saved successfully")
    
        except Exception as e:
            logging.error(f"Error saving training history: {str(e)}")
            raise
    @staticmethod
    def _train_epoch(model: torch.nn.Module,
                    train_loader: DataLoader,
                    optimizer: torch.optim.Optimizer,
                    epoch: int) -> Dict:
        """Train model for one epoch"""
        total_loss = 0
        steps = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}",
                          position=1, leave=False)

        for batch in progress_bar:
            try:
                optimizer.zero_grad()

                if np.random.random() < Config.MODEL_PARAMS['MIXUP_PROB']:
                    batch = DataAugmentation.apply_mixup(batch)

                input_ids = batch['input_ids'].to(Config.DEVICE)
                attention_mask = batch['attention_mask'].to(Config.DEVICE)
                labels = batch['labels'].to(Config.DEVICE)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                loss = LossFunctions.label_smoothing_loss(
                    outputs.logits, labels, Config.MODEL_PARAMS['SMOOTHING']
                )

                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

                total_loss += loss.item()
                steps += 1
                progress_bar.set_postfix({'training_loss': f'{loss.item():.4f}'})

            except RuntimeError as e:
                if "out of memory" in str(e):
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                    logging.error("GPU out of memory during training. Try reducing batch size.")
                raise

        return {'train_loss': total_loss / steps}

    @staticmethod
    def _calculate_validation_loss(model: torch.nn.Module,
                                 val_loader: DataLoader) -> float:
        """Calculate validation loss"""
        model.eval()
        total_loss = 0
        steps = 0

        with torch.no_grad():
            for batch in val_loader:
                try:
                    input_ids = batch['input_ids'].to(Config.DEVICE)
                    attention_mask = batch['attention_mask'].to(Config.DEVICE)
                    labels = batch['labels'].to(Config.DEVICE)

                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    loss = LossFunctions.focal_loss(outputs.logits, labels)
                    total_loss += loss.item()
                    steps += 1

                except RuntimeError as e:
                    if "out of memory" in str(e):
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
                        logging.error("GPU out of memory during validation. Try reducing batch size.")
                    raise

        return total_loss / steps

    @staticmethod
    def _check_model_improvement(model: torch.nn.Module,
                               tokenizer,
                               current_accuracy: float,
                               current_loss: float,
                               best_accuracy: float,
                               best_loss: float) -> bool:
        """Check if model improved and save if necessary"""
        improved = False

        try:
            if current_accuracy > best_accuracy:
                logging.info(f"New best accuracy: {current_accuracy:.4f}")
                model.save_pretrained(str(Config.MODEL_BEST_ACC))  # Convert to string for Colab
                tokenizer.save_pretrained(str(Config.TOKENIZER_BEST_ACC))
                improved = True

            if current_loss < best_loss:
                logging.info(f"New best loss: {current_loss:.4f}")
                model.save_pretrained(str(Config.MODEL_BEST_LOSS))
                tokenizer.save_pretrained(str(Config.TOKENIZER_BEST_LOSS))
                improved = True

        except Exception as e:
            logging.error(f"Error saving model: {str(e)}")
            raise

        return improved


class HyperparameterOptimizer:
    @staticmethod
    @error_handler
    def objective(trial: Trial, df: pd.DataFrame, mlb: MultiLabelBinarizer) -> float:
        """Objective function untuk Optuna optimization"""
        try:
            # Get trial parameters
            params = HyperparameterOptimizer._get_trial_parameters(trial)

            # Prepare data
            X_train, X_test, y_train, y_test = DataProcessor.prepare_data(df, mlb)

            # Setup model dan data loaders
            with ModelManager(*ModelSetup.setup_model_and_tokenizer(len(mlb.classes_))) as (model, tokenizer):
                train_loader, val_loader = ModelSetup.setup_dataloaders(
                    X_train, X_test, y_train, y_test,
                    tokenizer, params['batch_size']
                )

                # Training loop singkat untuk optimasi
                best_val_metrics = HyperparameterOptimizer._train_trial(
                    trial, model, train_loader, val_loader, mlb, params
                )

                # Clear GPU cache after each trial
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()

            return best_val_metrics['macro_f1']

        except Exception as e:
            logging.error(f"Error in optimization objective: {str(e)}")
            raise

    @staticmethod
    def _get_trial_parameters(trial: Trial) -> Dict:
        """Get parameters for trial"""
        params = {}
        try:
            # Batch size dari list nilai diskrit
            params['batch_size'] = trial.suggest_categorical('batch_size',
                Config.OPTIM_PARAMS['batch_size'])

            # Learning rate dari list nilai diskrit
            params['learning_rate'] = trial.suggest_categorical('learning_rate',
                Config.OPTIM_PARAMS['learning_rate'])

            # Weight decay dari list nilai diskrit
            params['weight_decay'] = trial.suggest_categorical('weight_decay',
                Config.OPTIM_PARAMS['weight_decay'])

            # Mixup probability dari list nilai diskrit
            params['mixup_prob'] = trial.suggest_categorical('mixup_prob',
                Config.OPTIM_PARAMS['mixup_prob'])

            # Smoothing dari list nilai diskrit
            params['smoothing'] = trial.suggest_categorical('smoothing',
                Config.OPTIM_PARAMS['smoothing'])

            logging.info(f"Trial parameter set: {params}")

        except Exception as e:
            logging.error(f"Error getting trial parameters: {str(e)}")
            raise
        return params

    @staticmethod
    def _train_trial(trial: Trial,
                    model: torch.nn.Module,
                    train_loader: DataLoader,
                    val_loader: DataLoader,
                    mlb: MultiLabelBinarizer,
                    params: Dict) -> Dict:
        """Train model for one trial"""
        try:
            optimizer = torch.optim.AdamW(
                model.parameters(),
                lr=params['learning_rate'],
                weight_decay=params['weight_decay']
            )

            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.5, patience=2, verbose=True
            )

            best_val_metrics = None

            for epoch in range(3):  # Reduced epochs for optimization
                # Training
                model.train()
                total_loss = 0
                steps = 0

                progress_bar = tqdm(train_loader,
                                  desc=f"Epoch {epoch+1}/3",
                                  position=0,
                                  leave=False)

                for batch in progress_bar:
                    try:
                        optimizer.zero_grad()

                        if np.random.random() < params['mixup_prob']:
                            batch = DataAugmentation.apply_mixup(batch)

                        input_ids = batch['input_ids'].to(Config.DEVICE)
                        attention_mask = batch['attention_mask'].to(Config.DEVICE)
                        labels = batch['labels'].to(Config.DEVICE)

                        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                        loss = LossFunctions.label_smoothing_loss(
                            outputs.logits, labels, params['smoothing']
                        )

                        loss.backward()
                        optimizer.step()

                        total_loss += loss.item()
                        steps += 1

                        # Update progress bar
                        progress_bar.set_postfix({
                            'loss': f'{loss.item():.4f}',
                            'avg_loss': f'{(total_loss/steps):.4f}'
                        })

                    except RuntimeError as e:
                        if "out of memory" in str(e):
                            if torch.cuda.is_available():
                                torch.cuda.empty_cache()
                            logging.error("GPU OOM in trial. Trying to recover...")
                            continue
                        raise

                avg_loss = total_loss / steps

                # Evaluation
                metrics = ModelEvaluator.evaluate_model(model, val_loader, mlb)
                current_f1 = metrics['macro_f1']

                logging.info(f"Trial {trial.number}, Epoch {epoch+1}: "
                           f"Loss = {avg_loss:.4f}, F1 = {current_f1:.4f}")

                if best_val_metrics is None or current_f1 > best_val_metrics['macro_f1']:
                    best_val_metrics = metrics

                scheduler.step(metrics['macro_f1'])

                # Report intermediate value
                trial.report(metrics['macro_f1'], epoch)

                # Handle pruning based on the intermediate value
                if trial.should_prune():
                    raise optuna.TrialPruned()

                # Clear GPU cache
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()

            return best_val_metrics

        except Exception as e:
            logging.error(f"Error in trial training: {str(e)}")
            raise

    @staticmethod
    def run_optimization(df: pd.DataFrame,
                        mlb: MultiLabelBinarizer,
                        n_trials: int = 30) -> Dict:

        try:
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(seed=42),
                pruner=optuna.pruners.MedianPruner()
            )

            objective_func = lambda trial: HyperparameterOptimizer.objective(trial, df, mlb)

            logging.info("Starting hyperparameter optimization...")
            study.optimize(objective_func, n_trials=n_trials,
                         callbacks=[lambda study, trial: gc.collect()])

            # Log results
            logging.info("\nHyperparameter Optimization Results:")
            logging.info(f"Best trial number: {study.best_trial.number}")
            logging.info(f"Best F1-score: {study.best_trial.value:.4f}")
            logging.info("\nBest hyperparameters:")
            for param, value in study.best_trial.params.items():
                logging.info(f"{param}: {value}")

            # Save study results
            results_file = Config.METRICS_DIR / 'optuna_results.json'
            results = {
                'best_trial': {
                    'number': study.best_trial.number,
                    'value': study.best_trial.value,
                    'params': study.best_trial.params
                },
                'all_trials': [
                    {
                        'number': trial.number,
                        'value': trial.value,
                        'params': trial.params
                    }
                    for trial in study.trials if trial.value is not None
                ]
            }

            with open(results_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=4, ensure_ascii=False)

            # Save visualizations
            try:
                # Optimization history plot
                fig1 = optuna.visualization.plot_optimization_history(study)
                fig1.write_image(str(Config.PLOTS_DIR / "optuna_optimization_history.png"))

                # Parameter importance plot
                fig2 = optuna.visualization.plot_param_importances(study)
                fig2.write_image(str(Config.PLOTS_DIR / "optuna_param_importances.png"))

                # Parameter relationships plot
                fig3 = optuna.visualization.plot_parallel_coordinate(study)
                fig3.write_image(str(Config.PLOTS_DIR / "optuna_param_relationships.png"))

            except Exception as e:
                logging.warning(f"Could not create optimization plots: {str(e)}")

            return study.best_trial.params

        except Exception as e:
            logging.error(f"Error during optimization: {str(e)}")
            raise
        finally:
            # Clean up
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

# Fungsi get_args untuk Colab
class Args:
    def __init__(self):
        # Training parameters
        self.sample_size = None  # Number of samples to use (default: use all data)
        self.epochs = Config.MODEL_PARAMS['EPOCHS']
        self.batch_size = Config.MODEL_PARAMS['BATCH_SIZE']
        self.learning_rate = Config.MODEL_PARAMS['LEARNING_RATE']
        self.max_length = Config.MODEL_PARAMS['MAX_LENGTH']

        # Model configuration
        self.test_size = Config.MODEL_PARAMS['TEST_SIZE']
        self.weight_decay = Config.MODEL_PARAMS['WEIGHT_DECAY']
        self.mixup_prob = Config.MODEL_PARAMS['MIXUP_PROB']
        self.patience = Config.MODEL_PARAMS['PATIENCE']

        # System configuration
        self.output_dir = None
        self.no_cuda = False
        self.seed = 42

        # Label Smoothing
        self.smoothing = Config.MODEL_PARAMS['SMOOTHING']

        # Optuna specific
        self.n_trials = 20

# Modifikasi fungsi get_args
def get_args():
    return Args()

def main(args: Args) -> None:
    """Main function"""
    try:
        # Check environment first
        if not check_environment():
            raise RuntimeError("Environment check failed!")

        # Update configuration
        Config.MODEL_PARAMS.update({
            'EPOCHS': args.epochs,
            'BATCH_SIZE': args.batch_size,
            'LEARNING_RATE': args.learning_rate,
            'MAX_LENGTH': args.max_length,
            'TEST_SIZE': args.test_size,
            'WEIGHT_DECAY': args.weight_decay,
            'MIXUP_PROB': args.mixup_prob,
            'PATIENCE': args.patience,
            'SMOOTHING': args.smoothing
        })

        if args.no_cuda:
            Config.DEVICE = torch.device('cpu')
            logging.info("CUDA disabled by user")

        # Initialize logging dan experiment info
        log_system_info()

        logging.info("Starting movie genre classification with hyperparameter optimization")
        logging.info(f"Using device: {Config.DEVICE}")

        # Log initial configuration
        logging.info("\nInitial Configuration:")
        logging.info(f"Sample Size: {Config.SAMPLE_SIZE if Config.SAMPLE_SIZE else 'Full Dataset'}")
        for param, value in Config.MODEL_PARAMS.items():
            logging.info(f"{param}: {value}")

        # Load and preprocess data
        logging.info("\nLoading and preprocessing data...")
        df = DataProcessor.load_and_preprocess_data(Config.DATA_PATH, Config.SAMPLE_SIZE)
        mlb = MultiLabelBinarizer()

        # Run hyperparameter optimization
        logging.info("\nStarting hyperparameter optimization...")
        best_params = HyperparameterOptimizer.run_optimization(df, mlb, args.n_trials)

        # Update configuration with best parameters
        logging.info("\nBest Hyperparameters found:")
        for param, value in best_params.items():
            logging.info(f"{param}: {value}")
            if param in Config.MODEL_PARAMS:
                Config.MODEL_PARAMS[param] = value

        # Train final model with best parameters
        logging.info("\nTraining final model with optimized parameters...")
        model, tokenizer, mlb, threshold_optimizer = ModelTrainer.train_model(Config.SAMPLE_SIZE)

        # Test on a sample
        logging.info("\nTesting model on a sample...")
        df_sample = DataProcessor.load_and_preprocess_data(Config.DATA_PATH, sample_size=1)
        sample_text = df_sample['sinopsis'].iloc[0]

        model.eval()
        inputs = tokenizer(
            sample_text,
            return_tensors='pt',
            max_length=Config.MODEL_PARAMS['MAX_LENGTH'],
            padding='max_length',
            truncation=True
        )

        input_ids = inputs['input_ids'].to(Config.DEVICE)
        attention_mask = inputs['attention_mask'].to(Config.DEVICE)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            probs = torch.sigmoid(outputs.logits)
            
            # Gunakan threshold_optimizer untuk prediksi
            raw_predictions = probs.cpu().numpy()
            thresholded_preds = threshold_optimizer.apply_thresholds(raw_predictions)

        # Buat prediksi dengan threshold yang dioptimalkan
        predictions = []
        for idx, pred in enumerate(thresholded_preds[0]):
            if pred > 0:  # Karena sudah di-threshold
                predictions.append({
                    'genre': mlb.classes_[idx],
                    'probability': float(probs[0][idx].item()),
                    'threshold_used': threshold_optimizer.thresholds[idx]
                })

        predictions.sort(key=lambda x: x['probability'], reverse=True)

        # Log prediction results dengan informasi threshold
        logging.info("\nSample prediction results:")
        logging.info(f"Sample text: {sample_text[:100]}...")
        for pred in predictions:
            logging.info(
                f"Genre: {pred['genre']}, "
                f"Probability: {pred['probability']:.4f}, "
                f"Threshold Used: {pred['threshold_used']:.3f}"
            )

        # Save final configuration
        final_config = {
            'hyperparameters': best_params,
            'model_info': {
                'num_classes': len(mlb.classes_),
                'classes': mlb.classes_.tolist()
            },
            'training_info': {
                'device': str(Config.DEVICE),
                'final_sample_size': len(df),
                'optimization_trials': args.n_trials
            },
            'threshold_info': {
                'final_thresholds': {
                    class_name: float(thresh) 
                    for class_name, thresh in zip(mlb.classes_, threshold_optimizer.thresholds)
                },
                'best_f1_scores': {
                    class_name: float(score)
                    for class_name, score in zip(mlb.classes_, threshold_optimizer.best_f1_scores)
                }
            }
        }

        with open(Config.EXPERIMENT_DIR / 'final_configuration.json', 'w', encoding='utf-8') as f:
            json.dump(final_config, f, indent=4, ensure_ascii=False)

        logging.info("\nTraining completed successfully!")
        logging.info(f"All results and models saved in: {Config.EXPERIMENT_DIR}")

    except KeyboardInterrupt:
        logging.info("\nTraining interrupted by user")
        raise
    except Exception as e:
        logging.error(f"\nError during execution: {str(e)}")
        raise
    finally:
        logging.info("\nCleaning up resources...")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

# Entry point for Colab
if __name__ == "__main__":
    # Set random seeds for reproducibility
    args = get_args()

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    try:
        main(args)
    except KeyboardInterrupt:
        logging.info("\nTraining interrupted by user")
    except Exception as e:
        logging.error(f"Training failed: {str(e)}")
        raise
    finally:
        logging.info("Cleaning up resources...")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()


Files in dataset directory:
- final_combined_movies_5genres.csv
GPU tersedia: Tesla T4
GPU Memory: 15.83 GB
2025-02-16 17:16:23,459 - INFO - System Information:
2025-02-16 17:16:23,461 - INFO - Python Version: 3.10.12 (main, Nov  6 2024, 20:22:13) [GCC 11.4.0]
2025-02-16 17:16:23,461 - INFO - CPU Count: 4
2025-02-16 17:16:23,463 - INFO - Initial Memory Usage: 635.78 MB
2025-02-16 17:16:23,463 - INFO - GPU Device: Tesla T4
2025-02-16 17:16:23,464 - INFO - GPU Memory Total: 15.83 GB
2025-02-16 17:16:23,465 - INFO - CUDA Version: 12.1
2025-02-16 17:16:23,466 - INFO - Starting movie genre classification with hyperparameter optimization
2025-02-16 17:16:23,466 - INFO - Using device: cuda
2025-02-16 17:16:23,467 - INFO - 
Initial Configuration:
2025-02-16 17:16:23,468 - INFO - Sample Size: Full Dataset
2025-02-16 17:16:23,468 - INFO - EPOCHS: 100
2025-02-16 17:16:23,469 - INFO - BATCH_SIZE: 10
2025-02-16 17:16:23,470 - INFO - LEARNING_RATE: 1e-05
2025-02-16 17:16:23,470 - INFO - MAX_LENGTH:

100%|██████████| 1738/1738 [00:00<00:00, 13716.01it/s]

2025-02-16 17:16:23,697 - INFO - Memory usage after preprocessing: 639.85 MB
2025-02-16 17:16:23,698 - INFO - 
Starting hyperparameter optimization...



[I 2025-02-16 17:16:23,699] A new study created in memory with name: no-name-a3e650c4-ac39-4d78-8311-833087d97fd0


2025-02-16 17:16:23,701 - INFO - Starting hyperparameter optimization...
2025-02-16 17:16:23,703 - INFO - Trial parameter set: {'batch_size': 4, 'learning_rate': 3e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}
2025-02-16 17:16:23,709 - INFO - Setting up model and tokenizer...


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/229k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 17:16:46,515 - INFO - Model and tokenizer setup completed
2025-02-16 17:16:46,517 - INFO - Setting up data loaders...
2025-02-16 17:16:46,517 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 17:16:46,520 - INFO - Created sampler with 1477 weights
2025-02-16 17:16:46,522 - INFO - Created data loaders with batch size 4


                                                                                          

2025-02-16 17:19:11,092 - INFO - Starting model evaluation...
2025-02-16 17:19:11,094 - INFO - Memory usage after evaluation start: 1717.90 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.49it/s]

2025-02-16 17:19:18,874 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:19:18,884 - INFO - 
Metrics for Drama:
2025-02-16 17:19:18,885 - INFO - Accuracy: 0.4559
2025-02-16 17:19:18,885 - INFO - F1_score: 0.4621
2025-02-16 17:19:18,887 - INFO - Precision: 0.3211
2025-02-16 17:19:18,887 - INFO - Recall: 0.8243
2025-02-16 17:19:18,893 - INFO - 
Metrics for Horor:
2025-02-16 17:19:18,894 - INFO - Accuracy: 0.7280
2025-02-16 17:19:18,894 - INFO - F1_score: 0.5644
2025-02-16 17:19:18,895 - INFO - Precision: 0.4340
2025-02-16 17:19:18,897 - INFO - Recall: 0.8070
2025-02-16 17:19:18,903 - INFO - 
Metrics for Komedi:
2025-02-16 17:19:18,903 - INFO - Accuracy: 0.6207
2025-02-16 17:19:18,904 - INFO - F1_score: 0.4072
2025-02-16 17:19:18,905 - INFO - Precision: 0.3119
2025-02-16 17:19:18,905 - INFO - Recall: 0.5862
2025-02-16 17:19:18,912 - INFO - 
Metrics for Laga:
2025-02-16 17:19:18,913 - INFO - Accuracy: 0.2912
2025-02-16 17:19:18,913 - INFO - F1_score: 0.2570
2025-02-16 17:19:18,914 - 




2025-02-16 17:19:18,922 - INFO - 
Metrics for Romantis:
2025-02-16 17:19:18,923 - INFO - Accuracy: 0.7969
2025-02-16 17:19:18,924 - INFO - F1_score: 0.3614
2025-02-16 17:19:18,924 - INFO - Precision: 0.3061
2025-02-16 17:19:18,925 - INFO - Recall: 0.4412
2025-02-16 17:19:18,928 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:19:23,794 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:19:23,796 - INFO - Memory usage after evaluation end: 1759.54 MB
2025-02-16 17:19:23,796 - INFO - Trial 0, Epoch 1: Loss = 1.5835, F1 = 0.4104


                                                                                          

2025-02-16 17:21:51,707 - INFO - Starting model evaluation...
2025-02-16 17:21:51,709 - INFO - Memory usage after evaluation start: 1759.91 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.41it/s]

2025-02-16 17:21:59,557 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:21:59,562 - INFO - 
Metrics for Drama:
2025-02-16 17:21:59,563 - INFO - Accuracy: 0.5862
2025-02-16 17:21:59,564 - INFO - F1_score: 0.5000
2025-02-16 17:21:59,564 - INFO - Precision: 0.3803
2025-02-16 17:21:59,565 - INFO - Recall: 0.7297
2025-02-16 17:21:59,571 - INFO - 
Metrics for Horor:
2025-02-16 17:21:59,572 - INFO - Accuracy: 0.7280
2025-02-16 17:21:59,573 - INFO - F1_score: 0.6077
2025-02-16 17:21:59,573 - INFO - Precision: 0.4435
2025-02-16 17:21:59,574 - INFO - Recall: 0.9649
2025-02-16 17:21:59,580 - INFO - 
Metrics for Komedi:
2025-02-16 17:21:59,581 - INFO - Accuracy: 0.4828
2025-02-16 17:21:59,581 - INFO - F1_score: 0.4255
2025-02-16 17:21:59,582 - INFO - Precision: 0.2825
2025-02-16 17:21:59,584 - INFO - Recall: 0.8621
2025-02-16 17:21:59,589 - INFO - 
Metrics for Laga:
2025-02-16 17:21:59,590 - INFO - Accuracy: 0.5594
2025-02-16 17:21:59,591 - INFO - F1_score: 0.3114
2025-02-16 17:21:59,591 - 




2025-02-16 17:21:59,600 - INFO - F1_score: 0.3636
2025-02-16 17:21:59,602 - INFO - Precision: 0.2449
2025-02-16 17:21:59,603 - INFO - Recall: 0.7059
2025-02-16 17:21:59,605 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:22:03,803 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:22:03,804 - INFO - Memory usage after evaluation end: 1784.99 MB
2025-02-16 17:22:03,806 - INFO - Trial 0, Epoch 2: Loss = 1.4657, F1 = 0.4417


                                                                                          

2025-02-16 17:24:31,689 - INFO - Starting model evaluation...
2025-02-16 17:24:31,691 - INFO - Memory usage after evaluation start: 1785.87 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.43it/s]

2025-02-16 17:24:39,521 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:24:39,528 - INFO - 
Metrics for Drama:
2025-02-16 17:24:39,529 - INFO - Accuracy: 0.5326
2025-02-16 17:24:39,529 - INFO - F1_score: 0.5041
2025-02-16 17:24:39,530 - INFO - Precision: 0.3605
2025-02-16 17:24:39,532 - INFO - Recall: 0.8378
2025-02-16 17:24:39,539 - INFO - 
Metrics for Horor:
2025-02-16 17:24:39,539 - INFO - Accuracy: 0.7816
2025-02-16 17:24:39,540 - INFO - F1_score: 0.6545
2025-02-16 17:24:39,541 - INFO - Precision: 0.5000
2025-02-16 17:24:39,543 - INFO - Recall: 0.9474
2025-02-16 17:24:39,551 - INFO - 
Metrics for Komedi:
2025-02-16 17:24:39,551 - INFO - Accuracy: 0.3793
2025-02-16 17:24:39,552 - INFO - F1_score: 0.4044
2025-02-16 17:24:39,553 - INFO - Precision: 0.2570
2025-02-16 17:24:39,553 - INFO - Recall: 0.9483
2025-02-16 17:24:39,561 - INFO - 
Metrics for Laga:
2025-02-16 17:24:39,562 - INFO - Accuracy: 0.7816





2025-02-16 17:24:39,563 - INFO - F1_score: 0.4124
2025-02-16 17:24:39,564 - INFO - Precision: 0.3390
2025-02-16 17:24:39,566 - INFO - Recall: 0.5263
2025-02-16 17:24:39,573 - INFO - 
Metrics for Romantis:
2025-02-16 17:24:39,574 - INFO - Accuracy: 0.6398
2025-02-16 17:24:39,574 - INFO - F1_score: 0.3380
2025-02-16 17:24:39,576 - INFO - Precision: 0.2222
2025-02-16 17:24:39,577 - INFO - Recall: 0.7059
2025-02-16 17:24:39,579 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:24:43,774 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:24:43,775 - INFO - Memory usage after evaluation end: 1813.56 MB
2025-02-16 17:24:43,776 - INFO - Trial 0, Epoch 3: Loss = 1.3669, F1 = 0.4627


[I 2025-02-16 17:24:44,815] Trial 0 finished with value: 0.46268431258728926 and parameters: {'batch_size': 4, 'learning_rate': 3e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 0 with value: 0.46268431258728926.


2025-02-16 17:24:45,126 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.15}
2025-02-16 17:24:45,130 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 17:24:46,101 - INFO - Model and tokenizer setup completed
2025-02-16 17:24:46,102 - INFO - Setting up data loaders...
2025-02-16 17:24:46,103 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 17:24:46,105 - INFO - Created sampler with 1477 weights
2025-02-16 17:24:46,106 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 17:27:32,165 - INFO - Starting model evaluation...
2025-02-16 17:27:32,168 - INFO - Memory usage after evaluation start: 1905.47 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.45it/s]

2025-02-16 17:27:40,135 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:27:40,142 - INFO - 
Metrics for Drama:
2025-02-16 17:27:40,143 - INFO - Accuracy: 0.5785
2025-02-16 17:27:40,144 - INFO - F1_score: 0.4860
2025-02-16 17:27:40,144 - INFO - Precision: 0.3714
2025-02-16 17:27:40,145 - INFO - Recall: 0.7027
2025-02-16 17:27:40,152 - INFO - 
Metrics for Horor:
2025-02-16 17:27:40,152 - INFO - Accuracy: 0.8774
2025-02-16 17:27:40,154 - INFO - F1_score: 0.7714
2025-02-16 17:27:40,155 - INFO - Precision: 0.6506
2025-02-16 17:27:40,155 - INFO - Recall: 0.9474
2025-02-16 17:27:40,163 - INFO - 
Metrics for Komedi:
2025-02-16 17:27:40,163 - INFO - Accuracy: 0.3410
2025-02-16 17:27:40,164 - INFO - F1_score: 0.3723
2025-02-16 17:27:40,165 - INFO - Precision: 0.2361
2025-02-16 17:27:40,166 - INFO - Recall: 0.8793
2025-02-16 17:27:40,173 - INFO - 
Metrics for Laga:
2025-02-16 17:27:40,174 - INFO - Accuracy: 0.6130
2025-02-16 17:27:40,174 - INFO - F1_score: 0.3484
2025-02-16 17:27:40,175 - 




2025-02-16 17:27:40,184 - INFO - 
Metrics for Romantis:
2025-02-16 17:27:40,184 - INFO - Accuracy: 0.8123
2025-02-16 17:27:40,185 - INFO - F1_score: 0.4235
2025-02-16 17:27:40,186 - INFO - Precision: 0.3529
2025-02-16 17:27:40,186 - INFO - Recall: 0.5294
2025-02-16 17:27:40,189 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:27:44,511 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:27:44,512 - INFO - Memory usage after evaluation end: 1925.59 MB
2025-02-16 17:27:44,514 - INFO - Trial 1, Epoch 1: Loss = 1.4730, F1 = 0.4803


                                                                                          

2025-02-16 17:30:31,354 - INFO - Starting model evaluation...
2025-02-16 17:30:31,355 - INFO - Memory usage after evaluation start: 1925.59 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.48it/s]

2025-02-16 17:30:39,310 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:30:39,317 - INFO - 
Metrics for Drama:
2025-02-16 17:30:39,317 - INFO - Accuracy: 0.4253
2025-02-16 17:30:39,318 - INFO - F1_score: 0.4755
2025-02-16 17:30:39,319 - INFO - Precision: 0.3208
2025-02-16 17:30:39,319 - INFO - Recall: 0.9189
2025-02-16 17:30:39,326 - INFO - 
Metrics for Horor:
2025-02-16 17:30:39,327 - INFO - Accuracy: 0.7586
2025-02-16 17:30:39,328 - INFO - F1_score: 0.6316
2025-02-16 17:30:39,329 - INFO - Precision: 0.4737
2025-02-16 17:30:39,329 - INFO - Recall: 0.9474
2025-02-16 17:30:39,336 - INFO - 
Metrics for Komedi:
2025-02-16 17:30:39,336 - INFO - Accuracy: 0.5096
2025-02-16 17:30:39,337 - INFO - F1_score: 0.4336
2025-02-16 17:30:39,337 - INFO - Precision: 0.2917
2025-02-16 17:30:39,338 - INFO - Recall: 0.8448
2025-02-16 17:30:39,344 - INFO - 
Metrics for Laga:
2025-02-16 17:30:39,345 - INFO - Accuracy: 0.7318
2025-02-16 17:30:39,346 - INFO - F1_score: 0.3636
2025-02-16 17:30:39,347 - 




2025-02-16 17:30:39,355 - INFO - 
Metrics for Romantis:
2025-02-16 17:30:39,356 - INFO - Accuracy: 0.8084
2025-02-16 17:30:39,357 - INFO - F1_score: 0.4444
2025-02-16 17:30:39,359 - INFO - Precision: 0.3571
2025-02-16 17:30:39,360 - INFO - Recall: 0.5882
2025-02-16 17:30:39,362 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:30:43,440 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:30:43,442 - INFO - Memory usage after evaluation end: 1916.01 MB
2025-02-16 17:30:43,443 - INFO - Trial 1, Epoch 2: Loss = 1.2482, F1 = 0.4698


                                                                                          

2025-02-16 17:33:29,747 - INFO - Starting model evaluation...
2025-02-16 17:33:29,749 - INFO - Memory usage after evaluation start: 1916.01 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.55it/s]

2025-02-16 17:33:37,670 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:33:37,676 - INFO - 
Metrics for Drama:
2025-02-16 17:33:37,677 - INFO - Accuracy: 0.3640
2025-02-16 17:33:37,678 - INFO - F1_score: 0.4575
2025-02-16 17:33:37,679 - INFO - Precision: 0.3017
2025-02-16 17:33:37,680 - INFO - Recall: 0.9459
2025-02-16 17:33:37,686 - INFO - 
Metrics for Horor:
2025-02-16 17:33:37,686 - INFO - Accuracy: 0.7816
2025-02-16 17:33:37,687 - INFO - F1_score: 0.6545
2025-02-16 17:33:37,688 - INFO - Precision: 0.5000
2025-02-16 17:33:37,689 - INFO - Recall: 0.9474
2025-02-16 17:33:37,695 - INFO - 
Metrics for Komedi:
2025-02-16 17:33:37,696 - INFO - Accuracy: 0.7280
2025-02-16 17:33:37,696 - INFO - F1_score: 0.5298
2025-02-16 17:33:37,697 - INFO - Precision: 0.4301
2025-02-16 17:33:37,699 - INFO - Recall: 0.6897
2025-02-16 17:33:37,704 - INFO - 
Metrics for Laga:
2025-02-16 17:33:37,705 - INFO - Accuracy: 0.7931
2025-02-16 17:33:37,705 - INFO - F1_score: 0.4000
2025-02-16 17:33:37,706 - 




2025-02-16 17:33:37,714 - INFO - F1_score: 0.4554
2025-02-16 17:33:37,715 - INFO - Precision: 0.3433
2025-02-16 17:33:37,715 - INFO - Recall: 0.6765
2025-02-16 17:33:37,718 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:33:41,827 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:33:41,828 - INFO - Memory usage after evaluation end: 1932.70 MB
2025-02-16 17:33:41,829 - INFO - Trial 1, Epoch 3: Loss = 1.1208, F1 = 0.4995


[I 2025-02-16 17:33:42,997] Trial 1 finished with value: 0.49946173269450045 and parameters: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.15}. Best is trial 1 with value: 0.49946173269450045.


2025-02-16 17:33:43,370 - INFO - Trial parameter set: {'batch_size': 4, 'learning_rate': 5e-06, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-16 17:33:43,374 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 17:33:44,267 - INFO - Model and tokenizer setup completed
2025-02-16 17:33:44,268 - INFO - Setting up data loaders...
2025-02-16 17:33:44,269 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 17:33:44,270 - INFO - Created sampler with 1477 weights
2025-02-16 17:33:44,271 - INFO - Created data loaders with batch size 4


                                                                                          

2025-02-16 17:36:11,861 - INFO - Starting model evaluation...
2025-02-16 17:36:11,863 - INFO - Memory usage after evaluation start: 1934.09 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.42it/s]

2025-02-16 17:36:19,706 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:36:19,714 - INFO - 
Metrics for Drama:
2025-02-16 17:36:19,714 - INFO - Accuracy: 0.5479
2025-02-16 17:36:19,715 - INFO - F1_score: 0.5124
2025-02-16 17:36:19,716 - INFO - Precision: 0.3690
2025-02-16 17:36:19,716 - INFO - Recall: 0.8378
2025-02-16 17:36:19,723 - INFO - 
Metrics for Horor:
2025-02-16 17:36:19,723 - INFO - Accuracy: 0.7548
2025-02-16 17:36:19,724 - INFO - F1_score: 0.6322
2025-02-16 17:36:19,725 - INFO - Precision: 0.4701
2025-02-16 17:36:19,725 - INFO - Recall: 0.9649
2025-02-16 17:36:19,732 - INFO - 
Metrics for Komedi:
2025-02-16 17:36:19,732 - INFO - Accuracy: 0.4291
2025-02-16 17:36:19,733 - INFO - F1_score: 0.4157
2025-02-16 17:36:19,733 - INFO - Precision: 0.2690
2025-02-16 17:36:19,734 - INFO - Recall: 0.9138
2025-02-16 17:36:19,740 - INFO - 
Metrics for Laga:
2025-02-16 17:36:19,740 - INFO - Accuracy: 0.7816
2025-02-16 17:36:19,741 - INFO - F1_score: 0.3736
2025-02-16 17:36:19,742 - 




2025-02-16 17:36:19,749 - INFO - 
Metrics for Romantis:
2025-02-16 17:36:19,750 - INFO - Accuracy: 0.4023
2025-02-16 17:36:19,751 - INFO - F1_score: 0.2844
2025-02-16 17:36:19,751 - INFO - Precision: 0.1685
2025-02-16 17:36:19,752 - INFO - Recall: 0.9118
2025-02-16 17:36:19,755 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:36:23,845 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:36:23,846 - INFO - Memory usage after evaluation end: 1938.79 MB
2025-02-16 17:36:23,847 - INFO - Trial 2, Epoch 1: Loss = 1.5212, F1 = 0.4437


                                                                                          

2025-02-16 17:38:51,888 - INFO - Starting model evaluation...
2025-02-16 17:38:51,890 - INFO - Memory usage after evaluation start: 1938.79 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.43it/s]

2025-02-16 17:38:59,722 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:38:59,728 - INFO - 
Metrics for Drama:
2025-02-16 17:38:59,729 - INFO - Accuracy: 0.4904
2025-02-16 17:38:59,729 - INFO - F1_score: 0.4981
2025-02-16 17:38:59,730 - INFO - Precision: 0.3455
2025-02-16 17:38:59,731 - INFO - Recall: 0.8919
2025-02-16 17:38:59,737 - INFO - 
Metrics for Horor:
2025-02-16 17:38:59,738 - INFO - Accuracy: 0.7701
2025-02-16 17:38:59,738 - INFO - F1_score: 0.6429
2025-02-16 17:38:59,740 - INFO - Precision: 0.4865
2025-02-16 17:38:59,740 - INFO - Recall: 0.9474
2025-02-16 17:38:59,746 - INFO - 
Metrics for Komedi:
2025-02-16 17:38:59,747 - INFO - Accuracy: 0.5939
2025-02-16 17:38:59,748 - INFO - F1_score: 0.4592
2025-02-16 17:38:59,749 - INFO - Precision: 0.3261
2025-02-16 17:38:59,750 - INFO - Recall: 0.7759
2025-02-16 17:38:59,755 - INFO - 
Metrics for Laga:
2025-02-16 17:38:59,756 - INFO - Accuracy: 0.7548
2025-02-16 17:38:59,756 - INFO - F1_score: 0.4074
2025-02-16 17:38:59,757 - 




2025-02-16 17:38:59,765 - INFO - Accuracy: 0.5594
2025-02-16 17:38:59,766 - INFO - F1_score: 0.3353
2025-02-16 17:38:59,767 - INFO - Precision: 0.2086
2025-02-16 17:38:59,768 - INFO - Recall: 0.8529
2025-02-16 17:38:59,770 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:39:03,871 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:39:03,872 - INFO - Memory usage after evaluation end: 1955.17 MB
2025-02-16 17:39:03,873 - INFO - Trial 2, Epoch 2: Loss = 1.3426, F1 = 0.4686


                                                                                          

2025-02-16 17:41:31,818 - INFO - Starting model evaluation...
2025-02-16 17:41:31,819 - INFO - Memory usage after evaluation start: 1955.17 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.41it/s]

2025-02-16 17:41:39,671 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:41:39,682 - INFO - 
Metrics for Drama:
2025-02-16 17:41:39,683 - INFO - Accuracy: 0.4215
2025-02-16 17:41:39,684 - INFO - F1_score: 0.4811
2025-02-16 17:41:39,685 - INFO - Precision: 0.3226
2025-02-16 17:41:39,686 - INFO - Recall: 0.9459
2025-02-16 17:41:39,696 - INFO - 
Metrics for Horor:
2025-02-16 17:41:39,697 - INFO - Accuracy: 0.8238
2025-02-16 17:41:39,698 - INFO - F1_score: 0.6933
2025-02-16 17:41:39,699 - INFO - Precision: 0.5591
2025-02-16 17:41:39,701 - INFO - Recall: 0.9123





2025-02-16 17:41:39,713 - INFO - 
Metrics for Komedi:
2025-02-16 17:41:39,714 - INFO - Accuracy: 0.3678
2025-02-16 17:41:39,715 - INFO - F1_score: 0.4043
2025-02-16 17:41:39,716 - INFO - Precision: 0.2557
2025-02-16 17:41:39,718 - INFO - Recall: 0.9655
2025-02-16 17:41:39,728 - INFO - 
Metrics for Laga:
2025-02-16 17:41:39,729 - INFO - Accuracy: 0.7816
2025-02-16 17:41:39,730 - INFO - F1_score: 0.4356
2025-02-16 17:41:39,732 - INFO - Precision: 0.3492
2025-02-16 17:41:39,733 - INFO - Recall: 0.5789
2025-02-16 17:41:39,740 - INFO - 
Metrics for Romantis:
2025-02-16 17:41:39,741 - INFO - Accuracy: 0.8046
2025-02-16 17:41:39,742 - INFO - F1_score: 0.4742
2025-02-16 17:41:39,744 - INFO - Precision: 0.3651
2025-02-16 17:41:39,744 - INFO - Recall: 0.6765
2025-02-16 17:41:39,746 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:41:43,927 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:4

[I 2025-02-16 17:41:45,308] Trial 2 finished with value: 0.49772709762695355 and parameters: {'batch_size': 4, 'learning_rate': 5e-06, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}. Best is trial 1 with value: 0.49946173269450045.


2025-02-16 17:41:45,776 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}
2025-02-16 17:41:45,781 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 17:41:46,658 - INFO - Model and tokenizer setup completed
2025-02-16 17:41:46,660 - INFO - Setting up data loaders...
2025-02-16 17:41:46,661 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 17:41:46,663 - INFO - Created sampler with 1477 weights
2025-02-16 17:41:46,664 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 17:44:04,741 - INFO - Starting model evaluation...
2025-02-16 17:44:04,743 - INFO - Memory usage after evaluation start: 2142.91 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.49it/s]

2025-02-16 17:44:12,097 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:44:12,103 - INFO - 
Metrics for Drama:
2025-02-16 17:44:12,104 - INFO - Accuracy: 0.4636
2025-02-16 17:44:12,104 - INFO - F1_score: 0.4815
2025-02-16 17:44:12,106 - INFO - Precision: 0.3316
2025-02-16 17:44:12,107 - INFO - Recall: 0.8784
2025-02-16 17:44:12,112 - INFO - 
Metrics for Horor:
2025-02-16 17:44:12,113 - INFO - Accuracy: 0.7433
2025-02-16 17:44:12,113 - INFO - F1_score: 0.5732
2025-02-16 17:44:12,114 - INFO - Precision: 0.4500
2025-02-16 17:44:12,116 - INFO - Recall: 0.7895
2025-02-16 17:44:12,121 - INFO - 
Metrics for Komedi:
2025-02-16 17:44:12,122 - INFO - Accuracy: 0.3678
2025-02-16 17:44:12,122 - INFO - F1_score: 0.4000
2025-02-16 17:44:12,123 - INFO - Precision: 0.2535
2025-02-16 17:44:12,124 - INFO - Recall: 0.9483
2025-02-16 17:44:12,130 - INFO - 
Metrics for Laga:
2025-02-16 17:44:12,130 - INFO - Accuracy: 0.5057
2025-02-16 17:44:12,131 - INFO - F1_score: 0.3385
2025-02-16 17:44:12,132 - 




2025-02-16 17:44:16,084 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:44:16,086 - INFO - Memory usage after evaluation end: 2146.66 MB
2025-02-16 17:44:16,086 - INFO - Trial 3, Epoch 1: Loss = 1.5504, F1 = 0.4317


                                                                                          

2025-02-16 17:46:34,179 - INFO - Starting model evaluation...
2025-02-16 17:46:34,181 - INFO - Memory usage after evaluation start: 2146.66 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 17:46:41,555 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:46:41,561 - INFO - 
Metrics for Drama:
2025-02-16 17:46:41,561 - INFO - Accuracy: 0.4828
2025-02-16 17:46:41,562 - INFO - F1_score: 0.4867
2025-02-16 17:46:41,563 - INFO - Precision: 0.3386
2025-02-16 17:46:41,563 - INFO - Recall: 0.8649
2025-02-16 17:46:41,569 - INFO - 
Metrics for Horor:
2025-02-16 17:46:41,570 - INFO - Accuracy: 0.7280
2025-02-16 17:46:41,570 - INFO - F1_score: 0.6034
2025-02-16 17:46:41,572 - INFO - Precision: 0.4426
2025-02-16 17:46:41,572 - INFO - Recall: 0.9474
2025-02-16 17:46:41,578 - INFO - 
Metrics for Komedi:
2025-02-16 17:46:41,579 - INFO - Accuracy: 0.6284
2025-02-16 17:46:41,579 - INFO - F1_score: 0.4757
2025-02-16 17:46:41,580 - INFO - Precision: 0.3465
2025-02-16 17:46:41,581 - INFO - Recall: 0.7586
2025-02-16 17:46:41,587 - INFO - 
Metrics for Laga:
2025-02-16 17:46:41,588 - INFO - Accuracy: 0.6743
2025-02-16 17:46:41,588 - INFO - F1_score: 0.3796
2025-02-16 17:46:41,589 - 




2025-02-16 17:46:45,606 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:46:45,607 - INFO - Memory usage after evaluation end: 2168.07 MB
2025-02-16 17:46:45,608 - INFO - Trial 3, Epoch 2: Loss = 1.4210, F1 = 0.4736


                                                                                          

2025-02-16 17:49:03,907 - INFO - Starting model evaluation...
2025-02-16 17:49:03,908 - INFO - Memory usage after evaluation start: 2168.07 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 17:49:11,274 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:49:11,281 - INFO - 
Metrics for Drama:
2025-02-16 17:49:11,281 - INFO - Accuracy: 0.4330
2025-02-16 17:49:11,282 - INFO - F1_score: 0.4638
2025-02-16 17:49:11,283 - INFO - Precision: 0.3168
2025-02-16 17:49:11,283 - INFO - Recall: 0.8649
2025-02-16 17:49:11,289 - INFO - 
Metrics for Horor:
2025-02-16 17:49:11,290 - INFO - Accuracy: 0.6973
2025-02-16 17:49:11,291 - INFO - F1_score: 0.5820
2025-02-16 17:49:11,292 - INFO - Precision: 0.4167
2025-02-16 17:49:11,292 - INFO - Recall: 0.9649
2025-02-16 17:49:11,299 - INFO - 
Metrics for Komedi:
2025-02-16 17:49:11,299 - INFO - Accuracy: 0.5134
2025-02-16 17:49:11,300 - INFO - F1_score: 0.4549
2025-02-16 17:49:11,301 - INFO - Precision: 0.3029
2025-02-16 17:49:11,302 - INFO - Recall: 0.9138
2025-02-16 17:49:11,307 - INFO - 
Metrics for Laga:
2025-02-16 17:49:11,308 - INFO - Accuracy: 0.7816
2025-02-16 17:49:11,309 - INFO - F1_score: 0.4242
2025-02-16 17:49:11,310 - 




2025-02-16 17:49:15,280 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:49:15,281 - INFO - Memory usage after evaluation end: 2158.01 MB
2025-02-16 17:49:15,282 - INFO - Trial 3, Epoch 3: Loss = 1.2760, F1 = 0.4757


[I 2025-02-16 17:49:16,793] Trial 3 finished with value: 0.475658015569193 and parameters: {'batch_size': 8, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 1 with value: 0.49946173269450045.


2025-02-16 17:49:17,275 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 5e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}
2025-02-16 17:49:17,280 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 17:49:18,170 - INFO - Model and tokenizer setup completed
2025-02-16 17:49:18,171 - INFO - Setting up data loaders...
2025-02-16 17:49:18,172 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 17:49:18,175 - INFO - Created sampler with 1477 weights
2025-02-16 17:49:18,176 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 17:51:36,331 - INFO - Starting model evaluation...
2025-02-16 17:51:36,332 - INFO - Memory usage after evaluation start: 2207.48 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.49it/s]

2025-02-16 17:51:43,686 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:51:43,693 - INFO - 
Metrics for Drama:
2025-02-16 17:51:43,694 - INFO - Accuracy: 0.5670
2025-02-16 17:51:43,694 - INFO - F1_score: 0.4978
2025-02-16 17:51:43,695 - INFO - Precision: 0.3709
2025-02-16 17:51:43,697 - INFO - Recall: 0.7568
2025-02-16 17:51:43,702 - INFO - 
Metrics for Horor:
2025-02-16 17:51:43,703 - INFO - Accuracy: 0.5172
2025-02-16 17:51:43,704 - INFO - F1_score: 0.4375
2025-02-16 17:51:43,705 - INFO - Precision: 0.2934
2025-02-16 17:51:43,707 - INFO - Recall: 0.8596
2025-02-16 17:51:43,714 - INFO - 
Metrics for Komedi:
2025-02-16 17:51:43,715 - INFO - Accuracy: 0.5249
2025-02-16 17:51:43,716 - INFO - F1_score: 0.3981
2025-02-16 17:51:43,718 - INFO - Precision: 0.2770
2025-02-16 17:51:43,719 - INFO - Recall: 0.7069
2025-02-16 17:51:43,726 - INFO - 
Metrics for Laga:
2025-02-16 17:51:43,730 - INFO - Accuracy: 0.4100
2025-02-16 17:51:43,731 - INFO - F1_score: 0.2596
2025-02-16 17:51:43,732 - 




2025-02-16 17:51:43,745 - INFO - Recall: 0.6765
2025-02-16 17:51:43,746 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 17:51:47,686 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:51:47,687 - INFO - Memory usage after evaluation end: 2211.22 MB
2025-02-16 17:51:47,688 - INFO - Trial 4, Epoch 1: Loss = 1.5618, F1 = 0.3966


                                                                                          

2025-02-16 17:54:06,133 - INFO - Starting model evaluation...
2025-02-16 17:54:06,135 - INFO - Memory usage after evaluation start: 2211.22 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 17:54:13,502 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:54:13,508 - INFO - 
Metrics for Drama:
2025-02-16 17:54:13,509 - INFO - Accuracy: 0.6360
2025-02-16 17:54:13,509 - INFO - F1_score: 0.5226
2025-02-16 17:54:13,510 - INFO - Precision: 0.4160
2025-02-16 17:54:13,511 - INFO - Recall: 0.7027
2025-02-16 17:54:13,518 - INFO - 
Metrics for Horor:
2025-02-16 17:54:13,518 - INFO - Accuracy: 0.6552
2025-02-16 17:54:13,519 - INFO - F1_score: 0.5500
2025-02-16 17:54:13,520 - INFO - Precision: 0.3846
2025-02-16 17:54:13,521 - INFO - Recall: 0.9649
2025-02-16 17:54:13,527 - INFO - 
Metrics for Komedi:
2025-02-16 17:54:13,527 - INFO - Accuracy: 0.4828
2025-02-16 17:54:13,529 - INFO - F1_score: 0.4304
2025-02-16 17:54:13,529 - INFO - Precision: 0.2849
2025-02-16 17:54:13,530 - INFO - Recall: 0.8793
2025-02-16 17:54:13,536 - INFO - 
Metrics for Laga:
2025-02-16 17:54:13,537 - INFO - Accuracy: 0.7011
2025-02-16 17:54:13,538 - INFO - F1_score: 0.3710
2025-02-16 17:54:13,538 - 




2025-02-16 17:54:17,601 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:54:17,603 - INFO - Memory usage after evaluation end: 2232.68 MB
2025-02-16 17:54:17,604 - INFO - Trial 4, Epoch 2: Loss = 1.4419, F1 = 0.4548


                                                                                          

2025-02-16 17:56:35,899 - INFO - Starting model evaluation...
2025-02-16 17:56:35,900 - INFO - Memory usage after evaluation start: 2232.68 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 17:56:43,267 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:56:43,273 - INFO - 
Metrics for Drama:
2025-02-16 17:56:43,274 - INFO - Accuracy: 0.5632
2025-02-16 17:56:43,274 - INFO - F1_score: 0.5210
2025-02-16 17:56:43,275 - INFO - Precision: 0.3780
2025-02-16 17:56:43,275 - INFO - Recall: 0.8378
2025-02-16 17:56:43,281 - INFO - 
Metrics for Horor:
2025-02-16 17:56:43,282 - INFO - Accuracy: 0.8429
2025-02-16 17:56:43,283 - INFO - F1_score: 0.7211
2025-02-16 17:56:43,283 - INFO - Precision: 0.5889
2025-02-16 17:56:43,284 - INFO - Recall: 0.9298
2025-02-16 17:56:43,290 - INFO - 
Metrics for Komedi:
2025-02-16 17:56:43,291 - INFO - Accuracy: 0.5019
2025-02-16 17:56:43,292 - INFO - F1_score: 0.4298
2025-02-16 17:56:43,293 - INFO - Precision: 0.2882
2025-02-16 17:56:43,294 - INFO - Recall: 0.8448
2025-02-16 17:56:43,299 - INFO - 
Metrics for Laga:
2025-02-16 17:56:43,300 - INFO - Accuracy: 0.6169
2025-02-16 17:56:43,300 - INFO - F1_score: 0.3421
2025-02-16 17:56:43,301 - 




2025-02-16 17:56:47,307 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:56:47,309 - INFO - Memory usage after evaluation end: 2233.51 MB
2025-02-16 17:56:47,310 - INFO - Trial 4, Epoch 3: Loss = 1.3503, F1 = 0.4718


[I 2025-02-16 17:56:48,945] Trial 4 finished with value: 0.47177084990075874 and parameters: {'batch_size': 8, 'learning_rate': 5e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 1 with value: 0.49946173269450045.


2025-02-16 17:56:49,448 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-16 17:56:49,452 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 17:56:50,349 - INFO - Model and tokenizer setup completed
2025-02-16 17:56:50,350 - INFO - Setting up data loaders...
2025-02-16 17:56:50,351 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 17:56:50,353 - INFO - Created sampler with 1477 weights
2025-02-16 17:56:50,354 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 17:59:08,608 - INFO - Starting model evaluation...
2025-02-16 17:59:08,609 - INFO - Memory usage after evaluation start: 2234.84 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 17:59:15,979 - INFO - 
Per-genre Performance Metrics:
2025-02-16 17:59:15,985 - INFO - 
Metrics for Drama:
2025-02-16 17:59:15,986 - INFO - Accuracy: 0.6284
2025-02-16 17:59:15,986 - INFO - F1_score: 0.5403
2025-02-16 17:59:15,988 - INFO - Precision: 0.4161
2025-02-16 17:59:15,989 - INFO - Recall: 0.7703
2025-02-16 17:59:15,994 - INFO - 
Metrics for Horor:
2025-02-16 17:59:15,995 - INFO - Accuracy: 0.7471
2025-02-16 17:59:15,996 - INFO - F1_score: 0.6207
2025-02-16 17:59:15,997 - INFO - Precision: 0.4615
2025-02-16 17:59:15,998 - INFO - Recall: 0.9474
2025-02-16 17:59:16,004 - INFO - 
Metrics for Komedi:
2025-02-16 17:59:16,004 - INFO - Accuracy: 0.7471
2025-02-16 17:59:16,005 - INFO - F1_score: 0.3774
2025-02-16 17:59:16,006 - INFO - Precision: 0.4167
2025-02-16 17:59:16,007 - INFO - Recall: 0.3448
2025-02-16 17:59:16,013 - INFO - 
Metrics for Laga:
2025-02-16 17:59:16,014 - INFO - Accuracy: 0.6897
2025-02-16 17:59:16,014 - INFO - F1_score: 0.3910
2025-02-16 17:59:16,015 - 




2025-02-16 17:59:20,466 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 17:59:20,467 - INFO - Memory usage after evaluation end: 2239.50 MB
2025-02-16 17:59:20,469 - INFO - Trial 5, Epoch 1: Loss = 1.5273, F1 = 0.4493


                                                                                          

2025-02-16 18:01:39,028 - INFO - Starting model evaluation...
2025-02-16 18:01:39,030 - INFO - Memory usage after evaluation start: 2239.50 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 18:01:46,404 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:01:46,411 - INFO - 
Metrics for Drama:
2025-02-16 18:01:46,411 - INFO - Accuracy: 0.6858
2025-02-16 18:01:46,412 - INFO - F1_score: 0.5638
2025-02-16 18:01:46,413 - INFO - Precision: 0.4649
2025-02-16 18:01:46,414 - INFO - Recall: 0.7162
2025-02-16 18:01:46,420 - INFO - 
Metrics for Horor:
2025-02-16 18:01:46,420 - INFO - Accuracy: 0.6935
2025-02-16 18:01:46,421 - INFO - F1_score: 0.5789
2025-02-16 18:01:46,422 - INFO - Precision: 0.4135
2025-02-16 18:01:46,423 - INFO - Recall: 0.9649
2025-02-16 18:01:46,429 - INFO - 
Metrics for Komedi:
2025-02-16 18:01:46,429 - INFO - Accuracy: 0.5326
2025-02-16 18:01:46,431 - INFO - F1_score: 0.4602
2025-02-16 18:01:46,432 - INFO - Precision: 0.3095
2025-02-16 18:01:46,433 - INFO - Recall: 0.8966
2025-02-16 18:01:46,438 - INFO - 
Metrics for Laga:
2025-02-16 18:01:46,438 - INFO - Accuracy: 0.7778
2025-02-16 18:01:46,439 - INFO - F1_score: 0.4630
2025-02-16 18:01:46,440 - 




2025-02-16 18:01:50,636 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:01:50,637 - INFO - Memory usage after evaluation end: 2256.05 MB
2025-02-16 18:01:50,638 - INFO - Trial 5, Epoch 2: Loss = 1.3212, F1 = 0.4989


                                                                                          

2025-02-16 18:04:09,085 - INFO - Starting model evaluation...
2025-02-16 18:04:09,087 - INFO - Memory usage after evaluation start: 2256.05 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 18:04:16,458 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:04:16,463 - INFO - 
Metrics for Drama:
2025-02-16 18:04:16,464 - INFO - Accuracy: 0.5824
2025-02-16 18:04:16,465 - INFO - F1_score: 0.5439
2025-02-16 18:04:16,466 - INFO - Precision: 0.3939
2025-02-16 18:04:16,467 - INFO - Recall: 0.8784
2025-02-16 18:04:16,473 - INFO - 
Metrics for Horor:
2025-02-16 18:04:16,473 - INFO - Accuracy: 0.8391
2025-02-16 18:04:16,474 - INFO - F1_score: 0.7200
2025-02-16 18:04:16,475 - INFO - Precision: 0.5806
2025-02-16 18:04:16,475 - INFO - Recall: 0.9474
2025-02-16 18:04:16,481 - INFO - 
Metrics for Komedi:
2025-02-16 18:04:16,482 - INFO - Accuracy: 0.6015
2025-02-16 18:04:16,482 - INFO - F1_score: 0.4747
2025-02-16 18:04:16,483 - INFO - Precision: 0.3357
2025-02-16 18:04:16,483 - INFO - Recall: 0.8103
2025-02-16 18:04:16,490 - INFO - 
Metrics for Laga:
2025-02-16 18:04:16,491 - INFO - Accuracy: 0.7165
2025-02-16 18:04:16,491 - INFO - F1_score: 0.4127
2025-02-16 18:04:16,492 - 




2025-02-16 18:04:20,777 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:04:20,778 - INFO - Memory usage after evaluation end: 2280.91 MB
2025-02-16 18:04:20,779 - INFO - Trial 5, Epoch 3: Loss = 1.2059, F1 = 0.5122


[I 2025-02-16 18:04:22,553] Trial 5 finished with value: 0.5122430014825927 and parameters: {'batch_size': 8, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.1}. Best is trial 5 with value: 0.5122430014825927.


2025-02-16 18:04:23,113 - INFO - Trial parameter set: {'batch_size': 4, 'learning_rate': 5e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-16 18:04:23,118 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:04:24,040 - INFO - Model and tokenizer setup completed
2025-02-16 18:04:24,041 - INFO - Setting up data loaders...
2025-02-16 18:04:24,042 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:04:24,044 - INFO - Created sampler with 1477 weights
2025-02-16 18:04:24,046 - INFO - Created data loaders with batch size 4


                                                                                          

2025-02-16 18:06:51,606 - INFO - Starting model evaluation...
2025-02-16 18:06:51,609 - INFO - Memory usage after evaluation start: 2288.65 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.44it/s]

2025-02-16 18:06:59,429 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:06:59,435 - INFO - 
Metrics for Drama:
2025-02-16 18:06:59,436 - INFO - Accuracy: 0.5019
2025-02-16 18:06:59,437 - INFO - F1_score: 0.5000
2025-02-16 18:06:59,437 - INFO - Precision: 0.3495
2025-02-16 18:06:59,439 - INFO - Recall: 0.8784
2025-02-16 18:06:59,444 - INFO - 
Metrics for Horor:
2025-02-16 18:06:59,445 - INFO - Accuracy: 0.8506
2025-02-16 18:06:59,445 - INFO - F1_score: 0.6723
2025-02-16 18:06:59,446 - INFO - Precision: 0.6452
2025-02-16 18:06:59,447 - INFO - Recall: 0.7018
2025-02-16 18:06:59,453 - INFO - 
Metrics for Komedi:
2025-02-16 18:06:59,454 - INFO - Accuracy: 0.4138
2025-02-16 18:06:59,455 - INFO - F1_score: 0.4183
2025-02-16 18:06:59,456 - INFO - Precision: 0.2683
2025-02-16 18:06:59,456 - INFO - Recall: 0.9483
2025-02-16 18:06:59,462 - INFO - 
Metrics for Laga:
2025-02-16 18:06:59,463 - INFO - Accuracy: 0.3525
2025-02-16 18:06:59,464 - INFO - F1_score: 0.2869
2025-02-16 18:06:59,465 - 




2025-02-16 18:06:59,473 - INFO - Precision: 0.2171
2025-02-16 18:06:59,474 - INFO - Recall: 0.8235
2025-02-16 18:06:59,476 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:07:03,696 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:07:03,698 - INFO - Memory usage after evaluation end: 2310.64 MB
2025-02-16 18:07:03,699 - INFO - Trial 6, Epoch 1: Loss = 1.5199, F1 = 0.4442


                                                                                          

2025-02-16 18:09:31,981 - INFO - Starting model evaluation...
2025-02-16 18:09:31,982 - INFO - Memory usage after evaluation start: 2310.64 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.43it/s]

2025-02-16 18:09:39,819 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:09:39,824 - INFO - 
Metrics for Drama:
2025-02-16 18:09:39,825 - INFO - Accuracy: 0.5364
2025-02-16 18:09:39,826 - INFO - F1_score: 0.4895
2025-02-16 18:09:39,826 - INFO - Precision: 0.3558
2025-02-16 18:09:39,828 - INFO - Recall: 0.7838
2025-02-16 18:09:39,833 - INFO - 
Metrics for Horor:
2025-02-16 18:09:39,834 - INFO - Accuracy: 0.8008
2025-02-16 18:09:39,835 - INFO - F1_score: 0.6750
2025-02-16 18:09:39,835 - INFO - Precision: 0.5243
2025-02-16 18:09:39,836 - INFO - Recall: 0.9474
2025-02-16 18:09:39,842 - INFO - 
Metrics for Komedi:
2025-02-16 18:09:39,843 - INFO - Accuracy: 0.6245
2025-02-16 18:09:39,843 - INFO - F1_score: 0.4948
2025-02-16 18:09:39,844 - INFO - Precision: 0.3529
2025-02-16 18:09:39,845 - INFO - Recall: 0.8276
2025-02-16 18:09:39,851 - INFO - 
Metrics for Laga:
2025-02-16 18:09:39,852 - INFO - Accuracy: 0.6207
2025-02-16 18:09:39,852 - INFO - F1_score: 0.3529
2025-02-16 18:09:39,853 - 




2025-02-16 18:09:39,861 - INFO - Precision: 0.2604
2025-02-16 18:09:39,862 - INFO - Recall: 0.7353
2025-02-16 18:09:39,865 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:09:44,004 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:09:44,006 - INFO - Memory usage after evaluation end: 2339.43 MB
2025-02-16 18:09:44,007 - INFO - Trial 6, Epoch 2: Loss = 1.2975, F1 = 0.4794


                                                                                          

2025-02-16 18:12:12,253 - INFO - Starting model evaluation...
2025-02-16 18:12:12,256 - INFO - Memory usage after evaluation start: 2339.68 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.41it/s]

2025-02-16 18:12:20,104 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:12:20,109 - INFO - 
Metrics for Drama:
2025-02-16 18:12:20,110 - INFO - Accuracy: 0.5172
2025-02-16 18:12:20,111 - INFO - F1_score: 0.5078
2025-02-16 18:12:20,112 - INFO - Precision: 0.3571
2025-02-16 18:12:20,113 - INFO - Recall: 0.8784
2025-02-16 18:12:20,119 - INFO - 
Metrics for Horor:
2025-02-16 18:12:20,119 - INFO - Accuracy: 0.8199
2025-02-16 18:12:20,120 - INFO - F1_score: 0.6968
2025-02-16 18:12:20,120 - INFO - Precision: 0.5510
2025-02-16 18:12:20,121 - INFO - Recall: 0.9474
2025-02-16 18:12:20,127 - INFO - 
Metrics for Komedi:
2025-02-16 18:12:20,128 - INFO - Accuracy: 0.4866
2025-02-16 18:12:20,128 - INFO - F1_score: 0.4370
2025-02-16 18:12:20,129 - INFO - Precision: 0.2889
2025-02-16 18:12:20,130 - INFO - Recall: 0.8966
2025-02-16 18:12:20,136 - INFO - 
Metrics for Laga:
2025-02-16 18:12:20,137 - INFO - Accuracy: 0.6782
2025-02-16 18:12:20,137 - INFO - F1_score: 0.3636
2025-02-16 18:12:20,138 - 




2025-02-16 18:12:20,147 - INFO - Precision: 0.3158
2025-02-16 18:12:20,148 - INFO - Recall: 0.7059
2025-02-16 18:12:20,151 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:12:24,334 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:12:24,336 - INFO - Memory usage after evaluation end: 2364.10 MB
2025-02-16 18:12:24,338 - INFO - Trial 6, Epoch 3: Loss = 1.1415, F1 = 0.4883


[I 2025-02-16 18:12:26,264] Trial 6 finished with value: 0.4883122966928707 and parameters: {'batch_size': 4, 'learning_rate': 5e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 5 with value: 0.5122430014825927.


2025-02-16 18:12:26,879 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 5e-06, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-16 18:12:26,883 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:12:27,777 - INFO - Model and tokenizer setup completed
2025-02-16 18:12:27,778 - INFO - Setting up data loaders...
2025-02-16 18:12:27,779 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:12:27,781 - INFO - Created sampler with 1477 weights
2025-02-16 18:12:27,782 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 18:14:46,039 - INFO - Starting model evaluation...
2025-02-16 18:14:46,041 - INFO - Memory usage after evaluation start: 2376.29 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.47it/s]

2025-02-16 18:14:53,431 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:14:53,437 - INFO - 
Metrics for Drama:
2025-02-16 18:14:53,438 - INFO - Accuracy: 0.5479
2025-02-16 18:14:53,439 - INFO - F1_score: 0.5042
2025-02-16 18:14:53,440 - INFO - Precision: 0.3659
2025-02-16 18:14:53,441 - INFO - Recall: 0.8108
2025-02-16 18:14:53,446 - INFO - 
Metrics for Horor:
2025-02-16 18:14:53,447 - INFO - Accuracy: 0.7203
2025-02-16 18:14:53,447 - INFO - F1_score: 0.6011
2025-02-16 18:14:53,448 - INFO - Precision: 0.4365
2025-02-16 18:14:53,449 - INFO - Recall: 0.9649
2025-02-16 18:14:53,455 - INFO - 
Metrics for Komedi:
2025-02-16 18:14:53,456 - INFO - Accuracy: 0.3103
2025-02-16 18:14:53,457 - INFO - F1_score: 0.3617
2025-02-16 18:14:53,458 - INFO - Precision: 0.2277
2025-02-16 18:14:53,459 - INFO - Recall: 0.8793
2025-02-16 18:14:53,464 - INFO - 
Metrics for Laga:
2025-02-16 18:14:53,465 - INFO - Accuracy: 0.5249
2025-02-16 18:14:53,466 - INFO - F1_score: 0.3261
2025-02-16 18:14:53,466 - 




2025-02-16 18:14:57,652 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:14:57,654 - INFO - Memory usage after evaluation end: 2395.32 MB
2025-02-16 18:14:57,655 - INFO - Trial 7, Epoch 1: Loss = 1.5323, F1 = 0.4231
2025-02-16 18:14:57,656 - ERROR - Error in trial training: 
2025-02-16 18:14:58,408 - ERROR - Error in optimization objective: 
2025-02-16 18:14:58,409 - ERROR - Error in objective: 


[I 2025-02-16 18:14:58,410] Trial 7 pruned. 


2025-02-16 18:14:59,045 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.15}
2025-02-16 18:14:59,049 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:14:59,883 - INFO - Model and tokenizer setup completed
2025-02-16 18:14:59,885 - INFO - Setting up data loaders...
2025-02-16 18:14:59,886 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:14:59,888 - INFO - Created sampler with 1477 weights
2025-02-16 18:14:59,889 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 18:17:46,137 - INFO - Starting model evaluation...
2025-02-16 18:17:46,139 - INFO - Memory usage after evaluation start: 2643.95 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.54it/s]

2025-02-16 18:17:54,064 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:17:54,070 - INFO - 
Metrics for Drama:
2025-02-16 18:17:54,071 - INFO - Accuracy: 0.7050
2025-02-16 18:17:54,072 - INFO - F1_score: 0.5276
2025-02-16 18:17:54,072 - INFO - Precision: 0.4831
2025-02-16 18:17:54,074 - INFO - Recall: 0.5811
2025-02-16 18:17:54,080 - INFO - 
Metrics for Horor:
2025-02-16 18:17:54,080 - INFO - Accuracy: 0.7356
2025-02-16 18:17:54,081 - INFO - F1_score: 0.6145
2025-02-16 18:17:54,082 - INFO - Precision: 0.4508
2025-02-16 18:17:54,083 - INFO - Recall: 0.9649
2025-02-16 18:17:54,089 - INFO - 
Metrics for Komedi:
2025-02-16 18:17:54,089 - INFO - Accuracy: 0.5824
2025-02-16 18:17:54,090 - INFO - F1_score: 0.4734
2025-02-16 18:17:54,091 - INFO - Precision: 0.3289
2025-02-16 18:17:54,091 - INFO - Recall: 0.8448
2025-02-16 18:17:54,098 - INFO - 
Metrics for Laga:
2025-02-16 18:17:54,098 - INFO - Accuracy: 0.6858
2025-02-16 18:17:54,099 - INFO - F1_score: 0.3881
2025-02-16 18:17:54,099 - 




2025-02-16 18:17:54,107 - INFO - Accuracy: 0.4904
2025-02-16 18:17:54,108 - INFO - F1_score: 0.3179
2025-02-16 18:17:54,109 - INFO - Precision: 0.1925
2025-02-16 18:17:54,110 - INFO - Recall: 0.9118
2025-02-16 18:17:54,112 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:17:58,139 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:17:58,140 - INFO - Memory usage after evaluation end: 2648.62 MB
2025-02-16 18:17:58,141 - INFO - Trial 8, Epoch 1: Loss = 1.4403, F1 = 0.4643


                                                                                          

2025-02-16 18:20:44,901 - INFO - Starting model evaluation...
2025-02-16 18:20:44,903 - INFO - Memory usage after evaluation start: 2648.62 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.51it/s]

2025-02-16 18:20:52,840 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:20:52,846 - INFO - 
Metrics for Drama:
2025-02-16 18:20:52,847 - INFO - Accuracy: 0.3985
2025-02-16 18:20:52,848 - INFO - F1_score: 0.4714
2025-02-16 18:20:52,848 - INFO - Precision: 0.3139
2025-02-16 18:20:52,850 - INFO - Recall: 0.9459
2025-02-16 18:20:52,855 - INFO - 
Metrics for Horor:
2025-02-16 18:20:52,856 - INFO - Accuracy: 0.8736
2025-02-16 18:20:52,857 - INFO - F1_score: 0.7481
2025-02-16 18:20:52,858 - INFO - Precision: 0.6622
2025-02-16 18:20:52,859 - INFO - Recall: 0.8596
2025-02-16 18:20:52,865 - INFO - 
Metrics for Komedi:
2025-02-16 18:20:52,865 - INFO - Accuracy: 0.5785
2025-02-16 18:20:52,866 - INFO - F1_score: 0.4608
2025-02-16 18:20:52,866 - INFO - Precision: 0.3219
2025-02-16 18:20:52,867 - INFO - Recall: 0.8103
2025-02-16 18:20:52,874 - INFO - 
Metrics for Laga:
2025-02-16 18:20:52,874 - INFO - Accuracy: 0.5709
2025-02-16 18:20:52,875 - INFO - F1_score: 0.3488
2025-02-16 18:20:52,875 - 




2025-02-16 18:20:52,883 - INFO - 
Metrics for Romantis:
2025-02-16 18:20:52,884 - INFO - Accuracy: 0.7126
2025-02-16 18:20:52,885 - INFO - F1_score: 0.3697
2025-02-16 18:20:52,885 - INFO - Precision: 0.2588
2025-02-16 18:20:52,887 - INFO - Recall: 0.6471
2025-02-16 18:20:52,889 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:20:56,993 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:20:56,994 - INFO - Memory usage after evaluation end: 2654.28 MB
2025-02-16 18:20:56,995 - INFO - Trial 8, Epoch 2: Loss = 1.2426, F1 = 0.4798


                                                                                          

2025-02-16 18:23:43,737 - INFO - Starting model evaluation...
2025-02-16 18:23:43,739 - INFO - Memory usage after evaluation start: 2654.28 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.53it/s]

2025-02-16 18:23:51,669 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:23:51,675 - INFO - 
Metrics for Drama:
2025-02-16 18:23:51,675 - INFO - Accuracy: 0.5134
2025-02-16 18:23:51,676 - INFO - F1_score: 0.4940
2025-02-16 18:23:51,677 - INFO - Precision: 0.3503
2025-02-16 18:23:51,677 - INFO - Recall: 0.8378
2025-02-16 18:23:51,684 - INFO - 
Metrics for Horor:
2025-02-16 18:23:51,684 - INFO - Accuracy: 0.8161
2025-02-16 18:23:51,685 - INFO - F1_score: 0.6842
2025-02-16 18:23:51,685 - INFO - Precision: 0.5474
2025-02-16 18:23:51,686 - INFO - Recall: 0.9123
2025-02-16 18:23:51,692 - INFO - 
Metrics for Komedi:
2025-02-16 18:23:51,693 - INFO - Accuracy: 0.6130
2025-02-16 18:23:51,693 - INFO - F1_score: 0.4925
2025-02-16 18:23:51,694 - INFO - Precision: 0.3475
2025-02-16 18:23:51,696 - INFO - Recall: 0.8448
2025-02-16 18:23:51,701 - INFO - 
Metrics for Laga:
2025-02-16 18:23:51,701 - INFO - Accuracy: 0.7088
2025-02-16 18:23:51,702 - INFO - F1_score: 0.3667
2025-02-16 18:23:51,703 - 




2025-02-16 18:23:51,712 - INFO - Precision: 0.3250
2025-02-16 18:23:51,713 - INFO - Recall: 0.7647
2025-02-16 18:23:51,715 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:23:55,744 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:23:55,745 - INFO - Memory usage after evaluation end: 2659.96 MB
2025-02-16 18:23:55,746 - INFO - Trial 8, Epoch 3: Loss = 1.1345, F1 = 0.4987


[I 2025-02-16 18:23:57,817] Trial 8 finished with value: 0.49870075195998165 and parameters: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.15}. Best is trial 5 with value: 0.5122430014825927.


2025-02-16 18:23:58,478 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 5e-06, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-16 18:23:58,481 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:23:59,441 - INFO - Model and tokenizer setup completed
2025-02-16 18:23:59,442 - INFO - Setting up data loaders...
2025-02-16 18:23:59,443 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:23:59,444 - INFO - Created sampler with 1477 weights
2025-02-16 18:23:59,445 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 18:26:17,424 - INFO - Starting model evaluation...
2025-02-16 18:26:17,426 - INFO - Memory usage after evaluation start: 2648.64 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.48it/s]

2025-02-16 18:26:24,804 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:26:24,810 - INFO - 
Metrics for Drama:
2025-02-16 18:26:24,811 - INFO - Accuracy: 0.4636
2025-02-16 18:26:24,812 - INFO - F1_score: 0.4776
2025-02-16 18:26:24,812 - INFO - Precision: 0.3299
2025-02-16 18:26:24,813 - INFO - Recall: 0.8649
2025-02-16 18:26:24,820 - INFO - 
Metrics for Horor:
2025-02-16 18:26:24,820 - INFO - Accuracy: 0.3908
2025-02-16 18:26:24,821 - INFO - F1_score: 0.4133
2025-02-16 18:26:24,822 - INFO - Precision: 0.2617
2025-02-16 18:26:24,823 - INFO - Recall: 0.9825
2025-02-16 18:26:24,829 - INFO - 
Metrics for Komedi:
2025-02-16 18:26:24,829 - INFO - Accuracy: 0.4636
2025-02-16 18:26:24,830 - INFO - F1_score: 0.4167
2025-02-16 18:26:24,831 - INFO - Precision: 0.2747
2025-02-16 18:26:24,832 - INFO - Recall: 0.8621
2025-02-16 18:26:24,838 - INFO - 
Metrics for Laga:
2025-02-16 18:26:24,838 - INFO - Accuracy: 0.5096
2025-02-16 18:26:24,839 - INFO - F1_score: 0.3191
2025-02-16 18:26:24,840 - 




2025-02-16 18:26:29,068 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:26:29,069 - INFO - Memory usage after evaluation end: 2652.23 MB
2025-02-16 18:26:29,071 - INFO - Trial 9, Epoch 1: Loss = 1.5398, F1 = 0.4063
2025-02-16 18:26:29,072 - ERROR - Error in trial training: 
2025-02-16 18:26:29,925 - ERROR - Error in optimization objective: 
2025-02-16 18:26:29,926 - ERROR - Error in objective: 


[I 2025-02-16 18:26:29,927] Trial 9 pruned. 


2025-02-16 18:26:30,674 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 3e-06, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-16 18:26:30,677 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:26:31,569 - INFO - Model and tokenizer setup completed
2025-02-16 18:26:31,570 - INFO - Setting up data loaders...
2025-02-16 18:26:31,571 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:26:31,573 - INFO - Created sampler with 1477 weights
2025-02-16 18:26:31,574 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 18:28:49,546 - INFO - Starting model evaluation...
2025-02-16 18:28:49,548 - INFO - Memory usage after evaluation start: 2644.56 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.47it/s]

2025-02-16 18:28:56,935 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:28:56,941 - INFO - 
Metrics for Drama:
2025-02-16 18:28:56,942 - INFO - Accuracy: 0.5096
2025-02-16 18:28:56,943 - INFO - F1_score: 0.5039
2025-02-16 18:28:56,944 - INFO - Precision: 0.3533
2025-02-16 18:28:56,945 - INFO - Recall: 0.8784
2025-02-16 18:28:56,951 - INFO - 
Metrics for Horor:
2025-02-16 18:28:56,952 - INFO - Accuracy: 0.7126
2025-02-16 18:28:56,952 - INFO - F1_score: 0.5399
2025-02-16 18:28:56,953 - INFO - Precision: 0.4151
2025-02-16 18:28:56,955 - INFO - Recall: 0.7719
2025-02-16 18:28:56,960 - INFO - 
Metrics for Komedi:
2025-02-16 18:28:56,961 - INFO - Accuracy: 0.5939
2025-02-16 18:28:56,962 - INFO - F1_score: 0.4592
2025-02-16 18:28:56,962 - INFO - Precision: 0.3261
2025-02-16 18:28:56,963 - INFO - Recall: 0.7759
2025-02-16 18:28:56,969 - INFO - 
Metrics for Laga:
2025-02-16 18:28:56,970 - INFO - Accuracy: 0.4559
2025-02-16 18:28:56,970 - INFO - F1_score: 0.2970
2025-02-16 18:28:56,971 - 




2025-02-16 18:29:01,065 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:29:01,067 - INFO - Memory usage after evaluation end: 2649.26 MB
2025-02-16 18:29:01,068 - INFO - Trial 10, Epoch 1: Loss = 1.5632, F1 = 0.4299
2025-02-16 18:29:01,070 - ERROR - Error in trial training: 
2025-02-16 18:29:01,928 - ERROR - Error in optimization objective: 
2025-02-16 18:29:01,929 - ERROR - Error in objective: 


[I 2025-02-16 18:29:01,930] Trial 10 pruned. 


2025-02-16 18:29:02,682 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.15}
2025-02-16 18:29:02,686 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:29:03,539 - INFO - Model and tokenizer setup completed
2025-02-16 18:29:03,540 - INFO - Setting up data loaders...
2025-02-16 18:29:03,541 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:29:03,543 - INFO - Created sampler with 1477 weights
2025-02-16 18:29:03,545 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 18:31:50,349 - INFO - Starting model evaluation...
2025-02-16 18:31:50,351 - INFO - Memory usage after evaluation start: 2869.50 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.53it/s]

2025-02-16 18:31:58,283 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:31:58,289 - INFO - 
Metrics for Drama:
2025-02-16 18:31:58,290 - INFO - Accuracy: 0.5287
2025-02-16 18:31:58,290 - INFO - F1_score: 0.4896
2025-02-16 18:31:58,291 - INFO - Precision: 0.3533
2025-02-16 18:31:58,293 - INFO - Recall: 0.7973
2025-02-16 18:31:58,298 - INFO - 
Metrics for Horor:
2025-02-16 18:31:58,299 - INFO - Accuracy: 0.8544
2025-02-16 18:31:58,300 - INFO - F1_score: 0.7206
2025-02-16 18:31:58,300 - INFO - Precision: 0.6203
2025-02-16 18:31:58,302 - INFO - Recall: 0.8596
2025-02-16 18:31:58,307 - INFO - 
Metrics for Komedi:
2025-02-16 18:31:58,308 - INFO - Accuracy: 0.3142
2025-02-16 18:31:58,309 - INFO - F1_score: 0.3849
2025-02-16 18:31:58,309 - INFO - Precision: 0.2403
2025-02-16 18:31:58,311 - INFO - Recall: 0.9655
2025-02-16 18:31:58,316 - INFO - 
Metrics for Laga:
2025-02-16 18:31:58,317 - INFO - Accuracy: 0.6513
2025-02-16 18:31:58,317 - INFO - F1_score: 0.3636
2025-02-16 18:31:58,318 - 




2025-02-16 18:31:58,326 - INFO - 
Metrics for Romantis:
2025-02-16 18:31:58,327 - INFO - Accuracy: 0.7088
2025-02-16 18:31:58,328 - INFO - F1_score: 0.4154
2025-02-16 18:31:58,329 - INFO - Precision: 0.2812
2025-02-16 18:31:58,329 - INFO - Recall: 0.7941
2025-02-16 18:31:58,331 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:32:02,447 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:32:02,448 - INFO - Memory usage after evaluation end: 2873.33 MB
2025-02-16 18:32:02,449 - INFO - Trial 11, Epoch 1: Loss = 1.4629, F1 = 0.4748


                                                                                          

2025-02-16 18:34:49,903 - INFO - Starting model evaluation...
2025-02-16 18:34:49,905 - INFO - Memory usage after evaluation start: 2873.33 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.51it/s]

2025-02-16 18:34:57,847 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:34:57,853 - INFO - 
Metrics for Drama:
2025-02-16 18:34:57,854 - INFO - Accuracy: 0.5211
2025-02-16 18:34:57,854 - INFO - F1_score: 0.4898
2025-02-16 18:34:57,855 - INFO - Precision: 0.3509
2025-02-16 18:34:57,856 - INFO - Recall: 0.8108
2025-02-16 18:34:57,862 - INFO - 
Metrics for Horor:
2025-02-16 18:34:57,862 - INFO - Accuracy: 0.8161
2025-02-16 18:34:57,863 - INFO - F1_score: 0.6883
2025-02-16 18:34:57,864 - INFO - Precision: 0.5464
2025-02-16 18:34:57,865 - INFO - Recall: 0.9298
2025-02-16 18:34:57,871 - INFO - 
Metrics for Komedi:
2025-02-16 18:34:57,872 - INFO - Accuracy: 0.3985
2025-02-16 18:34:57,873 - INFO - F1_score: 0.4030
2025-02-16 18:34:57,873 - INFO - Precision: 0.2585
2025-02-16 18:34:57,875 - INFO - Recall: 0.9138
2025-02-16 18:34:57,880 - INFO - 
Metrics for Laga:
2025-02-16 18:34:57,880 - INFO - Accuracy: 0.7816
2025-02-16 18:34:57,881 - INFO - F1_score: 0.4124
2025-02-16 18:34:57,882 - 




2025-02-16 18:34:57,891 - INFO - Precision: 0.2500
2025-02-16 18:34:57,892 - INFO - Recall: 0.7353
2025-02-16 18:34:57,895 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:35:02,025 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:35:02,026 - INFO - Memory usage after evaluation end: 2878.96 MB
2025-02-16 18:35:02,027 - INFO - Trial 11, Epoch 2: Loss = 1.2498, F1 = 0.4733


                                                                                          

2025-02-16 18:37:49,512 - INFO - Starting model evaluation...
2025-02-16 18:37:49,514 - INFO - Memory usage after evaluation start: 2878.96 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.51it/s]

2025-02-16 18:37:57,454 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:37:57,460 - INFO - 
Metrics for Drama:
2025-02-16 18:37:57,461 - INFO - Accuracy: 0.5211
2025-02-16 18:37:57,461 - INFO - F1_score: 0.5098
2025-02-16 18:37:57,462 - INFO - Precision: 0.3591
2025-02-16 18:37:57,464 - INFO - Recall: 0.8784
2025-02-16 18:37:57,470 - INFO - 
Metrics for Horor:
2025-02-16 18:37:57,470 - INFO - Accuracy: 0.7931
2025-02-16 18:37:57,471 - INFO - F1_score: 0.6538
2025-02-16 18:37:57,472 - INFO - Precision: 0.5152
2025-02-16 18:37:57,472 - INFO - Recall: 0.8947
2025-02-16 18:37:57,479 - INFO - 
Metrics for Komedi:
2025-02-16 18:37:57,480 - INFO - Accuracy: 0.6360
2025-02-16 18:37:57,481 - INFO - F1_score: 0.4809
2025-02-16 18:37:57,481 - INFO - Precision: 0.3520
2025-02-16 18:37:57,482 - INFO - Recall: 0.7586
2025-02-16 18:37:57,489 - INFO - 
Metrics for Laga:
2025-02-16 18:37:57,490 - INFO - Accuracy: 0.7510
2025-02-16 18:37:57,491 - INFO - F1_score: 0.4144
2025-02-16 18:37:57,492 - 




2025-02-16 18:37:57,499 - INFO - 
Metrics for Romantis:
2025-02-16 18:37:57,500 - INFO - Accuracy: 0.7318
2025-02-16 18:37:57,500 - INFO - F1_score: 0.4531
2025-02-16 18:37:57,501 - INFO - Precision: 0.3085
2025-02-16 18:37:57,503 - INFO - Recall: 0.8529
2025-02-16 18:37:57,504 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:38:01,575 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:38:01,577 - INFO - Memory usage after evaluation end: 2884.75 MB
2025-02-16 18:38:01,578 - INFO - Trial 11, Epoch 3: Loss = 1.0898, F1 = 0.5024


[I 2025-02-16 18:38:03,961] Trial 11 finished with value: 0.5024127613538173 and parameters: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.15}. Best is trial 5 with value: 0.5122430014825927.


2025-02-16 18:38:04,734 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-16 18:38:04,738 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:38:05,702 - INFO - Model and tokenizer setup completed
2025-02-16 18:38:05,703 - INFO - Setting up data loaders...
2025-02-16 18:38:05,704 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:38:05,706 - INFO - Created sampler with 1477 weights
2025-02-16 18:38:05,707 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 18:40:52,353 - INFO - Starting model evaluation...
2025-02-16 18:40:52,355 - INFO - Memory usage after evaluation start: 2981.68 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.46it/s]

2025-02-16 18:41:00,317 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:41:00,324 - INFO - 
Metrics for Drama:
2025-02-16 18:41:00,324 - INFO - Accuracy: 0.6513
2025-02-16 18:41:00,325 - INFO - F1_score: 0.5517
2025-02-16 18:41:00,325 - INFO - Precision: 0.4341
2025-02-16 18:41:00,326 - INFO - Recall: 0.7568
2025-02-16 18:41:00,333 - INFO - 
Metrics for Horor:
2025-02-16 18:41:00,333 - INFO - Accuracy: 0.7893
2025-02-16 18:41:00,334 - INFO - F1_score: 0.6541
2025-02-16 18:41:00,334 - INFO - Precision: 0.5098
2025-02-16 18:41:00,335 - INFO - Recall: 0.9123
2025-02-16 18:41:00,341 - INFO - 
Metrics for Komedi:
2025-02-16 18:41:00,342 - INFO - Accuracy: 0.7241
2025-02-16 18:41:00,343 - INFO - F1_score: 0.5610
2025-02-16 18:41:00,343 - INFO - Precision: 0.4340
2025-02-16 18:41:00,344 - INFO - Recall: 0.7931
2025-02-16 18:41:00,350 - INFO - 
Metrics for Laga:
2025-02-16 18:41:00,351 - INFO - Accuracy: 0.7433
2025-02-16 18:41:00,351 - INFO - F1_score: 0.3964
2025-02-16 18:41:00,352 - 




2025-02-16 18:41:00,360 - INFO - F1_score: 0.3759
2025-02-16 18:41:00,361 - INFO - Precision: 0.2525
2025-02-16 18:41:00,362 - INFO - Recall: 0.7353
2025-02-16 18:41:00,365 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:41:04,248 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:41:04,250 - INFO - Memory usage after evaluation end: 2985.18 MB
2025-02-16 18:41:04,251 - INFO - Trial 12, Epoch 1: Loss = 1.4341, F1 = 0.5078


                                                                                          

2025-02-16 18:43:51,977 - INFO - Starting model evaluation...
2025-02-16 18:43:51,980 - INFO - Memory usage after evaluation start: 2985.43 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.45it/s]

2025-02-16 18:43:59,948 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:43:59,954 - INFO - 
Metrics for Drama:
2025-02-16 18:43:59,955 - INFO - Accuracy: 0.5594
2025-02-16 18:43:59,955 - INFO - F1_score: 0.5306
2025-02-16 18:43:59,956 - INFO - Precision: 0.3801
2025-02-16 18:43:59,957 - INFO - Recall: 0.8784
2025-02-16 18:43:59,963 - INFO - 
Metrics for Horor:
2025-02-16 18:43:59,963 - INFO - Accuracy: 0.8161
2025-02-16 18:43:59,964 - INFO - F1_score: 0.6883
2025-02-16 18:43:59,965 - INFO - Precision: 0.5464
2025-02-16 18:43:59,966 - INFO - Recall: 0.9298
2025-02-16 18:43:59,972 - INFO - 
Metrics for Komedi:
2025-02-16 18:43:59,972 - INFO - Accuracy: 0.6475
2025-02-16 18:43:59,973 - INFO - F1_score: 0.5208
2025-02-16 18:43:59,973 - INFO - Precision: 0.3731
2025-02-16 18:43:59,974 - INFO - Recall: 0.8621
2025-02-16 18:43:59,980 - INFO - 
Metrics for Laga:
2025-02-16 18:43:59,980 - INFO - Accuracy: 0.7050
2025-02-16 18:43:59,981 - INFO - F1_score: 0.3937
2025-02-16 18:43:59,982 - 




2025-02-16 18:43:59,989 - INFO - Accuracy: 0.8046
2025-02-16 18:43:59,990 - INFO - F1_score: 0.4632
2025-02-16 18:43:59,991 - INFO - Precision: 0.3607
2025-02-16 18:43:59,991 - INFO - Recall: 0.6471
2025-02-16 18:43:59,994 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:44:03,803 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:44:03,805 - INFO - Memory usage after evaluation end: 2990.93 MB
2025-02-16 18:44:03,806 - INFO - Trial 12, Epoch 2: Loss = 1.1741, F1 = 0.5193


                                                                                          

2025-02-16 18:46:51,272 - INFO - Starting model evaluation...
2025-02-16 18:46:51,274 - INFO - Memory usage after evaluation start: 2991.18 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.50it/s]

2025-02-16 18:46:59,217 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:46:59,223 - INFO - 
Metrics for Drama:
2025-02-16 18:46:59,223 - INFO - Accuracy: 0.6437
2025-02-16 18:46:59,224 - INFO - F1_score: 0.5419
2025-02-16 18:46:59,224 - INFO - Precision: 0.4264
2025-02-16 18:46:59,225 - INFO - Recall: 0.7432
2025-02-16 18:46:59,232 - INFO - 
Metrics for Horor:
2025-02-16 18:46:59,233 - INFO - Accuracy: 0.6858
2025-02-16 18:46:59,233 - INFO - F1_score: 0.5729
2025-02-16 18:46:59,234 - INFO - Precision: 0.4074
2025-02-16 18:46:59,236 - INFO - Recall: 0.9649
2025-02-16 18:46:59,241 - INFO - 
Metrics for Komedi:
2025-02-16 18:46:59,242 - INFO - Accuracy: 0.5900
2025-02-16 18:46:59,242 - INFO - F1_score: 0.4880
2025-02-16 18:46:59,244 - INFO - Precision: 0.3377
2025-02-16 18:46:59,245 - INFO - Recall: 0.8793
2025-02-16 18:46:59,250 - INFO - 
Metrics for Laga:
2025-02-16 18:46:59,250 - INFO - Accuracy: 0.7931
2025-02-16 18:46:59,251 - INFO - F1_score: 0.3077
2025-02-16 18:46:59,252 - 




2025-02-16 18:46:59,261 - INFO - Precision: 0.3871
2025-02-16 18:46:59,263 - INFO - Recall: 0.7059
2025-02-16 18:46:59,264 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:47:03,071 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:47:03,072 - INFO - Memory usage after evaluation end: 2996.68 MB
2025-02-16 18:47:03,074 - INFO - Trial 12, Epoch 3: Loss = 0.9537, F1 = 0.4821


[I 2025-02-16 18:47:05,594] Trial 12 finished with value: 0.5193231897362796 and parameters: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 12 with value: 0.5193231897362796.


2025-02-16 18:47:06,414 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-16 18:47:06,417 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:47:07,445 - INFO - Model and tokenizer setup completed
2025-02-16 18:47:07,446 - INFO - Setting up data loaders...
2025-02-16 18:47:07,447 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:47:07,449 - INFO - Created sampler with 1477 weights
2025-02-16 18:47:07,450 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 18:49:54,104 - INFO - Starting model evaluation...
2025-02-16 18:49:54,106 - INFO - Memory usage after evaluation start: 3192.65 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.43it/s]

2025-02-16 18:50:02,084 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:50:02,090 - INFO - 
Metrics for Drama:
2025-02-16 18:50:02,091 - INFO - Accuracy: 0.5441
2025-02-16 18:50:02,091 - INFO - F1_score: 0.4936
2025-02-16 18:50:02,092 - INFO - Precision: 0.3602
2025-02-16 18:50:02,093 - INFO - Recall: 0.7838
2025-02-16 18:50:02,099 - INFO - 
Metrics for Horor:
2025-02-16 18:50:02,100 - INFO - Accuracy: 0.7969
2025-02-16 18:50:02,101 - INFO - F1_score: 0.6708
2025-02-16 18:50:02,102 - INFO - Precision: 0.5192
2025-02-16 18:50:02,103 - INFO - Recall: 0.9474
2025-02-16 18:50:02,109 - INFO - 
Metrics for Komedi:
2025-02-16 18:50:02,109 - INFO - Accuracy: 0.5057
2025-02-16 18:50:02,110 - INFO - F1_score: 0.4367
2025-02-16 18:50:02,112 - INFO - Precision: 0.2924
2025-02-16 18:50:02,112 - INFO - Recall: 0.8621
2025-02-16 18:50:02,118 - INFO - 
Metrics for Laga:
2025-02-16 18:50:02,119 - INFO - Accuracy: 0.6667
2025-02-16 18:50:02,120 - INFO - F1_score: 0.3650
2025-02-16 18:50:02,120 - 




2025-02-16 18:50:02,129 - INFO - 
Metrics for Romantis:
2025-02-16 18:50:02,130 - INFO - Accuracy: 0.8046
2025-02-16 18:50:02,130 - INFO - F1_score: 0.4632
2025-02-16 18:50:02,131 - INFO - Precision: 0.3607
2025-02-16 18:50:02,133 - INFO - Recall: 0.6471
2025-02-16 18:50:02,134 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:50:05,982 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:50:05,983 - INFO - Memory usage after evaluation end: 3196.40 MB
2025-02-16 18:50:05,984 - INFO - Trial 13, Epoch 1: Loss = 1.4711, F1 = 0.4858


                                                                                          

2025-02-16 18:52:53,664 - INFO - Starting model evaluation...
2025-02-16 18:52:53,665 - INFO - Memory usage after evaluation start: 3196.40 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.47it/s]

2025-02-16 18:53:01,625 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:53:01,631 - INFO - 
Metrics for Drama:
2025-02-16 18:53:01,631 - INFO - Accuracy: 0.4828
2025-02-16 18:53:01,632 - INFO - F1_score: 0.4788
2025-02-16 18:53:01,633 - INFO - Precision: 0.3351
2025-02-16 18:53:01,634 - INFO - Recall: 0.8378
2025-02-16 18:53:01,639 - INFO - 
Metrics for Horor:
2025-02-16 18:53:01,640 - INFO - Accuracy: 0.7433
2025-02-16 18:53:01,641 - INFO - F1_score: 0.6171
2025-02-16 18:53:01,641 - INFO - Precision: 0.4576
2025-02-16 18:53:01,643 - INFO - Recall: 0.9474
2025-02-16 18:53:01,648 - INFO - 
Metrics for Komedi:
2025-02-16 18:53:01,649 - INFO - Accuracy: 0.6475
2025-02-16 18:53:01,649 - INFO - F1_score: 0.4889
2025-02-16 18:53:01,650 - INFO - Precision: 0.3607
2025-02-16 18:53:01,651 - INFO - Recall: 0.7586
2025-02-16 18:53:01,657 - INFO - 
Metrics for Laga:
2025-02-16 18:53:01,658 - INFO - Accuracy: 0.6782
2025-02-16 18:53:01,658 - INFO - F1_score: 0.3731
2025-02-16 18:53:01,659 - 




2025-02-16 18:53:01,667 - INFO - Precision: 0.4773
2025-02-16 18:53:01,668 - INFO - Recall: 0.6176
2025-02-16 18:53:01,671 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:53:05,485 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:53:05,486 - INFO - Memory usage after evaluation end: 3202.02 MB
2025-02-16 18:53:05,487 - INFO - Trial 13, Epoch 2: Loss = 1.2497, F1 = 0.4993


                                                                                          

2025-02-16 18:55:53,086 - INFO - Starting model evaluation...
2025-02-16 18:55:53,087 - INFO - Memory usage after evaluation start: 3202.27 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.52it/s]

2025-02-16 18:56:01,020 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:56:01,026 - INFO - 
Metrics for Drama:
2025-02-16 18:56:01,027 - INFO - Accuracy: 0.3831
2025-02-16 18:56:01,028 - INFO - F1_score: 0.4615
2025-02-16 18:56:01,028 - INFO - Precision: 0.3067
2025-02-16 18:56:01,030 - INFO - Recall: 0.9324
2025-02-16 18:56:01,035 - INFO - 
Metrics for Horor:
2025-02-16 18:56:01,036 - INFO - Accuracy: 0.7854
2025-02-16 18:56:01,036 - INFO - F1_score: 0.6456
2025-02-16 18:56:01,037 - INFO - Precision: 0.5050
2025-02-16 18:56:01,039 - INFO - Recall: 0.8947
2025-02-16 18:56:01,044 - INFO - 
Metrics for Komedi:
2025-02-16 18:56:01,045 - INFO - Accuracy: 0.6820
2025-02-16 18:56:01,046 - INFO - F1_score: 0.5257
2025-02-16 18:56:01,046 - INFO - Precision: 0.3932
2025-02-16 18:56:01,048 - INFO - Recall: 0.7931
2025-02-16 18:56:01,053 - INFO - 
Metrics for Laga:
2025-02-16 18:56:01,054 - INFO - Accuracy: 0.7011
2025-02-16 18:56:01,055 - INFO - F1_score: 0.3710
2025-02-16 18:56:01,055 - 




2025-02-16 18:56:01,064 - INFO - Precision: 0.4565
2025-02-16 18:56:01,065 - INFO - Recall: 0.6176
2025-02-16 18:56:01,067 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:56:04,889 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:56:04,890 - INFO - Memory usage after evaluation end: 3208.02 MB
2025-02-16 18:56:04,892 - INFO - Trial 13, Epoch 3: Loss = 1.1018, F1 = 0.5058


[I 2025-02-16 18:56:07,528] Trial 13 finished with value: 0.5057580218882791 and parameters: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}. Best is trial 12 with value: 0.5193231897362796.


2025-02-16 18:56:08,396 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-16 18:56:08,400 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 18:56:09,433 - INFO - Model and tokenizer setup completed
2025-02-16 18:56:09,434 - INFO - Setting up data loaders...
2025-02-16 18:56:09,435 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 18:56:09,437 - INFO - Created sampler with 1477 weights
2025-02-16 18:56:09,439 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 18:58:56,193 - INFO - Starting model evaluation...
2025-02-16 18:58:56,195 - INFO - Memory usage after evaluation start: 3316.40 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.48it/s]

2025-02-16 18:59:04,151 - INFO - 
Per-genre Performance Metrics:
2025-02-16 18:59:04,158 - INFO - 
Metrics for Drama:
2025-02-16 18:59:04,158 - INFO - Accuracy: 0.5709
2025-02-16 18:59:04,159 - INFO - F1_score: 0.5294
2025-02-16 18:59:04,160 - INFO - Precision: 0.3841
2025-02-16 18:59:04,161 - INFO - Recall: 0.8514
2025-02-16 18:59:04,167 - INFO - 
Metrics for Horor:
2025-02-16 18:59:04,167 - INFO - Accuracy: 0.8467
2025-02-16 18:59:04,168 - INFO - F1_score: 0.7101
2025-02-16 18:59:04,169 - INFO - Precision: 0.6049
2025-02-16 18:59:04,170 - INFO - Recall: 0.8596
2025-02-16 18:59:04,176 - INFO - 
Metrics for Komedi:
2025-02-16 18:59:04,177 - INFO - Accuracy: 0.5517
2025-02-16 18:59:04,177 - INFO - F1_score: 0.4658
2025-02-16 18:59:04,178 - INFO - Precision: 0.3168
2025-02-16 18:59:04,179 - INFO - Recall: 0.8793
2025-02-16 18:59:04,185 - INFO - 
Metrics for Laga:
2025-02-16 18:59:04,186 - INFO - Accuracy: 0.7241
2025-02-16 18:59:04,186 - INFO - F1_score: 0.4000
2025-02-16 18:59:04,187 - 




2025-02-16 18:59:04,196 - INFO - 
Metrics for Romantis:
2025-02-16 18:59:04,196 - INFO - Accuracy: 0.5211
2025-02-16 18:59:04,197 - INFO - F1_score: 0.3094
2025-02-16 18:59:04,198 - INFO - Precision: 0.1905
2025-02-16 18:59:04,198 - INFO - Recall: 0.8235
2025-02-16 18:59:04,201 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 18:59:08,044 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 18:59:08,045 - INFO - Memory usage after evaluation end: 3321.03 MB
2025-02-16 18:59:08,047 - INFO - Trial 14, Epoch 1: Loss = 1.4688, F1 = 0.4829


                                                                                          

2025-02-16 19:01:55,674 - INFO - Starting model evaluation...
2025-02-16 19:01:55,676 - INFO - Memory usage after evaluation start: 3321.15 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.51it/s]

2025-02-16 19:02:03,615 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:02:03,620 - INFO - 
Metrics for Drama:
2025-02-16 19:02:03,621 - INFO - Accuracy: 0.4904
2025-02-16 19:02:03,622 - INFO - F1_score: 0.4904
2025-02-16 19:02:03,622 - INFO - Precision: 0.3422
2025-02-16 19:02:03,623 - INFO - Recall: 0.8649
2025-02-16 19:02:03,629 - INFO - 
Metrics for Horor:
2025-02-16 19:02:03,630 - INFO - Accuracy: 0.8199
2025-02-16 19:02:03,631 - INFO - F1_score: 0.6803
2025-02-16 19:02:03,632 - INFO - Precision: 0.5556
2025-02-16 19:02:03,633 - INFO - Recall: 0.8772
2025-02-16 19:02:03,639 - INFO - 
Metrics for Komedi:
2025-02-16 19:02:03,639 - INFO - Accuracy: 0.6092
2025-02-16 19:02:03,640 - INFO - F1_score: 0.4457
2025-02-16 19:02:03,640 - INFO - Precision: 0.3254
2025-02-16 19:02:03,641 - INFO - Recall: 0.7069
2025-02-16 19:02:03,647 - INFO - 
Metrics for Laga:
2025-02-16 19:02:03,648 - INFO - Accuracy: 0.5824
2025-02-16 19:02:03,649 - INFO - F1_score: 0.3394
2025-02-16 19:02:03,650 - 




2025-02-16 19:02:03,658 - INFO - Accuracy: 0.7625
2025-02-16 19:02:03,658 - INFO - F1_score: 0.4259
2025-02-16 19:02:03,659 - INFO - Precision: 0.3108
2025-02-16 19:02:03,661 - INFO - Recall: 0.6765
2025-02-16 19:02:03,663 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 19:02:07,511 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:02:07,512 - INFO - Memory usage after evaluation end: 3326.65 MB
2025-02-16 19:02:07,513 - INFO - Trial 14, Epoch 2: Loss = 1.1864, F1 = 0.4763


                                                                                          

2025-02-16 19:04:55,285 - INFO - Starting model evaluation...
2025-02-16 19:04:55,286 - INFO - Memory usage after evaluation start: 3326.90 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.54it/s]

2025-02-16 19:05:03,213 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:05:03,218 - INFO - 
Metrics for Drama:
2025-02-16 19:05:03,219 - INFO - Accuracy: 0.5326
2025-02-16 19:05:03,219 - INFO - F1_score: 0.5081
2025-02-16 19:05:03,220 - INFO - Precision: 0.3621
2025-02-16 19:05:03,221 - INFO - Recall: 0.8514
2025-02-16 19:05:03,227 - INFO - 
Metrics for Horor:
2025-02-16 19:05:03,228 - INFO - Accuracy: 0.8391
2025-02-16 19:05:03,228 - INFO - F1_score: 0.7083
2025-02-16 19:05:03,229 - INFO - Precision: 0.5862
2025-02-16 19:05:03,230 - INFO - Recall: 0.8947
2025-02-16 19:05:03,236 - INFO - 
Metrics for Komedi:
2025-02-16 19:05:03,237 - INFO - Accuracy: 0.4789
2025-02-16 19:05:03,237 - INFO - F1_score: 0.4380
2025-02-16 19:05:03,238 - INFO - Precision: 0.2880
2025-02-16 19:05:03,239 - INFO - Recall: 0.9138
2025-02-16 19:05:03,245 - INFO - 
Metrics for Laga:
2025-02-16 19:05:03,245 - INFO - Accuracy: 0.7241
2025-02-16 19:05:03,246 - INFO - F1_score: 0.4000
2025-02-16 19:05:03,247 - 




2025-02-16 19:05:03,256 - INFO - Precision: 0.2917
2025-02-16 19:05:03,257 - INFO - Recall: 0.6176
2025-02-16 19:05:03,260 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 19:05:07,064 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:05:07,065 - INFO - Memory usage after evaluation end: 3333.03 MB
2025-02-16 19:05:07,066 - INFO - Trial 14, Epoch 3: Loss = 1.0667, F1 = 0.4901
2025-02-16 19:05:07,068 - ERROR - Error in trial training: 
2025-02-16 19:05:08,000 - ERROR - Error in optimization objective: 
2025-02-16 19:05:08,002 - ERROR - Error in objective: 


[I 2025-02-16 19:05:08,003] Trial 14 pruned. 


2025-02-16 19:05:08,936 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-16 19:05:08,940 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 19:05:10,043 - INFO - Model and tokenizer setup completed
2025-02-16 19:05:10,044 - INFO - Setting up data loaders...
2025-02-16 19:05:10,045 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 19:05:10,047 - INFO - Created sampler with 1477 weights
2025-02-16 19:05:10,048 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 19:07:27,998 - INFO - Starting model evaluation...
2025-02-16 19:07:28,001 - INFO - Memory usage after evaluation start: 3335.04 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.47it/s]

2025-02-16 19:07:35,394 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:07:35,399 - INFO - 
Metrics for Drama:
2025-02-16 19:07:35,400 - INFO - Accuracy: 0.5441
2025-02-16 19:07:35,401 - INFO - F1_score: 0.4979
2025-02-16 19:07:35,402 - INFO - Precision: 0.3620
2025-02-16 19:07:35,402 - INFO - Recall: 0.7973
2025-02-16 19:07:35,408 - INFO - 
Metrics for Horor:
2025-02-16 19:07:35,409 - INFO - Accuracy: 0.6782
2025-02-16 19:07:35,410 - INFO - F1_score: 0.4815
2025-02-16 19:07:35,411 - INFO - Precision: 0.3714
2025-02-16 19:07:35,411 - INFO - Recall: 0.6842
2025-02-16 19:07:35,417 - INFO - 
Metrics for Komedi:
2025-02-16 19:07:35,418 - INFO - Accuracy: 0.6743
2025-02-16 19:07:35,419 - INFO - F1_score: 0.3411
2025-02-16 19:07:35,420 - INFO - Precision: 0.3099
2025-02-16 19:07:35,420 - INFO - Recall: 0.3793
2025-02-16 19:07:35,426 - INFO - 
Metrics for Laga:
2025-02-16 19:07:35,426 - INFO - Accuracy: 0.5019
2025-02-16 19:07:35,427 - INFO - F1_score: 0.3299
2025-02-16 19:07:35,428 - 




2025-02-16 19:07:39,274 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:07:39,276 - INFO - Memory usage after evaluation end: 3338.67 MB
2025-02-16 19:07:39,276 - INFO - Trial 15, Epoch 1: Loss = 1.5864, F1 = 0.4032
2025-02-16 19:07:39,278 - ERROR - Error in trial training: 
2025-02-16 19:07:40,423 - ERROR - Error in optimization objective: 
2025-02-16 19:07:40,424 - ERROR - Error in objective: 


[I 2025-02-16 19:07:40,425] Trial 15 pruned. 


2025-02-16 19:07:41,375 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-16 19:07:41,379 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 19:07:42,384 - INFO - Model and tokenizer setup completed
2025-02-16 19:07:42,385 - INFO - Setting up data loaders...
2025-02-16 19:07:42,386 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 19:07:42,388 - INFO - Created sampler with 1477 weights
2025-02-16 19:07:42,390 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 19:10:29,052 - INFO - Starting model evaluation...
2025-02-16 19:10:29,054 - INFO - Memory usage after evaluation start: 3340.68 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.50it/s]

2025-02-16 19:10:36,999 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:10:37,006 - INFO - 
Metrics for Drama:
2025-02-16 19:10:37,006 - INFO - Accuracy: 0.5402
2025-02-16 19:10:37,007 - INFO - F1_score: 0.4872
2025-02-16 19:10:37,008 - INFO - Precision: 0.3563
2025-02-16 19:10:37,009 - INFO - Recall: 0.7703
2025-02-16 19:10:37,015 - INFO - 
Metrics for Horor:
2025-02-16 19:10:37,016 - INFO - Accuracy: 0.8046
2025-02-16 19:10:37,016 - INFO - F1_score: 0.6752
2025-02-16 19:10:37,017 - INFO - Precision: 0.5300
2025-02-16 19:10:37,018 - INFO - Recall: 0.9298
2025-02-16 19:10:37,024 - INFO - 
Metrics for Komedi:
2025-02-16 19:10:37,025 - INFO - Accuracy: 0.4521
2025-02-16 19:10:37,026 - INFO - F1_score: 0.4392
2025-02-16 19:10:37,026 - INFO - Precision: 0.2843
2025-02-16 19:10:37,027 - INFO - Recall: 0.9655
2025-02-16 19:10:37,034 - INFO - 
Metrics for Laga:
2025-02-16 19:10:37,034 - INFO - Accuracy: 0.6628
2025-02-16 19:10:37,035 - INFO - F1_score: 0.4133
2025-02-16 19:10:37,036 - 




2025-02-16 19:10:37,043 - INFO - 
Metrics for Romantis:
2025-02-16 19:10:37,044 - INFO - Accuracy: 0.6552
2025-02-16 19:10:37,046 - INFO - F1_score: 0.3662
2025-02-16 19:10:37,046 - INFO - Precision: 0.2407
2025-02-16 19:10:37,048 - INFO - Recall: 0.7647
2025-02-16 19:10:37,049 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 19:10:40,971 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:10:40,973 - INFO - Memory usage after evaluation end: 3344.43 MB
2025-02-16 19:10:40,974 - INFO - Trial 16, Epoch 1: Loss = 1.4523, F1 = 0.4762


                                                                                          

2025-02-16 19:13:28,796 - INFO - Starting model evaluation...
2025-02-16 19:13:28,798 - INFO - Memory usage after evaluation start: 3344.55 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.53it/s]

2025-02-16 19:13:36,726 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:13:36,732 - INFO - 
Metrics for Drama:
2025-02-16 19:13:36,732 - INFO - Accuracy: 0.5134
2025-02-16 19:13:36,733 - INFO - F1_score: 0.5097
2025-02-16 19:13:36,734 - INFO - Precision: 0.3568
2025-02-16 19:13:36,735 - INFO - Recall: 0.8919
2025-02-16 19:13:36,740 - INFO - 
Metrics for Horor:
2025-02-16 19:13:36,741 - INFO - Accuracy: 0.7050
2025-02-16 19:13:36,742 - INFO - F1_score: 0.5882
2025-02-16 19:13:36,743 - INFO - Precision: 0.4231
2025-02-16 19:13:36,744 - INFO - Recall: 0.9649
2025-02-16 19:13:36,750 - INFO - 
Metrics for Komedi:
2025-02-16 19:13:36,751 - INFO - Accuracy: 0.5900
2025-02-16 19:13:36,751 - INFO - F1_score: 0.4880
2025-02-16 19:13:36,752 - INFO - Precision: 0.3377
2025-02-16 19:13:36,754 - INFO - Recall: 0.8793
2025-02-16 19:13:36,760 - INFO - 
Metrics for Laga:
2025-02-16 19:13:36,761 - INFO - Accuracy: 0.5517
2025-02-16 19:13:36,762 - INFO - F1_score: 0.3314
2025-02-16 19:13:36,762 - 




2025-02-16 19:13:36,770 - INFO - 
Metrics for Romantis:
2025-02-16 19:13:36,771 - INFO - Accuracy: 0.8161
2025-02-16 19:13:36,771 - INFO - F1_score: 0.4419
2025-02-16 19:13:36,772 - INFO - Precision: 0.3654
2025-02-16 19:13:36,773 - INFO - Recall: 0.5588
2025-02-16 19:13:36,776 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 19:13:40,675 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:13:40,677 - INFO - Memory usage after evaluation end: 3350.05 MB
2025-02-16 19:13:40,677 - INFO - Trial 16, Epoch 2: Loss = 1.1865, F1 = 0.4718


                                                                                          

2025-02-16 19:16:28,382 - INFO - Starting model evaluation...
2025-02-16 19:16:28,384 - INFO - Memory usage after evaluation start: 3350.05 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.51it/s]

2025-02-16 19:16:36,326 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:16:36,333 - INFO - 
Metrics for Drama:
2025-02-16 19:16:36,334 - INFO - Accuracy: 0.4828
2025-02-16 19:16:36,334 - INFO - F1_score: 0.4906
2025-02-16 19:16:36,335 - INFO - Precision: 0.3403
2025-02-16 19:16:36,337 - INFO - Recall: 0.8784
2025-02-16 19:16:36,343 - INFO - 
Metrics for Horor:
2025-02-16 19:16:36,343 - INFO - Accuracy: 0.8161
2025-02-16 19:16:36,344 - INFO - F1_score: 0.6800
2025-02-16 19:16:36,345 - INFO - Precision: 0.5484
2025-02-16 19:16:36,346 - INFO - Recall: 0.8947
2025-02-16 19:16:36,352 - INFO - 
Metrics for Komedi:
2025-02-16 19:16:36,353 - INFO - Accuracy: 0.6284
2025-02-16 19:16:36,354 - INFO - F1_score: 0.5026
2025-02-16 19:16:36,355 - INFO - Precision: 0.3577
2025-02-16 19:16:36,356 - INFO - Recall: 0.8448
2025-02-16 19:16:36,362 - INFO - 
Metrics for Laga:
2025-02-16 19:16:36,363 - INFO - Accuracy: 0.6897
2025-02-16 19:16:36,364 - INFO - F1_score: 0.4088
2025-02-16 19:16:36,365 - 




2025-02-16 19:16:36,374 - INFO - 
Metrics for Romantis:
2025-02-16 19:16:36,375 - INFO - Accuracy: 0.7816
2025-02-16 19:16:36,375 - INFO - F1_score: 0.4571
2025-02-16 19:16:36,376 - INFO - Precision: 0.3380
2025-02-16 19:16:36,376 - INFO - Recall: 0.7059
2025-02-16 19:16:36,379 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 19:16:40,244 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:16:40,246 - INFO - Memory usage after evaluation end: 3355.80 MB
2025-02-16 19:16:40,247 - INFO - Trial 16, Epoch 3: Loss = 1.0191, F1 = 0.5078


[I 2025-02-16 19:16:43,236] Trial 16 finished with value: 0.50780642430608 and parameters: {'batch_size': 2, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 12 with value: 0.5193231897362796.


2025-02-16 19:16:44,222 - INFO - Trial parameter set: {'batch_size': 2, 'learning_rate': 3e-06, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-16 19:16:44,226 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 19:16:45,301 - INFO - Model and tokenizer setup completed
2025-02-16 19:16:45,302 - INFO - Setting up data loaders...
2025-02-16 19:16:45,303 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 19:16:45,305 - INFO - Created sampler with 1477 weights
2025-02-16 19:16:45,306 - INFO - Created data loaders with batch size 2


                                                                                          

2025-02-16 19:19:31,966 - INFO - Starting model evaluation...
2025-02-16 19:19:31,968 - INFO - Memory usage after evaluation start: 3357.95 MB


Evaluating: 100%|██████████| 131/131 [00:07<00:00, 16.52it/s]

2025-02-16 19:19:39,904 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:19:39,910 - INFO - 
Metrics for Drama:
2025-02-16 19:19:39,911 - INFO - Accuracy: 0.6513
2025-02-16 19:19:39,911 - INFO - F1_score: 0.5081
2025-02-16 19:19:39,912 - INFO - Precision: 0.4234
2025-02-16 19:19:39,913 - INFO - Recall: 0.6351
2025-02-16 19:19:39,919 - INFO - 
Metrics for Horor:
2025-02-16 19:19:39,920 - INFO - Accuracy: 0.6437
2025-02-16 19:19:39,920 - INFO - F1_score: 0.5419
2025-02-16 19:19:39,922 - INFO - Precision: 0.3767
2025-02-16 19:19:39,922 - INFO - Recall: 0.9649
2025-02-16 19:19:39,928 - INFO - 
Metrics for Komedi:
2025-02-16 19:19:39,929 - INFO - Accuracy: 0.6782
2025-02-16 19:19:39,929 - INFO - F1_score: 0.4085
2025-02-16 19:19:39,931 - INFO - Precision: 0.3452
2025-02-16 19:19:39,932 - INFO - Recall: 0.5000
2025-02-16 19:19:39,937 - INFO - 
Metrics for Laga:
2025-02-16 19:19:39,938 - INFO - Accuracy: 0.5594
2025-02-16 19:19:39,938 - INFO - F1_score: 0.3353
2025-02-16 19:19:39,939 - 




2025-02-16 19:19:39,947 - INFO - 
Metrics for Romantis:
2025-02-16 19:19:39,948 - INFO - Accuracy: 0.7701
2025-02-16 19:19:39,948 - INFO - F1_score: 0.4231
2025-02-16 19:19:39,949 - INFO - Precision: 0.3143
2025-02-16 19:19:39,950 - INFO - Recall: 0.6471
2025-02-16 19:19:39,952 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 19:19:43,713 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:19:43,715 - INFO - Memory usage after evaluation end: 3361.45 MB
2025-02-16 19:19:43,716 - INFO - Trial 17, Epoch 1: Loss = 1.5410, F1 = 0.4434
2025-02-16 19:19:43,717 - ERROR - Error in trial training: 
2025-02-16 19:19:44,845 - ERROR - Error in optimization objective: 
2025-02-16 19:19:44,846 - ERROR - Error in objective: 


[I 2025-02-16 19:19:44,847] Trial 17 pruned. 


2025-02-16 19:19:45,861 - INFO - Trial parameter set: {'batch_size': 8, 'learning_rate': 8e-06, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.1}
2025-02-16 19:19:45,865 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 19:19:46,837 - INFO - Model and tokenizer setup completed
2025-02-16 19:19:46,838 - INFO - Setting up data loaders...
2025-02-16 19:19:46,839 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 19:19:46,841 - INFO - Created sampler with 1477 weights
2025-02-16 19:19:46,842 - INFO - Created data loaders with batch size 8


                                                                                          

2025-02-16 19:22:05,228 - INFO - Starting model evaluation...
2025-02-16 19:22:05,230 - INFO - Memory usage after evaluation start: 3485.26 MB


Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.47it/s]

2025-02-16 19:22:12,618 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:22:12,624 - INFO - 
Metrics for Drama:
2025-02-16 19:22:12,625 - INFO - Accuracy: 0.5019
2025-02-16 19:22:12,626 - INFO - F1_score: 0.4800
2025-02-16 19:22:12,627 - INFO - Precision: 0.3409
2025-02-16 19:22:12,627 - INFO - Recall: 0.8108
2025-02-16 19:22:12,634 - INFO - 
Metrics for Horor:
2025-02-16 19:22:12,634 - INFO - Accuracy: 0.3640
2025-02-16 19:22:12,635 - INFO - F1_score: 0.4029
2025-02-16 19:22:12,636 - INFO - Precision: 0.2534
2025-02-16 19:22:12,637 - INFO - Recall: 0.9825
2025-02-16 19:22:12,643 - INFO - 
Metrics for Komedi:
2025-02-16 19:22:12,644 - INFO - Accuracy: 0.3716
2025-02-16 19:22:12,644 - INFO - F1_score: 0.3881
2025-02-16 19:22:12,645 - INFO - Precision: 0.2476
2025-02-16 19:22:12,646 - INFO - Recall: 0.8966
2025-02-16 19:22:12,651 - INFO - 
Metrics for Laga:
2025-02-16 19:22:12,652 - INFO - Accuracy: 0.6284
2025-02-16 19:22:12,653 - INFO - F1_score: 0.3217
2025-02-16 19:22:12,653 - 




2025-02-16 19:22:16,604 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:22:16,606 - INFO - Memory usage after evaluation end: 3488.76 MB
2025-02-16 19:22:16,607 - INFO - Trial 18, Epoch 1: Loss = 1.5425, F1 = 0.3812
2025-02-16 19:22:16,609 - ERROR - Error in trial training: 
2025-02-16 19:22:17,790 - ERROR - Error in optimization objective: 
2025-02-16 19:22:17,792 - ERROR - Error in objective: 


[I 2025-02-16 19:22:17,793] Trial 18 pruned. 


2025-02-16 19:22:18,839 - INFO - Trial parameter set: {'batch_size': 4, 'learning_rate': 8e-06, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}
2025-02-16 19:22:18,854 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 19:22:19,917 - INFO - Model and tokenizer setup completed
2025-02-16 19:22:19,918 - INFO - Setting up data loaders...
2025-02-16 19:22:19,919 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 19:22:19,921 - INFO - Created sampler with 1477 weights
2025-02-16 19:22:19,923 - INFO - Created data loaders with batch size 4


                                                                                          

2025-02-16 19:24:48,072 - INFO - Starting model evaluation...
2025-02-16 19:24:48,073 - INFO - Memory usage after evaluation start: 3368.29 MB


Evaluating: 100%|██████████| 66/66 [00:07<00:00,  8.39it/s]

2025-02-16 19:24:55,940 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:24:55,946 - INFO - 
Metrics for Drama:
2025-02-16 19:24:55,947 - INFO - Accuracy: 0.3525
2025-02-16 19:24:55,948 - INFO - F1_score: 0.4566
2025-02-16 19:24:55,948 - INFO - Precision: 0.2996
2025-02-16 19:24:55,950 - INFO - Recall: 0.9595
2025-02-16 19:24:55,955 - INFO - 
Metrics for Horor:
2025-02-16 19:24:55,956 - INFO - Accuracy: 0.8046
2025-02-16 19:24:55,956 - INFO - F1_score: 0.6710
2025-02-16 19:24:55,958 - INFO - Precision: 0.5306
2025-02-16 19:24:55,958 - INFO - Recall: 0.9123
2025-02-16 19:24:55,964 - INFO - 
Metrics for Komedi:
2025-02-16 19:24:55,964 - INFO - Accuracy: 0.7548
2025-02-16 19:24:55,965 - INFO - F1_score: 0.3725
2025-02-16 19:24:55,966 - INFO - Precision: 0.4318
2025-02-16 19:24:55,966 - INFO - Recall: 0.3276
2025-02-16 19:24:55,972 - INFO - 
Metrics for Laga:
2025-02-16 19:24:55,973 - INFO - Accuracy: 0.5479
2025-02-16 19:24:55,974 - INFO - F1_score: 0.3295
2025-02-16 19:24:55,974 - 




2025-02-16 19:24:55,982 - INFO - Accuracy: 0.7318
2025-02-16 19:24:55,982 - INFO - F1_score: 0.4167
2025-02-16 19:24:55,984 - INFO - Precision: 0.2907
2025-02-16 19:24:55,984 - INFO - Recall: 0.7353
2025-02-16 19:24:55,986 - INFO - Generating detailed confusion matrices for each genre...
2025-02-16 19:24:59,846 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:24:59,848 - INFO - Memory usage after evaluation end: 3373.04 MB
2025-02-16 19:24:59,849 - INFO - Trial 19, Epoch 1: Loss = 1.4472, F1 = 0.4493
2025-02-16 19:24:59,851 - ERROR - Error in trial training: 
2025-02-16 19:25:01,017 - ERROR - Error in optimization objective: 
2025-02-16 19:25:01,018 - ERROR - Error in objective: 


[I 2025-02-16 19:25:01,019] Trial 19 pruned. 


2025-02-16 19:25:02,065 - INFO - 
Hyperparameter Optimization Results:
2025-02-16 19:25:02,068 - INFO - Best trial number: 12
2025-02-16 19:25:02,069 - INFO - Best F1-score: 0.5193
2025-02-16 19:25:02,070 - INFO - 
Best hyperparameters:
2025-02-16 19:25:02,071 - INFO - batch_size: 2
2025-02-16 19:25:02,072 - INFO - learning_rate: 8e-06
2025-02-16 19:25:02,073 - INFO - weight_decay: 0.01
2025-02-16 19:25:02,074 - INFO - mixup_prob: 0.2
2025-02-16 19:25:02,075 - INFO - smoothing: 0.1
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido

2025-02-16 19:25:04,638 - INFO - 
Best Hyperparameters found:
2025-02-16 19:25:04,640 - INFO - batch_size: 2
2025-02-16 19:25:04,640 - INFO - learning_rate: 8e-06
2025-02-16 19:25:04,642 - INFO - weight_decay: 0.01
2025-02-16 19:25:04,643 - INFO - mixup_prob: 0.2
2025-02-16 19:25:04,644 - INFO - smoothing: 0.1
2025-02-16 19:25:04,645 - INFO - 
Training final model with optimiz

100%|██████████| 1738/1738 [00:00<00:00, 14700.59it/s]

2025-02-16 19:25:04,812 - INFO - Memory usage after preprocessing: 3383.54 MB
2025-02-16 19:25:04,813 - INFO - 
Dataset statistics:
2025-02-16 19:25:04,813 - INFO - Total samples after preprocessing: 1738
2025-02-16 19:25:04,819 - INFO - Genre 'Drama': 510 samples
2025-02-16 19:25:04,819 - INFO - Genre 'Horor': 349 samples
2025-02-16 19:25:04,820 - INFO - Genre 'Komedi': 374 samples
2025-02-16 19:25:04,820 - INFO - Genre 'Laga': 297 samples
2025-02-16 19:25:04,822 - INFO - Genre 'Romantis': 208 samples
2025-02-16 19:25:04,823 - INFO - 
Training set size: 1477
2025-02-16 19:25:04,824 - INFO - Testing set size: 261
2025-02-16 19:25:04,824 - INFO - Setting up model and tokenizer...



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-16 19:25:05,765 - INFO - Model and tokenizer setup completed
2025-02-16 19:25:05,766 - INFO - Setting up data loaders...
2025-02-16 19:25:05,767 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-16 19:25:05,769 - INFO - Created sampler with 1477 weights
2025-02-16 19:25:05,770 - INFO - Created data loaders with batch size 10


Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]
Epoch 1:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 1:   0%|          | 0/148 [00:01<?, ?it/s, training_loss=1.8541][A
Epoch 1:   1%|          | 1/148 [00:01<03:05,  1.26s/it, training_loss=1.8541][A
Epoch 1:   1%|          | 1/148 [00:02<03:05,  1.26s/it, training_loss=1.6993][A
Epoch 1:   1%|▏         | 2/148 [00:02<02:33,  1.05s/it, training_loss=1.6993][A
Epoch 1:   1%|▏         | 2/148 [00:03<02:33,  1.05s/it, training_loss=1.7109][A
Epoch 1:   2%|▏         | 3/148 [00:03<02:22,  1.02it/s, training_loss=1.7109][A
Epoch 1:   2%|▏         | 3/148 [00:03<02:22,  1.02it/s, training_loss=1.6743][A
Epoch 1:   3%|▎         | 4/148 [00:03<02:17,  1.05it/s, training_loss=1.6743][A
Epoch 1:   3%|▎         | 4/148 [00:04<02:17,  1.05it/s, training_loss=1.6275][A
Epoch 1:   3%|▎         | 5/148 [00:04<02:14,  1.07it/s, training_loss=1.6275][A
Epoch 1:   3%|▎         | 5/148 [00:05<02:14,  1.07it/s, training_loss=1.6878

2025-02-16 19:27:21,810 - INFO - Starting model evaluation...
2025-02-16 19:27:21,811 - INFO - Memory usage after evaluation start: 3569.00 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:07,  3.68it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.64it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.62it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.61it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.61it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.62it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.60it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.60it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:05,  3.59it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.60it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.59it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.59it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.59it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.59it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.59it/s][A
Evaluatin

2025-02-16 19:27:29,079 - INFO - Optimizing classification thresholds...
2025-02-16 19:27:29,094 - INFO - Class 'Drama': Optimal threshold = 0.550, F1 Score = 0.520
2025-02-16 19:27:29,108 - INFO - Class 'Horor': Optimal threshold = 0.550, F1 Score = 0.667
2025-02-16 19:27:29,122 - INFO - Class 'Komedi': Optimal threshold = 0.550, F1 Score = 0.519
2025-02-16 19:27:29,135 - INFO - Class 'Laga': Optimal threshold = 0.450, F1 Score = 0.318
2025-02-16 19:27:29,148 - INFO - Class 'Romantis': Optimal threshold = 0.600, F1 Score = 0.430
2025-02-16 19:27:29,171 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:27:29,177 - INFO - 
Metrics for Drama:
2025-02-16 19:27:29,178 - INFO - Accuracy: 0.6743
2025-02-16 19:27:29,178 - INFO - F1_score: 0.5198
2025-02-16 19:27:29,179 - INFO - Precision: 0.4466
2025-02-16 19:27:29,180 - INFO - Recall: 0.6216
2025-02-16 19:27:29,186 - INFO - 
Metrics for Horor:
2025-02-16 19:27:29,187 - INFO - Accuracy: 0.8046
2025-02-16 19:27:29,187 - INFO - F1_score: 0




2025-02-16 19:27:33,078 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:27:33,080 - INFO - Memory usage after evaluation end: 3571.75 MB


Training Progress:   0%|          | 0/100 [02:34<?, ?it/s, Train Loss=1.5694, Val Loss=0.0439, Accuracy=0.7065]

2025-02-16 19:27:40,495 - INFO - New best accuracy: 0.7065
2025-02-16 19:27:41,313 - INFO - New best loss: 0.0439
2025-02-16 19:27:41,903 - INFO - Learning rate: 1e-05


Training Progress:   1%|          | 1/100 [02:36<4:17:36, 156.13s/it, Train Loss=1.5694, Val Loss=0.0439, Accuracy=0.7065]
Epoch 2:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 2:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=1.5767][A
Epoch 2:   1%|          | 1/148 [00:00<02:12,  1.11it/s, training_loss=1.5767][A
Epoch 2:   1%|          | 1/148 [00:01<02:12,  1.11it/s, training_loss=1.6365][A
Epoch 2:   1%|▏         | 2/148 [00:01<02:12,  1.11it/s, training_loss=1.6365][A
Epoch 2:   1%|▏         | 2/148 [00:02<02:12,  1.11it/s, training_loss=1.3667][A
Epoch 2:   2%|▏         | 3/148 [00:02<02:12,  1.09it/s, training_loss=1.3667][A
Epoch 2:   2%|▏         | 3/148 [00:03<02:12,  1.09it/s, training_loss=1.4103][A
Epoch 2:   3%|▎         | 4/148 [00:03<02:11,  1.09it/s, training_loss=1.4103][A
Epoch 2:   3%|▎         | 4/148 [00:04<02:11,  1.09it/s, training_loss=1.5524][A
Epoch 2:   3%|▎         | 5/148 [00:04<02:10,  1.09it/s, training_loss=1.5524][A
Epoch 2:   3%

2025-02-16 19:29:57,410 - INFO - Starting model evaluation...
2025-02-16 19:29:57,412 - INFO - Memory usage after evaluation start: 3876.13 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:06,  3.72it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.65it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.63it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.63it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.63it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.63it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.63it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.63it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:04,  3.62it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.62it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.62it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.62it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.61it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.62it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.61it/s][A
Evaluatin

2025-02-16 19:30:04,643 - INFO - Optimizing classification thresholds...
2025-02-16 19:30:04,660 - INFO - Class 'Drama': Optimal threshold = 0.500, F1 Score = 0.502
2025-02-16 19:30:04,675 - INFO - Class 'Horor': Optimal threshold = 0.650, F1 Score = 0.759
2025-02-16 19:30:04,689 - INFO - Class 'Komedi': Optimal threshold = 0.700, F1 Score = 0.544
2025-02-16 19:30:04,702 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.466
2025-02-16 19:30:04,715 - INFO - Class 'Romantis': Optimal threshold = 0.600, F1 Score = 0.489
2025-02-16 19:30:04,737 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:30:04,742 - INFO - 
Metrics for Drama:
2025-02-16 19:30:04,743 - INFO - Accuracy: 0.5057
2025-02-16 19:30:04,744 - INFO - F1_score: 0.5019
2025-02-16 19:30:04,745 - INFO - Precision: 0.3514
2025-02-16 19:30:04,746 - INFO - Recall: 0.8784
2025-02-16 19:30:04,752 - INFO - 
Metrics for Horor:
2025-02-16 19:30:04,752 - INFO - Accuracy: 0.8927
2025-02-16 19:30:04,753 - INFO - F1_score: 0




2025-02-16 19:30:08,595 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:30:08,596 - INFO - Memory usage after evaluation end: 3881.75 MB


Training Progress:   1%|          | 1/100 [05:10<4:17:36, 156.13s/it, Train Loss=1.4713, Val Loss=0.0483, Accuracy=0.7663]

2025-02-16 19:30:15,798 - INFO - New best accuracy: 0.7663
2025-02-16 19:30:16,858 - INFO - Learning rate: 1e-05


Training Progress:   2%|▏         | 2/100 [05:11<4:13:53, 155.44s/it, Train Loss=1.4713, Val Loss=0.0483, Accuracy=0.7663]
Epoch 3:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 3:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=1.4680][A
Epoch 3:   1%|          | 1/148 [00:00<02:12,  1.11it/s, training_loss=1.4680][A
Epoch 3:   1%|          | 1/148 [00:01<02:12,  1.11it/s, training_loss=1.2389][A
Epoch 3:   1%|▏         | 2/148 [00:01<02:12,  1.10it/s, training_loss=1.2389][A
Epoch 3:   1%|▏         | 2/148 [00:02<02:12,  1.10it/s, training_loss=1.6273][A
Epoch 3:   2%|▏         | 3/148 [00:02<02:11,  1.10it/s, training_loss=1.6273][A
Epoch 3:   2%|▏         | 3/148 [00:03<02:11,  1.10it/s, training_loss=1.7014][A
Epoch 3:   3%|▎         | 4/148 [00:03<02:10,  1.10it/s, training_loss=1.7014][A
Epoch 3:   3%|▎         | 4/148 [00:04<02:10,  1.10it/s, training_loss=1.3102][A
Epoch 3:   3%|▎         | 5/148 [00:04<02:10,  1.10it/s, training_loss=1.3102][A
Epoch 3:   3%

2025-02-16 19:32:32,049 - INFO - Starting model evaluation...
2025-02-16 19:32:32,050 - INFO - Memory usage after evaluation start: 3882.00 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:06,  3.72it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.65it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.63it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.62it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.63it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.63it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.61it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.61it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:04,  3.62it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.61it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.60it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.60it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.61it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.61it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.60it/s][A
Evaluatin

2025-02-16 19:32:39,298 - INFO - Optimizing classification thresholds...
2025-02-16 19:32:39,314 - INFO - Class 'Drama': Optimal threshold = 0.500, F1 Score = 0.540
2025-02-16 19:32:39,329 - INFO - Class 'Horor': Optimal threshold = 0.750, F1 Score = 0.741
2025-02-16 19:32:39,345 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.606
2025-02-16 19:32:39,361 - INFO - Class 'Laga': Optimal threshold = 0.500, F1 Score = 0.407
2025-02-16 19:32:39,377 - INFO - Class 'Romantis': Optimal threshold = 0.650, F1 Score = 0.493
2025-02-16 19:32:39,405 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:32:39,411 - INFO - 
Metrics for Drama:
2025-02-16 19:32:39,412 - INFO - Accuracy: 0.6475
2025-02-16 19:32:39,413 - INFO - F1_score: 0.5400
2025-02-16 19:32:39,414 - INFO - Precision: 0.4286
2025-02-16 19:32:39,414 - INFO - Recall: 0.7297
2025-02-16 19:32:39,422 - INFO - 
Metrics for Horor:
2025-02-16 19:32:39,423 - INFO - Accuracy: 0.8659
2025-02-16 19:32:39,423 - INFO - F1_score: 0




2025-02-16 19:32:43,338 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:32:43,340 - INFO - Memory usage after evaluation end: 3887.63 MB


Training Progress:   2%|▏         | 2/100 [07:44<4:13:53, 155.44s/it, Train Loss=1.4407, Val Loss=0.0481, Accuracy=0.7785]

2025-02-16 19:32:50,578 - INFO - New best accuracy: 0.7785
2025-02-16 19:32:51,529 - INFO - Learning rate: 1e-05


Training Progress:   3%|▎         | 3/100 [07:45<4:10:43, 155.09s/it, Train Loss=1.4407, Val Loss=0.0481, Accuracy=0.7785]
Epoch 4:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 4:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=1.3065][A
Epoch 4:   1%|          | 1/148 [00:00<02:13,  1.11it/s, training_loss=1.3065][A
Epoch 4:   1%|          | 1/148 [00:01<02:13,  1.11it/s, training_loss=1.0766][A
Epoch 4:   1%|▏         | 2/148 [00:01<02:12,  1.10it/s, training_loss=1.0766][A
Epoch 4:   1%|▏         | 2/148 [00:02<02:12,  1.10it/s, training_loss=1.7053][A
Epoch 4:   2%|▏         | 3/148 [00:02<02:12,  1.10it/s, training_loss=1.7053][A
Epoch 4:   2%|▏         | 3/148 [00:03<02:12,  1.10it/s, training_loss=1.3649][A
Epoch 4:   3%|▎         | 4/148 [00:03<02:11,  1.09it/s, training_loss=1.3649][A
Epoch 4:   3%|▎         | 4/148 [00:04<02:11,  1.09it/s, training_loss=1.3751][A
Epoch 4:   3%|▎         | 5/148 [00:04<02:11,  1.09it/s, training_loss=1.3751][A
Epoch 4:   3%

2025-02-16 19:35:06,858 - INFO - Starting model evaluation...
2025-02-16 19:35:06,860 - INFO - Memory usage after evaluation start: 3887.88 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:07,  3.69it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.65it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.63it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.62it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.62it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.62it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.61it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.62it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:04,  3.62it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.63it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.61it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.61it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.62it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.61it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.60it/s][A
Evaluatin

2025-02-16 19:35:14,100 - INFO - Optimizing classification thresholds...
2025-02-16 19:35:14,114 - INFO - Class 'Drama': Optimal threshold = 0.650, F1 Score = 0.520
2025-02-16 19:35:14,128 - INFO - Class 'Horor': Optimal threshold = 0.600, F1 Score = 0.742
2025-02-16 19:35:14,142 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.578
2025-02-16 19:35:14,156 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.393
2025-02-16 19:35:14,172 - INFO - Class 'Romantis': Optimal threshold = 0.600, F1 Score = 0.514
2025-02-16 19:35:14,196 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:35:14,201 - INFO - 
Metrics for Drama:
2025-02-16 19:35:14,202 - INFO - Accuracy: 0.6820
2025-02-16 19:35:14,202 - INFO - F1_score: 0.5202
2025-02-16 19:35:14,203 - INFO - Precision: 0.4545
2025-02-16 19:35:14,204 - INFO - Recall: 0.6081
2025-02-16 19:35:14,211 - INFO - 
Metrics for Horor:
2025-02-16 19:35:14,211 - INFO - Accuracy: 0.8774
2025-02-16 19:35:14,212 - INFO - F1_score: 0




2025-02-16 19:35:18,082 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:35:18,084 - INFO - Memory usage after evaluation end: 3893.38 MB


Training Progress:   3%|▎         | 3/100 [10:19<4:10:43, 155.09s/it, Train Loss=1.3552, Val Loss=0.0469, Accuracy=0.7839]

2025-02-16 19:35:25,291 - INFO - New best accuracy: 0.7839
2025-02-16 19:35:26,393 - INFO - Learning rate: 5e-06


Training Progress:   4%|▍         | 4/100 [10:20<4:07:59, 155.00s/it, Train Loss=1.3552, Val Loss=0.0469, Accuracy=0.7839]
Epoch 5:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 5:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=1.2095][A
Epoch 5:   1%|          | 1/148 [00:00<02:15,  1.09it/s, training_loss=1.2095][A
Epoch 5:   1%|          | 1/148 [00:01<02:15,  1.09it/s, training_loss=1.5343][A
Epoch 5:   1%|▏         | 2/148 [00:01<02:13,  1.10it/s, training_loss=1.5343][A
Epoch 5:   1%|▏         | 2/148 [00:02<02:13,  1.10it/s, training_loss=1.6707][A
Epoch 5:   2%|▏         | 3/148 [00:02<02:13,  1.09it/s, training_loss=1.6707][A
Epoch 5:   2%|▏         | 3/148 [00:03<02:13,  1.09it/s, training_loss=0.9994][A
Epoch 5:   3%|▎         | 4/148 [00:03<02:11,  1.10it/s, training_loss=0.9994][A
Epoch 5:   3%|▎         | 4/148 [00:04<02:11,  1.10it/s, training_loss=0.9855][A
Epoch 5:   3%|▎         | 5/148 [00:04<02:10,  1.09it/s, training_loss=0.9855][A
Epoch 5:   3%

2025-02-16 19:37:41,922 - INFO - Starting model evaluation...
2025-02-16 19:37:41,923 - INFO - Memory usage after evaluation start: 3893.80 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:06,  3.72it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.66it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.65it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.62it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.62it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.62it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.61it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.60it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:05,  3.59it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.60it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.61it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.60it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.59it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.58it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.58it/s][A
Evaluatin

2025-02-16 19:37:49,175 - INFO - Optimizing classification thresholds...
2025-02-16 19:37:49,193 - INFO - Class 'Drama': Optimal threshold = 0.650, F1 Score = 0.519
2025-02-16 19:37:49,209 - INFO - Class 'Horor': Optimal threshold = 0.700, F1 Score = 0.772
2025-02-16 19:37:49,225 - INFO - Class 'Komedi': Optimal threshold = 0.650, F1 Score = 0.590
2025-02-16 19:37:49,240 - INFO - Class 'Laga': Optimal threshold = 0.450, F1 Score = 0.415
2025-02-16 19:37:49,256 - INFO - Class 'Romantis': Optimal threshold = 0.700, F1 Score = 0.541
2025-02-16 19:37:49,281 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:37:49,287 - INFO - 
Metrics for Drama:
2025-02-16 19:37:49,288 - INFO - Accuracy: 0.7088
2025-02-16 19:37:49,288 - INFO - F1_score: 0.5190
2025-02-16 19:37:49,289 - INFO - Precision: 0.4881
2025-02-16 19:37:49,290 - INFO - Recall: 0.5541
2025-02-16 19:37:49,297 - INFO - 
Metrics for Horor:
2025-02-16 19:37:49,298 - INFO - Accuracy: 0.9004
2025-02-16 19:37:49,298 - INFO - F1_score: 0




2025-02-16 19:37:53,427 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:37:53,428 - INFO - Memory usage after evaluation end: 3899.43 MB


Training Progress:   4%|▍         | 4/100 [12:54<4:07:59, 155.00s/it, Train Loss=1.3408, Val Loss=0.0482, Accuracy=0.7939]

2025-02-16 19:38:00,681 - INFO - New best accuracy: 0.7939
2025-02-16 19:38:01,675 - INFO - Learning rate: 5e-06


Training Progress:   5%|▌         | 5/100 [12:57<4:06:18, 155.56s/it, Train Loss=1.3408, Val Loss=0.0482, Accuracy=0.7939]
Epoch 6:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 6:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=1.1523][A
Epoch 6:   1%|          | 1/148 [00:00<02:20,  1.05it/s, training_loss=1.1523][A
Epoch 6:   1%|          | 1/148 [00:01<02:20,  1.05it/s, training_loss=1.5845][A
Epoch 6:   1%|▏         | 2/148 [00:01<02:15,  1.08it/s, training_loss=1.5845][A
Epoch 6:   1%|▏         | 2/148 [00:02<02:15,  1.08it/s, training_loss=1.0609][A
Epoch 6:   2%|▏         | 3/148 [00:02<02:13,  1.08it/s, training_loss=1.0609][A
Epoch 6:   2%|▏         | 3/148 [00:03<02:13,  1.08it/s, training_loss=0.9931][A
Epoch 6:   3%|▎         | 4/148 [00:03<02:12,  1.09it/s, training_loss=0.9931][A
Epoch 6:   3%|▎         | 4/148 [00:04<02:12,  1.09it/s, training_loss=1.1156][A
Epoch 6:   3%|▎         | 5/148 [00:04<02:10,  1.09it/s, training_loss=1.1156][A
Epoch 6:   3%

2025-02-16 19:40:18,662 - INFO - Starting model evaluation...
2025-02-16 19:40:18,664 - INFO - Memory usage after evaluation start: 3926.55 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:07,  3.69it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.63it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.62it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.62it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.62it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.62it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.60it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.61it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:04,  3.61it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.60it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.60it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.61it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.61it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.61it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.60it/s][A
Evaluatin

2025-02-16 19:40:25,910 - INFO - Optimizing classification thresholds...
2025-02-16 19:40:25,925 - INFO - Class 'Drama': Optimal threshold = 0.500, F1 Score = 0.539
2025-02-16 19:40:25,939 - INFO - Class 'Horor': Optimal threshold = 0.750, F1 Score = 0.759
2025-02-16 19:40:25,953 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.557
2025-02-16 19:40:25,967 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.376
2025-02-16 19:40:25,981 - INFO - Class 'Romantis': Optimal threshold = 0.700, F1 Score = 0.548
2025-02-16 19:40:26,004 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:40:26,009 - INFO - 
Metrics for Drama:
2025-02-16 19:40:26,010 - INFO - Accuracy: 0.6398
2025-02-16 19:40:26,010 - INFO - F1_score: 0.5392
2025-02-16 19:40:26,011 - INFO - Precision: 0.4231
2025-02-16 19:40:26,012 - INFO - Recall: 0.7432
2025-02-16 19:40:26,018 - INFO - 
Metrics for Horor:
2025-02-16 19:40:26,018 - INFO - Accuracy: 0.8927
2025-02-16 19:40:26,019 - INFO - F1_score: 0




2025-02-16 19:40:29,972 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:40:29,973 - INFO - Memory usage after evaluation end: 3931.80 MB


Training Progress:   5%|▌         | 5/100 [15:31<4:06:18, 155.56s/it, Train Loss=1.2832, Val Loss=0.0506, Accuracy=0.7701]

2025-02-16 19:40:37,212 - INFO - Learning rate: 5e-06


Training Progress:   6%|▌         | 6/100 [15:31<4:03:01, 155.12s/it, Train Loss=1.2832, Val Loss=0.0506, Accuracy=0.7701]
Epoch 7:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 7:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=0.8773][A
Epoch 7:   1%|          | 1/148 [00:00<02:14,  1.09it/s, training_loss=0.8773][A
Epoch 7:   1%|          | 1/148 [00:01<02:14,  1.09it/s, training_loss=1.5267][A
Epoch 7:   1%|▏         | 2/148 [00:01<02:13,  1.09it/s, training_loss=1.5267][A
Epoch 7:   1%|▏         | 2/148 [00:02<02:13,  1.09it/s, training_loss=1.0047][A
Epoch 7:   2%|▏         | 3/148 [00:02<02:12,  1.09it/s, training_loss=1.0047][A
Epoch 7:   2%|▏         | 3/148 [00:03<02:12,  1.09it/s, training_loss=1.0603][A
Epoch 7:   3%|▎         | 4/148 [00:03<02:11,  1.09it/s, training_loss=1.0603][A
Epoch 7:   3%|▎         | 4/148 [00:04<02:11,  1.09it/s, training_loss=0.9727][A
Epoch 7:   3%|▎         | 5/148 [00:04<02:10,  1.09it/s, training_loss=0.9727][A
Epoch 7:   3%

2025-02-16 19:42:52,533 - INFO - Starting model evaluation...
2025-02-16 19:42:52,534 - INFO - Memory usage after evaluation start: 3931.93 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:06,  3.72it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.64it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.63it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.62it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.63it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.61it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.61it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.61it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:04,  3.62it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.61it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.61it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.61it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.61it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.61it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.61it/s][A
Evaluatin

2025-02-16 19:42:59,770 - INFO - Optimizing classification thresholds...
2025-02-16 19:42:59,784 - INFO - Class 'Drama': Optimal threshold = 0.500, F1 Score = 0.536
2025-02-16 19:42:59,799 - INFO - Class 'Horor': Optimal threshold = 0.700, F1 Score = 0.773
2025-02-16 19:42:59,813 - INFO - Class 'Komedi': Optimal threshold = 0.700, F1 Score = 0.559
2025-02-16 19:42:59,826 - INFO - Class 'Laga': Optimal threshold = 0.600, F1 Score = 0.348
2025-02-16 19:42:59,840 - INFO - Class 'Romantis': Optimal threshold = 0.750, F1 Score = 0.548
2025-02-16 19:42:59,863 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:42:59,868 - INFO - 
Metrics for Drama:
2025-02-16 19:42:59,869 - INFO - Accuracy: 0.5824
2025-02-16 19:42:59,870 - INFO - F1_score: 0.5362
2025-02-16 19:42:59,870 - INFO - Precision: 0.3913
2025-02-16 19:42:59,871 - INFO - Recall: 0.8514
2025-02-16 19:42:59,877 - INFO - 
Metrics for Horor:
2025-02-16 19:42:59,877 - INFO - Accuracy: 0.8966
2025-02-16 19:42:59,878 - INFO - F1_score: 0




2025-02-16 19:43:03,737 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:43:03,739 - INFO - Memory usage after evaluation end: 3937.68 MB


Training Progress:   6%|▌         | 6/100 [18:05<4:03:01, 155.12s/it, Train Loss=1.2782, Val Loss=0.0509, Accuracy=0.7870]

2025-02-16 19:43:10,973 - INFO - Learning rate: 2.5e-06


Training Progress:   7%|▋         | 7/100 [18:05<3:59:44, 154.67s/it, Train Loss=1.2782, Val Loss=0.0509, Accuracy=0.7870]
Epoch 8:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 8:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=1.3324][A
Epoch 8:   1%|          | 1/148 [00:00<02:15,  1.08it/s, training_loss=1.3324][A
Epoch 8:   1%|          | 1/148 [00:01<02:15,  1.08it/s, training_loss=0.9637][A
Epoch 8:   1%|▏         | 2/148 [00:01<02:13,  1.09it/s, training_loss=0.9637][A
Epoch 8:   1%|▏         | 2/148 [00:02<02:13,  1.09it/s, training_loss=1.5575][A
Epoch 8:   2%|▏         | 3/148 [00:02<02:12,  1.10it/s, training_loss=1.5575][A
Epoch 8:   2%|▏         | 3/148 [00:03<02:12,  1.10it/s, training_loss=0.9150][A
Epoch 8:   3%|▎         | 4/148 [00:03<02:13,  1.08it/s, training_loss=0.9150][A
Epoch 8:   3%|▎         | 4/148 [00:04<02:13,  1.08it/s, training_loss=1.1439][A
Epoch 8:   3%|▎         | 5/148 [00:04<02:12,  1.08it/s, training_loss=1.1439][A
Epoch 8:   3%

2025-02-16 19:45:26,464 - INFO - Starting model evaluation...
2025-02-16 19:45:26,465 - INFO - Memory usage after evaluation start: 3938.05 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:07,  3.68it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.64it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.62it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.60it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.60it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.60it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.60it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.60it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:05,  3.59it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.59it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.58it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.59it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.60it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.60it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.60it/s][A
Evaluatin

2025-02-16 19:45:33,726 - INFO - Optimizing classification thresholds...
2025-02-16 19:45:33,741 - INFO - Class 'Drama': Optimal threshold = 0.500, F1 Score = 0.533
2025-02-16 19:45:33,756 - INFO - Class 'Horor': Optimal threshold = 0.700, F1 Score = 0.784
2025-02-16 19:45:33,770 - INFO - Class 'Komedi': Optimal threshold = 0.700, F1 Score = 0.571
2025-02-16 19:45:33,784 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.358
2025-02-16 19:45:33,797 - INFO - Class 'Romantis': Optimal threshold = 0.600, F1 Score = 0.513
2025-02-16 19:45:33,823 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:45:33,830 - INFO - 
Metrics for Drama:
2025-02-16 19:45:33,830 - INFO - Accuracy: 0.6245
2025-02-16 19:45:33,831 - INFO - F1_score: 0.5333
2025-02-16 19:45:33,832 - INFO - Precision: 0.4118
2025-02-16 19:45:33,833 - INFO - Recall: 0.7568
2025-02-16 19:45:33,839 - INFO - 
Metrics for Horor:
2025-02-16 19:45:33,840 - INFO - Accuracy: 0.8966
2025-02-16 19:45:33,842 - INFO - F1_score: 0




2025-02-16 19:45:37,885 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:45:37,887 - INFO - Memory usage after evaluation end: 3943.68 MB


Training Progress:   7%|▋         | 7/100 [20:39<3:59:44, 154.67s/it, Train Loss=1.2512, Val Loss=0.0519, Accuracy=0.7847]

2025-02-16 19:45:45,143 - INFO - Learning rate: 2.5e-06


Training Progress:   8%|▊         | 8/100 [20:39<3:56:55, 154.51s/it, Train Loss=1.2512, Val Loss=0.0519, Accuracy=0.7847]
Epoch 9:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 9:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=0.9016][A
Epoch 9:   1%|          | 1/148 [00:00<02:14,  1.10it/s, training_loss=0.9016][A
Epoch 9:   1%|          | 1/148 [00:01<02:14,  1.10it/s, training_loss=0.9711][A
Epoch 9:   1%|▏         | 2/148 [00:01<02:13,  1.09it/s, training_loss=0.9711][A
Epoch 9:   1%|▏         | 2/148 [00:02<02:13,  1.09it/s, training_loss=0.8829][A
Epoch 9:   2%|▏         | 3/148 [00:02<02:11,  1.10it/s, training_loss=0.8829][A
Epoch 9:   2%|▏         | 3/148 [00:03<02:11,  1.10it/s, training_loss=1.6129][A
Epoch 9:   3%|▎         | 4/148 [00:03<02:11,  1.10it/s, training_loss=1.6129][A
Epoch 9:   3%|▎         | 4/148 [00:04<02:11,  1.10it/s, training_loss=1.6124][A
Epoch 9:   3%|▎         | 5/148 [00:04<02:10,  1.10it/s, training_loss=1.6124][A
Epoch 9:   3%

2025-02-16 19:48:00,656 - INFO - Starting model evaluation...
2025-02-16 19:48:00,659 - INFO - Memory usage after evaluation start: 3943.80 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:07,  3.70it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.66it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.63it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.61it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.62it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.62it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.60it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.60it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:04,  3.60it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.60it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.60it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.59it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.59it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.60it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.61it/s][A
Evaluatin

2025-02-16 19:48:07,907 - INFO - Optimizing classification thresholds...
2025-02-16 19:48:07,921 - INFO - Class 'Drama': Optimal threshold = 0.500, F1 Score = 0.560
2025-02-16 19:48:07,935 - INFO - Class 'Horor': Optimal threshold = 0.600, F1 Score = 0.768
2025-02-16 19:48:07,950 - INFO - Class 'Komedi': Optimal threshold = 0.650, F1 Score = 0.535
2025-02-16 19:48:07,964 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.351
2025-02-16 19:48:07,977 - INFO - Class 'Romantis': Optimal threshold = 0.700, F1 Score = 0.563
2025-02-16 19:48:08,000 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:48:08,006 - INFO - 
Metrics for Drama:
2025-02-16 19:48:08,007 - INFO - Accuracy: 0.6513
2025-02-16 19:48:08,007 - INFO - F1_score: 0.5604
2025-02-16 19:48:08,008 - INFO - Precision: 0.4361
2025-02-16 19:48:08,010 - INFO - Recall: 0.7838
2025-02-16 19:48:08,015 - INFO - 
Metrics for Horor:
2025-02-16 19:48:08,016 - INFO - Accuracy: 0.8889
2025-02-16 19:48:08,017 - INFO - F1_score: 0




2025-02-16 19:48:11,942 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:48:11,943 - INFO - Memory usage after evaluation end: 3949.43 MB


Training Progress:   8%|▊         | 8/100 [23:13<3:56:55, 154.51s/it, Train Loss=1.2364, Val Loss=0.0523, Accuracy=0.7770]

2025-02-16 19:48:19,189 - INFO - Learning rate: 2.5e-06


Training Progress:   9%|▉         | 9/100 [23:13<3:54:07, 154.37s/it, Train Loss=1.2364, Val Loss=0.0523, Accuracy=0.7770]
Epoch 10:   0%|          | 0/148 [00:00<?, ?it/s][A
Epoch 10:   0%|          | 0/148 [00:00<?, ?it/s, training_loss=1.1094][A
Epoch 10:   1%|          | 1/148 [00:00<02:13,  1.10it/s, training_loss=1.1094][A
Epoch 10:   1%|          | 1/148 [00:01<02:13,  1.10it/s, training_loss=1.0440][A
Epoch 10:   1%|▏         | 2/148 [00:01<02:13,  1.10it/s, training_loss=1.0440][A
Epoch 10:   1%|▏         | 2/148 [00:02<02:13,  1.10it/s, training_loss=0.9161][A
Epoch 10:   2%|▏         | 3/148 [00:02<02:12,  1.09it/s, training_loss=0.9161][A
Epoch 10:   2%|▏         | 3/148 [00:03<02:12,  1.09it/s, training_loss=1.6043][A
Epoch 10:   3%|▎         | 4/148 [00:03<02:11,  1.10it/s, training_loss=1.6043][A
Epoch 10:   3%|▎         | 4/148 [00:04<02:11,  1.10it/s, training_loss=0.9361][A
Epoch 10:   3%|▎         | 5/148 [00:04<02:10,  1.09it/s, training_loss=0.9361][A
Ep

2025-02-16 19:50:34,667 - INFO - Starting model evaluation...
2025-02-16 19:50:34,668 - INFO - Memory usage after evaluation start: 3949.55 MB



Evaluating:   0%|          | 0/27 [00:00<?, ?it/s][A
Evaluating:   4%|▎         | 1/27 [00:00<00:07,  3.70it/s][A
Evaluating:   7%|▋         | 2/27 [00:00<00:06,  3.63it/s][A
Evaluating:  11%|█         | 3/27 [00:00<00:06,  3.62it/s][A
Evaluating:  15%|█▍        | 4/27 [00:01<00:06,  3.62it/s][A
Evaluating:  19%|█▊        | 5/27 [00:01<00:06,  3.62it/s][A
Evaluating:  22%|██▏       | 6/27 [00:01<00:05,  3.62it/s][A
Evaluating:  26%|██▌       | 7/27 [00:01<00:05,  3.61it/s][A
Evaluating:  30%|██▉       | 8/27 [00:02<00:05,  3.62it/s][A
Evaluating:  33%|███▎      | 9/27 [00:02<00:04,  3.61it/s][A
Evaluating:  37%|███▋      | 10/27 [00:02<00:04,  3.61it/s][A
Evaluating:  41%|████      | 11/27 [00:03<00:04,  3.62it/s][A
Evaluating:  44%|████▍     | 12/27 [00:03<00:04,  3.62it/s][A
Evaluating:  48%|████▊     | 13/27 [00:03<00:03,  3.61it/s][A
Evaluating:  52%|█████▏    | 14/27 [00:03<00:03,  3.61it/s][A
Evaluating:  56%|█████▌    | 15/27 [00:04<00:03,  3.62it/s][A
Evaluatin

2025-02-16 19:50:41,909 - INFO - Optimizing classification thresholds...
2025-02-16 19:50:41,924 - INFO - Class 'Drama': Optimal threshold = 0.600, F1 Score = 0.547
2025-02-16 19:50:41,937 - INFO - Class 'Horor': Optimal threshold = 0.700, F1 Score = 0.772
2025-02-16 19:50:41,950 - INFO - Class 'Komedi': Optimal threshold = 0.600, F1 Score = 0.612
2025-02-16 19:50:41,963 - INFO - Class 'Laga': Optimal threshold = 0.550, F1 Score = 0.355
2025-02-16 19:50:41,976 - INFO - Class 'Romantis': Optimal threshold = 0.600, F1 Score = 0.571
2025-02-16 19:50:41,998 - INFO - 
Per-genre Performance Metrics:
2025-02-16 19:50:42,003 - INFO - 
Metrics for Drama:
2025-02-16 19:50:42,003 - INFO - Accuracy: 0.6513
2025-02-16 19:50:42,004 - INFO - F1_score: 0.5473
2025-02-16 19:50:42,006 - INFO - Precision: 0.4331
2025-02-16 19:50:42,007 - INFO - Recall: 0.7432
2025-02-16 19:50:42,012 - INFO - 
Metrics for Horor:
2025-02-16 19:50:42,013 - INFO - Accuracy: 0.8889
2025-02-16 19:50:42,014 - INFO - F1_score: 0




2025-02-16 19:50:45,849 - INFO - Confusion matrices saved in: /kaggle/working/logs/experiments/20250216_171623/plots/confusion_matrices
2025-02-16 19:50:45,851 - INFO - Memory usage after evaluation end: 3955.30 MB


Training Progress:   9%|▉         | 9/100 [25:47<3:54:07, 154.37s/it, Train Loss=1.2474, Val Loss=0.0517, Accuracy=0.7923]

2025-02-16 19:50:53,065 - INFO - 
Early stopping triggered after 10 epochs


Training Progress:   9%|▉         | 9/100 [25:47<4:20:44, 171.92s/it, Train Loss=1.2474, Val Loss=0.0517, Accuracy=0.7923]


2025-02-16 19:50:55,010 - INFO - Saved training history plots to /kaggle/working/logs/experiments/20250216_171623/plots/training_history.png
2025-02-16 19:50:55,011 - INFO - Training history saved successfully
2025-02-16 19:50:57,465 - INFO - 
Testing model on a sample...
2025-02-16 19:50:57,474 - INFO - Loading and preprocessing data...
2025-02-16 19:50:57,475 - INFO - Memory usage after start: 3959.05 MB
2025-02-16 19:50:57,488 - INFO - Successfully loaded data using utf-8 encoding
2025-02-16 19:50:57,489 - INFO - Memory usage after data loading: 3959.05 MB
2025-02-16 19:50:57,490 - INFO - Taking sample of 1 from 1738 total samples
2025-02-16 19:50:57,491 - INFO - 
Sample data:
2025-02-16 19:50:57,492 - INFO - 
Sample 1:
2025-02-16 19:50:57,493 - INFO - Synopsis: Setelah kematian yang tampak, Siena mampu melihat tanda-tanda bahwa orang-orang akan meninggal. Namu...
2025-02-16 19:50:57,494 - INFO - Genre: Horor
2025-02-16 19:50:57,495 - INFO - 
Preprocessing text data...


100%|██████████| 1/1 [00:00<00:00, 2534.32it/s]

2025-02-16 19:50:57,501 - INFO - Memory usage after preprocessing: 3959.05 MB
2025-02-16 19:50:57,551 - INFO - 
Sample prediction results:
2025-02-16 19:50:57,552 - INFO - Sample text: setelah kematian yang tampak siena mampu melihat tanda tanda bahwa orang orang akan meninggal namun ...
2025-02-16 19:50:57,552 - INFO - Genre: Horor, Probability: 0.9183, Threshold Used: 0.700
2025-02-16 19:50:57,555 - INFO - 
Training completed successfully!
2025-02-16 19:50:57,555 - INFO - All results and models saved in: /kaggle/working/logs/experiments/20250216_171623
2025-02-16 19:50:57,556 - INFO - 
Cleaning up resources...





2025-02-16 19:50:58,831 - INFO - Cleaning up resources...


In [7]:
!zip -r folder.zip /kaggle/working/logs/experiments

  adding: kaggle/working/logs/experiments/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250216_171623/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250216_171623/model/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250216_171623/model/best_loss/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250216_171623/model/best_loss/config.json (deflated 56%)
  adding: kaggle/working/logs/experiments/20250216_171623/model/best_loss/model.safetensors (deflated 7%)
  adding: kaggle/working/logs/experiments/20250216_171623/model/best_accuracy/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250216_171623/model/best_accuracy/config.json (deflated 56%)
  adding: kaggle/working/logs/experiments/20250216_171623/model/best_accuracy/model.safetensors (deflated 7%)
  adding: kaggle/working/logs/experiments/20250216_171623/metrics/ (stored 0%)
  adding: kaggle/working/logs/experiments/20250216_171623/metrics/evaluation_metrics.json (deflated 68%)
  

In [8]:
from IPython.display import FileLink
FileLink(r'folder.zip')