In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/datasets-classificationsynopsis/final_combined_movies_5genres.csv


In [2]:
# -*- coding: utf-8 -*-
"""
Movie Genre Classification with IndoBERT
Environment: Kaggle
"""

!pip install optuna




In [3]:
# BAGIAN PERTAMA - Import dan Konfigurasi
import os
import logging
import datetime
import json
import argparse
import gc
import sys
import codecs
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Union

import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
import torch.nn.functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import re
import psutil
import optuna
from optuna.trial import Trial

# Define Base Path for Kaggle
BASE_PATH = Path('/kaggle/working/genrematics-optuna-app')

In [4]:
# Configuration Constants
class Config:
    # Model Parameters dengan fixed values
    MODEL_PARAMS = {
        'EPOCHS': 20,
        'BATCH_SIZE': 16,  # Fixed
        'LEARNING_RATE': 2e-5,  # Fixed
        'MAX_LENGTH': 512,
        'TEST_SIZE': 0.15,
        'WEIGHT_DECAY': 0.01,  # Fixed
        'MIXUP_PROB': 0.3,  # Fixed
        'PATIENCE': 5,
        'SMOOTHING': 0.1  # Fixed
    }

    # Optimization Parameters dengan fixed options
    OPTIM_PARAMS = {
        'batch_size': [8, 16, 32],  # Fixed values
        'learning_rate': [1e-5, 2e-5, 3e-5],
        'weight_decay': [0.01, 0.02],
        'mixup_prob': [0.2, 0.3],
        'smoothing': [0.1, 0.15]
    }

    # Paths Configuration untuk Kaggle
    BASE_DIR = BASE_PATH
    DATA_DIR = Path('/kaggle/input/datasets-classificationsynopsis')
    LOG_DIR = BASE_DIR / 'logs'
    BACKUP_DIR = BASE_DIR / 'backups'
    TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    EXPERIMENT_DIR = LOG_DIR / 'experiments' / TIMESTAMP

    # Model and Data Paths
    MODEL_SAVE_DIR = EXPERIMENT_DIR / 'model'
    TOKENIZER_SAVE_DIR = EXPERIMENT_DIR / 'tokenizer'
    METRICS_DIR = EXPERIMENT_DIR / 'metrics'
    PLOTS_DIR = EXPERIMENT_DIR / 'plots'
    CM_DIR = PLOTS_DIR / 'confusion_matrices'

    # Model Files
    MODEL_BEST_ACC = MODEL_SAVE_DIR / "best_accuracy"
    MODEL_BEST_LOSS = MODEL_SAVE_DIR / "best_loss"
    TOKENIZER_BEST_ACC = TOKENIZER_SAVE_DIR / "best_accuracy"
    TOKENIZER_BEST_LOSS = TOKENIZER_SAVE_DIR / "best_loss"
    DATA_PATH = DATA_DIR / "final_combined_movies_5genres.csv"

    # Device Configuration
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    SAMPLE_SIZE: Optional[int] = None

    @classmethod
    def create_directories(cls) -> None:
        """Create all necessary directories in Kaggle working directory"""
        directories = [
            cls.LOG_DIR, cls.BACKUP_DIR,
            cls.EXPERIMENT_DIR, cls.MODEL_SAVE_DIR, cls.TOKENIZER_SAVE_DIR,
            cls.METRICS_DIR, cls.PLOTS_DIR, cls.CM_DIR
        ]
        for dir_path in directories:
            dir_path.mkdir(parents=True, exist_ok=True)
            print(f"Created directory: {dir_path}")

    @classmethod
    def setup_logging(cls) -> None:
        """Setup logging configuration untuk Kaggle"""
        log_file = cls.EXPERIMENT_DIR / 'training.log'
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file, encoding='utf-8', mode='a'),
                logging.StreamHandler(sys.stdout)
            ]
        )
        logging.info(f"Log file created at: {log_file}")

# Environment Check Function
def check_environment() -> bool:
    """Verify Kaggle environment and paths"""
    try:
        # Check if data directory exists
        if not Config.DATA_DIR.exists():
            raise RuntimeError(f"Data directory tidak ditemukan di: {Config.DATA_DIR}")

        # Check if dataset exists
        if not Config.DATA_PATH.exists():
            raise RuntimeError(f"Dataset tidak ditemukan di: {Config.DATA_PATH}")

        # Check GPU availability
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9  # Convert to GB
            print(f"GPU tersedia: {gpu_name}")
            print(f"GPU Memory: {gpu_memory:.2f} GB")
        else:
            print("WARNING: GPU tidak tersedia, menggunakan CPU")

        return True
    except Exception as e:
        print(f"Error dalam setup environment: {str(e)}")
        return False

# Memory Management
class ModelManager:
    """Context manager for model memory management"""
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def __enter__(self):
        return self.model, self.tokenizer

    def __exit__(self, exc_type, exc_val, exc_tb):
        del self.model
        del self.tokenizer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

# Error Handling
def error_handler(func):
    """Decorator for consistent error handling"""
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logging.error(f"Error in {func.__name__}: {str(e)}")
            raise
    return wrapper

# Utility Functions
def get_memory_usage() -> float:
    """Get current memory usage of the program"""
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / 1024 / 1024  # in MB

def log_memory(step_name: str) -> None:
    """Log memory usage with consistent format"""
    memory = get_memory_usage()
    logging.info(f"Memory usage after {step_name}: {memory:.2f} MB")

def log_system_info() -> None:
    """Log system information including GPU details"""
    logging.info("System Information:")
    logging.info(f"Python Version: {sys.version}")
    logging.info(f"CPU Count: {os.cpu_count()}")
    logging.info(f"Initial Memory Usage: {get_memory_usage():.2f} MB")
    if torch.cuda.is_available():
        logging.info(f"GPU Device: {torch.cuda.get_device_name(0)}")
        logging.info(f"GPU Memory Total: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        logging.info(f"CUDA Version: {torch.version.cuda}")

# Create directories and setup logging
if check_environment():
    Config.create_directories()
    Config.setup_logging()
    log_system_info()
else:
    print("Failed to initialize environment. Please check the setup.")
    sys.exit(1)

GPU tersedia: Tesla T4
GPU Memory: 15.83 GB
Created directory: /kaggle/working/genrematics-optuna-app/logs
Created directory: /kaggle/working/genrematics-optuna-app/backups
Created directory: /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331
Created directory: /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/model
Created directory: /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/tokenizer
Created directory: /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/metrics
Created directory: /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots
Created directory: /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices
2025-02-10 22:03:31,806 - INFO - Log file created at: /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/training.log
2025-02-10 22:03:31,807 - INFO - System Information:
2025-02-10 22:03:31,807 - INFO - P

In [5]:
class MovieDataset(Dataset):
    """Dataset class untuk movie genre classification"""
    def __init__(self, texts: Union[List, np.ndarray],
                 labels: Union[List, np.ndarray],
                 tokenizer,
                 max_length: int = 512):
        # Input validation
        if not isinstance(texts, (list, np.ndarray)):
            raise ValueError("texts must be a list or numpy array")
        if not isinstance(labels, (list, np.ndarray)):
            raise ValueError("labels must be a list or numpy array")
        if len(texts) != len(labels):
            raise ValueError("texts and labels must have the same length")

        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self) -> int:
        return len(self.texts)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        text = str(self.texts[idx])
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Pastikan labels dalam format yang benar
        if isinstance(self.labels[idx], (list, np.ndarray)):
            labels = self.labels[idx]
        else:
            labels = [self.labels[idx]]

        return {
            'input_ids': encoding['input_ids'].flatten().long(),
            'attention_mask': encoding['attention_mask'].flatten().long(),
            'labels': torch.FloatTensor(labels)
        }

class DataProcessor:
    """Class for handling data preprocessing and loading"""
    @staticmethod
    def clean_text(text: str) -> str:
        """Clean and preprocess text data"""
        if isinstance(text, str):
            text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
            text = re.sub(r'\S+@\S+', '', text)
            text = re.sub(r'[^\w\s]', ' ', text)
            text = re.sub(r'\s+', ' ', text)
            return text.strip().lower()
        return ''

    @staticmethod
    @error_handler
    def load_and_preprocess_data(data_path: Path, sample_size: Optional[int] = None) -> pd.DataFrame:
        """Load and preprocess data with proper encoding handling"""
        if not data_path.exists():
            raise FileNotFoundError(f"Data file not found: {data_path}")

        if sample_size is not None and (not isinstance(sample_size, int) or sample_size <= 0):
            raise ValueError("sample_size must be a positive integer")

        logging.info("Loading and preprocessing data...")
        log_memory("start")
        initial_size = None

        # Try different encodings for Kaggle compatibility
        encodings_to_try = ['utf-8', 'utf-8-sig', 'latin1', 'iso-8859-1', 'cp1252']
        df = None

        for encoding in encodings_to_try:
            try:
                df = pd.read_csv(data_path, encoding=encoding)
                logging.info(f"Successfully loaded data using {encoding} encoding")
                initial_size = len(df)
                break
            except (UnicodeDecodeError, UnicodeError):
                continue

        if df is None:
            raise UnicodeError(f"Failed to read file with any of these encodings: {encodings_to_try}")

        log_memory("data loading")

        # Sample data if requested
        if sample_size:
            if sample_size > initial_size:
                logging.warning(f"Requested sample_size ({sample_size}) is larger than dataset size ({initial_size})")
                sample_size = initial_size
            logging.info(f"Taking sample of {sample_size} from {initial_size} total samples")
            df = df.head(sample_size)
        else:
            logging.info(f"Using full dataset with {initial_size} samples")

        # Log sample data
        logging.info("\nSample data:")
        for i, row in df.head(3).iterrows():
            logging.info(f"\nSample {i+1}:")
            logging.info(f"Synopsis: {row['sinopsis'][:100]}...")
            logging.info(f"Genre: {row['genre']}")

        # Preprocess data
        logging.info("\nPreprocessing text data...")
        tqdm.pandas()
        df['sinopsis'] = df['sinopsis'].progress_apply(DataProcessor.clean_text)
        df['genre'] = df['genre'].str.split(',')
        df = df.dropna(subset=['sinopsis', 'genre'])

        # Calculate and log dataset statistics
        stats = DataProcessor.calculate_dataset_statistics(df)

        # Save statistics to file
        stats_file = Config.METRICS_DIR / 'dataset_statistics.json'
        with open(stats_file, 'w', encoding='utf-8') as f:
            json.dump(stats, f, indent=4, ensure_ascii=False)

        log_memory("preprocessing")
        return df

    @staticmethod
    def prepare_data(df: pd.DataFrame, mlb: MultiLabelBinarizer) -> Tuple:
        """Prepare data for training"""
        genre_labels = mlb.fit_transform(df['genre'])
        return train_test_split(
            df['sinopsis'].values,
            genre_labels,
            test_size=Config.MODEL_PARAMS['TEST_SIZE'],
            random_state=42,
            stratify=genre_labels if len(genre_labels.shape) == 1 else None
        )

    @staticmethod
    def create_weighted_sampler(genre_labels: np.ndarray) -> WeightedRandomSampler:
        """Create weighted sampler for balanced batch sampling"""
        logging.info("Creating weighted sampler for balanced batch sampling...")

        sample_weights = np.zeros(len(genre_labels))
        for i in range(genre_labels.shape[1]):
            sample_weights += genre_labels[:, i] * (1.0 / np.sum(genre_labels[:, i]))

        sample_weights = sample_weights / sample_weights.sum()
        sampler = WeightedRandomSampler(
            weights=sample_weights,
            num_samples=len(sample_weights),
            replacement=True
        )

        logging.info(f"Created sampler with {len(sample_weights)} weights")
        return sampler

    @staticmethod
    def calculate_class_weights(genre_labels: np.ndarray, mlb: MultiLabelBinarizer) -> torch.Tensor:
        """Calculate class weights for handling imbalanced data"""
        class_weights = []
        logging.info("\nCalculating class weights for handling imbalanced data...")

        for i in range(genre_labels.shape[1]):
            genre = mlb.classes_[i]
            positive_samples = np.sum(genre_labels[:, i])
            total_samples = len(genre_labels)

            weights = compute_class_weight(
                class_weight='balanced',
                classes=np.array([0, 1]),
                y=genre_labels[:, i]
            )
            class_weights.append(weights[1])

            logging.info(f"{genre}:")
            logging.info(f"  Positive samples: {positive_samples}")
            logging.info(f"  Negative samples: {total_samples - positive_samples}")
            logging.info(f"  Weight: {weights[1]:.2f}")

        return torch.FloatTensor(class_weights).to(Config.DEVICE)

    @staticmethod
    def calculate_dataset_statistics(df: pd.DataFrame) -> Dict:
        """Calculate comprehensive dataset statistics"""
        logging.info("Calculating dataset statistics...")
        stats = {
            'general_stats': {
                'total_samples': len(df),
                'unique_genres': len(set([g for genres in df['genre'] for g in genres])),
                'avg_synopsis_length': float(df['sinopsis'].str.len().mean()),
                'null_values': df.isnull().sum().to_dict()
            },
            'genre_stats': DataProcessor.analyze_genre_combinations(df['genre'].values),
            'text_stats': DataProcessor.calculate_text_statistics(df['sinopsis'].values)
        }

        logging.info("\nDataset Statistics:")
        logging.info(f"Total samples: {stats['general_stats']['total_samples']}")
        logging.info(f"Unique genres: {stats['general_stats']['unique_genres']}")

        return stats

    @staticmethod
    def calculate_text_statistics(texts: np.ndarray) -> Dict:
        """Calculate detailed text statistics"""
        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1')
        token_lengths = []
        char_lengths = []
        truncated_count = 0

        logging.info("Calculating text statistics...")
        for text in tqdm(texts, desc="Analyzing texts"):
            tokens = tokenizer.tokenize(text)
            token_lengths.append(len(tokens))
            char_lengths.append(len(text))
            if len(tokens) > Config.MODEL_PARAMS['MAX_LENGTH']:
                truncated_count += 1

        return {
            'avg_token_length': float(np.mean(token_lengths)),
            'max_token_length': int(np.max(token_lengths)),
            'min_token_length': int(np.min(token_lengths)),
            'avg_char_length': float(np.mean(char_lengths)),
            'max_char_length': int(np.max(char_lengths)),
            'min_char_length': int(np.min(char_lengths)),
            'truncated_sequences': truncated_count,
            'total_sequences': len(texts),
            'truncation_percentage': float(truncated_count / len(texts) * 100)
        }

    @staticmethod
    def analyze_genre_combinations(genres: List[List[str]]) -> Dict:
        """Analyze genre combination patterns"""
        logging.info("Analyzing genre combinations...")
        combinations = []
        for genre_list in genres:
            combinations.append(','.join(sorted(genre_list)))

        combination_counts = pd.Series(combinations).value_counts()

        return {
            'total_combinations': len(combination_counts),
            'unique_combinations': combination_counts.to_dict(),
            'top_combinations': combination_counts.head(10).to_dict(),
            'single_genre_count': sum(len(g) == 1 for g in genres),
            'multi_genre_count': sum(len(g) > 1 for g in genres),
            'max_genres_per_item': max(len(g) for g in genres),
            'avg_genres_per_item': float(np.mean([len(g) for g in genres]))
        }

In [6]:
class ModelSetup:
    """Class for handling model setup and data loaders"""
    @staticmethod
    def setup_model_and_tokenizer(num_labels: int) -> Tuple:
        """Setup model dan tokenizer"""
        logging.info("Setting up model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1')
        model = AutoModelForSequenceClassification.from_pretrained(
            'indobenchmark/indobert-base-p1',
            num_labels=num_labels,
            problem_type="multi_label_classification"
        ).to(Config.DEVICE)
        logging.info("Model and tokenizer setup completed")
        return model, tokenizer

    @staticmethod
    def setup_dataloaders(X_train: np.ndarray,
                         X_test: np.ndarray,
                         y_train: np.ndarray,
                         y_test: np.ndarray,
                         tokenizer,
                         batch_size: int) -> Tuple:
        """Setup data loaders"""
        logging.info("Setting up data loaders...")
        train_dataset = MovieDataset(X_train, y_train, tokenizer)
        val_dataset = MovieDataset(X_test, y_test, tokenizer)
        sampler = DataProcessor.create_weighted_sampler(y_train)

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            sampler=sampler,
            num_workers=2,  # Adjusted for Kaggle
            pin_memory=True if torch.cuda.is_available() else False
        )

        val_loader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            num_workers=2,  # Adjusted for Kaggle
            pin_memory=True if torch.cuda.is_available() else False
        )

        logging.info(f"Created data loaders with batch size {batch_size}")
        logging.info(f"Training batches: {len(train_loader)}")
        logging.info(f"Validation batches: {len(val_loader)}")
        
        return train_loader, val_loader

    @staticmethod
    def validate_model_setup(model, tokenizer, num_labels: int) -> bool:
        """Validate model and tokenizer setup"""
        try:
            # Check model configuration
            if model.num_labels != num_labels:
                raise ValueError(f"Model has {model.num_labels} labels, expected {num_labels}")
            
            # Validate tokenizer
            sample_text = "Contoh teks untuk validasi"
            tokens = tokenizer(
                sample_text,
                max_length=Config.MODEL_PARAMS['MAX_LENGTH'],
                padding='max_length',
                truncation=True,
                return_tensors='pt'
            )
            
            # Check token shapes
            if tokens['input_ids'].shape[1] > Config.MODEL_PARAMS['MAX_LENGTH']:
                raise ValueError("Tokenizer producing sequences longer than max_length")
            
            # Test model forward pass
            with torch.no_grad():
                outputs = model(**tokens)
                if outputs.logits.shape[1] != num_labels:
                    raise ValueError(f"Model output shape mismatch: got {outputs.logits.shape[1]}, expected {num_labels}")
            
            logging.info("Model and tokenizer validation successful")
            return True
            
        except Exception as e:
            logging.error(f"Model validation failed: {str(e)}")
            return False

    @staticmethod
    def log_model_info(model) -> None:
        """Log model architecture and parameters"""
        try:
            total_params = sum(p.numel() for p in model.parameters())
            trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
            
            logging.info("\nModel Information:")
            logging.info(f"Model Type: {model.__class__.__name__}")
            logging.info(f"Total Parameters: {total_params:,}")
            logging.info(f"Trainable Parameters: {trainable_params:,}")
            logging.info(f"Non-trainable Parameters: {total_params - trainable_params:,}")
            
            # Log memory usage
            if torch.cuda.is_available():
                memory_allocated = torch.cuda.memory_allocated() / 1024**2
                memory_reserved = torch.cuda.memory_reserved() / 1024**2
                logging.info(f"GPU Memory Allocated: {memory_allocated:.2f} MB")
                logging.info(f"GPU Memory Reserved: {memory_reserved:.2f} MB")
                
        except Exception as e:
            logging.error(f"Error logging model info: {str(e)}")

In [7]:
class LossFunctions:
    """Class untuk menangani berbagai loss functions"""

    @staticmethod
    def focal_loss(predictions: torch.Tensor,
                  targets: torch.Tensor,
                  gamma: float = 2.0,
                  alpha: float = 0.25) -> torch.Tensor:
        """Calculate focal loss for multi-label classification"""
        ce_loss = F.binary_cross_entropy_with_logits(predictions, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = alpha * (1-pt)**gamma * ce_loss
        return focal_loss.mean()

    @staticmethod
    def label_smoothing_loss(outputs: torch.Tensor,
                           targets: torch.Tensor,
                           smoothing: float) -> torch.Tensor:
        """Calculate loss with label smoothing"""
        log_probs = F.log_softmax(outputs, dim=-1)
        targets = torch.clamp(targets * (1.0 - smoothing), min=smoothing / (targets.size(-1) - 1))
        return torch.mean(torch.sum(-targets * log_probs, dim=-1))

class DataAugmentation:
    """Class untuk menangani augmentasi data"""

    @staticmethod
    def apply_mixup(batch: Dict[str, torch.Tensor], alpha: float = 0.2) -> Dict[str, torch.Tensor]:
        """Apply mixup augmentation to batch"""
        # Move tensors to device
        input_ids = batch['input_ids'].to(Config.DEVICE)
        attention_mask = batch['attention_mask'].to(Config.DEVICE)
        labels = batch['labels'].to(Config.DEVICE)

        lam = np.random.beta(alpha, alpha)
        mixed_input_ids = lam * input_ids + (1 - lam) * input_ids.flip(0)
        mixed_attention_mask = lam * attention_mask + (1 - lam) * attention_mask.flip(0)
        mixed_labels = lam * labels + (1 - lam) * labels.flip(0)

        return {
            'input_ids': mixed_input_ids.long(),
            'attention_mask': mixed_attention_mask.long(),
            'labels': mixed_labels
        }

class Visualization:
    """Class untuk menangani visualisasi data"""
    
    @staticmethod
    def plot_training_history(history_data: Dict) -> None:
        """Plot and save training metrics visualization with consistent dict structure
        
        Args:
            history_data: Dictionary containing training history with keys:
                - epoch: List of epoch numbers
                - train_loss: List of training losses
                - val_loss: List of validation losses 
                - val_f1: List of validation F1 scores
                - val_precision: List of validation precision scores
                - val_recall: List of validation recall scores
        """
        plt.figure(figsize=(20, 12))

        # Loss Plot
        plt.subplot(2, 2, 1)
        plt.plot(history_data['epoch'], history_data['train_loss'],
                label='Training Loss', marker='o', linewidth=2)
        plt.plot(history_data['epoch'], history_data['val_loss'],
                label='Validation Loss', marker='o', linewidth=2)
        plt.title('Training vs Validation Loss', fontsize=12, pad=15)
        plt.xlabel('Epoch', fontsize=10)
        plt.ylabel('Loss', fontsize=10)
        plt.legend(fontsize=10)
        plt.grid(True, linestyle='--', alpha=0.7)

        # F1 Score Plot 
        plt.subplot(2, 2, 2)
        plt.plot(history_data['epoch'], history_data['val_f1'],
                label='F1 Score', marker='o', color='green', linewidth=2)
        plt.title('F1 Score Over Time', fontsize=12, pad=15)
        plt.xlabel('Epoch', fontsize=10)
        plt.ylabel('F1 Score', fontsize=10)
        plt.legend(fontsize=10)
        plt.grid(True, linestyle='--', alpha=0.7)

        # Learning Curve
        plt.subplot(2, 2, 3)
        loss_diff = np.array(history_data['train_loss']) - np.array(history_data['val_loss'])
        plt.plot(history_data['epoch'], loss_diff,
                label='Train-Val Loss Difference', marker='o', color='purple', linewidth=2)
        plt.axhline(y=0, color='r', linestyle='--', label='Ideal Difference')
        plt.title('Learning Curve', fontsize=12, pad=15)
        plt.xlabel('Epoch', fontsize=10)
        plt.ylabel('Loss Difference', fontsize=10)
        plt.legend(fontsize=10)
        plt.grid(True, linestyle='--', alpha=0.7)

        # Save plot
        plot_path = Config.PLOTS_DIR / 'training_history.png'
        plt.savefig(plot_path, dpi=300, bbox_inches='tight')
        plt.close()

        logging.info(f"Training history plots saved to {plot_path}")

    @staticmethod
    def plot_confusion_matrices(predictions: np.ndarray,
                              true_labels: np.ndarray,
                              class_names: List[str]) -> None:
        """Plot confusion matrices for each class
        
        Args:
            predictions: Binary predictions array
            true_labels: True binary labels array
            class_names: List of class names
        """
        for idx, class_name in enumerate(class_names):
            cm = confusion_matrix(true_labels[:, idx], predictions[:, idx])
            
            plt.figure(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title(f'Confusion Matrix - {class_name}')
            plt.ylabel('True Label')
            plt.xlabel('Predicted Label')
            
            plot_path = Config.CM_DIR / f'confusion_matrix_{class_name}.png'
            plt.savefig(plot_path, dpi=300, bbox_inches='tight')
            plt.close()

        logging.info(f"Training history plots saved to {plot_path}")

In [8]:
class ModelEvaluator:
    """Class untuk evaluasi model"""

    @staticmethod
    @error_handler
    def evaluate_model(model: torch.nn.Module,
                      val_loader: DataLoader,
                      mlb: MultiLabelBinarizer) -> Dict:
        """Evaluate model performance"""
        model.eval()
        all_preds = []
        all_labels = []
        total_loss = 0
        n_batches = 0

        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Evaluating"):
                input_ids = batch['input_ids'].to(Config.DEVICE)
                attention_mask = batch['attention_mask'].to(Config.DEVICE)
                labels = batch['labels'].to(Config.DEVICE)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                loss = LossFunctions.focal_loss(outputs.logits, labels)
                total_loss += loss.item()
                n_batches += 1

                preds = torch.sigmoid(outputs.logits).cpu().numpy()
                preds_binary = (preds > 0.5).astype(int)
                
                all_preds.extend(preds_binary)
                all_labels.extend(labels.cpu().numpy())

        all_preds = np.array(all_preds)
        all_labels = np.array(all_labels)

        metrics = {
            'loss': total_loss / n_batches,
            'macro_f1': f1_score(all_labels, all_preds, average='macro'),
            'macro_precision': precision_score(all_labels, all_preds, average='macro'),
            'macro_recall': recall_score(all_labels, all_preds, average='macro'),
            'per_class': {
                genre: {
                    'f1': f1_score(all_labels[:, i], all_preds[:, i]),
                    'precision': precision_score(all_labels[:, i], all_preds[:, i]),
                    'recall': recall_score(all_labels[:, i], all_preds[:, i])
                }
                for i, genre in enumerate(mlb.classes_)
            }
        }

        # Plot confusion matrices
        Visualization.plot_confusion_matrices(all_preds, all_labels, mlb.classes_)

        return metrics

class ModelTrainer:
    """Class untuk menangani training model"""
    
    @staticmethod
    def train(model: torch.nn.Module,
             train_loader: DataLoader,
             val_loader: DataLoader,
             mlb: MultiLabelBinarizer,
             n_epochs: int) -> Dict:
        """Full training loop dengan history tracking yang konsisten
        
        Args:
            model: Model PyTorch yang akan dilatih
            train_loader: DataLoader untuk data training
            val_loader: DataLoader untuk data validasi
            mlb: MultiLabelBinarizer yang sudah difit
            n_epochs: Jumlah epoch untuk training
            
        Returns:
            Dict berisi history training dan metrics terbaik
        """
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=Config.MODEL_PARAMS['LEARNING_RATE'],
            weight_decay=Config.MODEL_PARAMS['WEIGHT_DECAY']
        )
        
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=len(train_loader) * n_epochs
        )

        best_metrics = None
        best_loss = float('inf')
        patience_counter = 0
        
        history = {
            'epoch': [],
            'train_loss': [],
            'val_loss': [],
            'val_f1': [],
            'val_precision': [],
            'val_recall': []
        }

        start_time = datetime.datetime.now()

        for epoch in range(n_epochs):
            epoch_start_time = datetime.datetime.now()
            
            # Training phase
            train_metrics = ModelTrainer.train_epoch(
                model=model,
                train_loader=train_loader,
                optimizer=optimizer,
                scheduler=scheduler,
                epoch=epoch + 1
            )
            
            # Validation phase
            val_metrics = ModelEvaluator.evaluate_model(model, val_loader, mlb)
            
            # Update training history
            history['epoch'].append(epoch + 1)
            history['train_loss'].append(train_metrics['loss'])
            history['val_loss'].append(val_metrics['loss'])
            history['val_f1'].append(val_metrics['macro_f1'])
            history['val_precision'].append(val_metrics['macro_precision'])
            history['val_recall'].append(val_metrics['macro_recall'])

            # Save best model based on F1 score
            if best_metrics is None or val_metrics['macro_f1'] > best_metrics['macro_f1']:
                best_metrics = val_metrics
                ModelTrainer.save_model(model, Config.MODEL_BEST_ACC)
                patience_counter = 0
            else:
                patience_counter += 1

            # Save best model based on loss
            if val_metrics['loss'] < best_loss:
                best_loss = val_metrics['loss']
                ModelTrainer.save_model(model, Config.MODEL_BEST_LOSS)

            # Early stopping check
            if patience_counter >= Config.MODEL_PARAMS['PATIENCE']:
                logging.info(f"Early stopping triggered after {epoch + 1} epochs")
                break

            # Calculate epoch time
            epoch_time = datetime.datetime.now() - epoch_start_time
            
            # Log metrics
            logging.info(
                f"Epoch {epoch+1}/{n_epochs} - Time: {epoch_time.total_seconds():.2f}s - "
                f"Train Loss: {train_metrics['loss']:.4f}, "
                f"Val Loss: {val_metrics['loss']:.4f}, "
                f"Val F1: {val_metrics['macro_f1']:.4f}, "
                f"Val Precision: {val_metrics['macro_precision']:.4f}, "
                f"Val Recall: {val_metrics['macro_recall']:.4f}"
            )

            # Plot training history
            Visualization.plot_training_history(history)
            
            # Save metrics to JSON for web app
            metrics_file = Config.METRICS_DIR / 'training_history.json'
            with open(metrics_file, 'w', encoding='utf-8') as f:
                json.dump({
                    'training_history': {
                        'epochs': history['epoch'],
                        'training_loss': history['train_loss'],
                        'validation_loss': history['val_loss'],
                        'accuracy': history['val_f1'],
                        'best_accuracy': max(history['val_f1']),
                        'best_val_loss': min(history['val_loss'])
                    },
                    'model_info': {
                        'classes': mlb.classes_.tolist(),
                        'total_samples': len(train_loader.dataset),
                        'training_time': str(datetime.datetime.now() - start_time),
                        'parameters': {
                            'total_parameters': sum(p.numel() for p in model.parameters()),
                            'trainable_parameters': sum(p.numel() for p in model.parameters() if p.requires_grad)
                        }
                    }
                }, f, indent=4)

            # Memory cleanup after each epoch
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

        total_time = datetime.datetime.now() - start_time
        logging.info(f"Training completed in {total_time}")
        
        return history, best_metrics

    @staticmethod
    def train_epoch(model: torch.nn.Module,
                   train_loader: DataLoader,
                   optimizer: torch.optim.Optimizer,
                   scheduler: torch.optim.lr_scheduler._LRScheduler,
                   epoch: int) -> Dict[str, float]:
        """Training untuk satu epoch
        
        Args:
            model: Model PyTorch yang akan dilatih
            train_loader: DataLoader untuk data training
            optimizer: Optimizer yang digunakan
            scheduler: Learning rate scheduler
            epoch: Nomor epoch saat ini
            
        Returns:
            Dict berisi metrics training untuk epoch ini
        """
        model.train()
        total_loss = 0
        n_batches = 0
        
        # Progress bar
        progress_bar = tqdm(train_loader, desc=f'Training Epoch {epoch}')
        
        for batch in progress_bar:
            # Move batch to device
            input_ids = batch['input_ids'].to(Config.DEVICE)
            attention_mask = batch['attention_mask'].to(Config.DEVICE)
            labels = batch['labels'].to(Config.DEVICE)
            
            # Clear gradients
            optimizer.zero_grad()
            
            # Apply mixup augmentation dengan probability
            if np.random.random() < Config.MODEL_PARAMS['MIXUP_PROB']:
                batch = DataAugmentation.apply_mixup(batch)
                input_ids = batch['input_ids']
                attention_mask = batch['attention_mask']
                labels = batch['labels']
            
            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            
            # Calculate loss dengan label smoothing
            loss = LossFunctions.label_smoothing_loss(
                outputs.logits,
                labels,
                Config.MODEL_PARAMS['SMOOTHING']
            )
            
            # Backward pass
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            # Update weights
            optimizer.step()
            
            # Update learning rate
            scheduler.step()
            
            # Update metrics
            total_loss += loss.item()
            n_batches += 1
            
            # Update progress bar
            progress_bar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'avg_loss': f'{total_loss/n_batches:.4f}',
                'lr': f'{scheduler.get_last_lr()[0]:.2e}'
            })
            
            # Memory cleanup every few batches
            if n_batches % 10 == 0:
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                gc.collect()
        
        # Calculate average loss
        avg_loss = total_loss / n_batches
        
        return {
            'loss': avg_loss,
            'learning_rate': scheduler.get_last_lr()[0]
        }

    @staticmethod
    def save_model(model: torch.nn.Module, path: Path) -> None:
        """Save model checkpoint dengan proper error handling
        
        Args:
            model: Model PyTorch yang akan disimpan
            path: Path untuk menyimpan model
        """
        try:
            # Create directory if not exists
            path.parent.mkdir(parents=True, exist_ok=True)
            
            # Prepare checkpoint data
            checkpoint = {
                'model_state_dict': model.state_dict(),
                'config': model.config.to_dict(),
                'save_time': datetime.datetime.now().strftime("%Y%m%d_%H%M%S"),
                'device': str(next(model.parameters()).device)
            }
            
            # Save temporary file first
            temp_path = path.parent / f"{path.name}.tmp"
            torch.save(checkpoint, temp_path)
            
            # Atomic rename to final path
            temp_path.replace(path)
            
            logging.info(f"Model saved successfully to {path}")
            
        except Exception as e:
            logging.error(f"Error saving model: {str(e)}")
            if temp_path.exists():
                temp_path.unlink()
            raise

In [9]:
class HyperparameterOptimizer:
    @staticmethod
    def objective(trial: Trial, df: pd.DataFrame, mlb: MultiLabelBinarizer) -> float:
        try:
            params = HyperparameterOptimizer._get_trial_parameters(trial)
            logging.info(f"\nTrial {trial.number} Parameters:")
            for name, value in params.items():
                logging.info(f"{name}: {value}")

            X_train, X_test, y_train, y_test = DataProcessor.prepare_data(df, mlb)

            with ModelManager(*ModelSetup.setup_model_and_tokenizer(len(mlb.classes_))) as (model, tokenizer):
                train_loader, val_loader = ModelSetup.setup_dataloaders(
                    X_train, X_test, y_train, y_test,
                    tokenizer, params['batch_size']
                )

                optimizer = torch.optim.AdamW(
                    model.parameters(),
                    lr=params['learning_rate'],
                    weight_decay=params['weight_decay']
                )

                # Training loop untuk optimization
                best_f1 = 0.0
                for epoch in range(3):  # Reduced epochs for optimization
                    model.train()
                    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/3"):
                        optimizer.zero_grad()

                        if np.random.random() < params['mixup_prob']:
                            batch = DataAugmentation.apply_mixup(batch)

                        input_ids = batch['input_ids'].to(Config.DEVICE)
                        attention_mask = batch['attention_mask'].to(Config.DEVICE)
                        labels = batch['labels'].to(Config.DEVICE)

                        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                        loss = LossFunctions.label_smoothing_loss(
                            outputs.logits, labels, params['smoothing']
                        )

                        loss.backward()
                        optimizer.step()

                    # Evaluation
                    metrics = ModelEvaluator.evaluate_model(model, val_loader, mlb)
                    current_f1 = metrics['macro_f1']
                    best_f1 = max(best_f1, current_f1)

                    trial.report(current_f1, epoch)
                    if trial.should_prune():
                        raise optuna.TrialPruned()

                return best_f1

        except optuna.TrialPruned:
            raise
        except Exception as e:
            logging.error(f"Error in trial: {str(e)}")
            return float('-inf')

    @staticmethod
    def _get_trial_parameters(trial: Trial) -> Dict:
        params = {}
        try:
            for name, values in Config.OPTIM_PARAMS.items():
                params[name] = trial.suggest_categorical(name, values)
        except Exception as e:
            logging.error(f"Error getting trial parameters: {str(e)}")
            raise
        return params

    @staticmethod
    def optimize(df: pd.DataFrame, mlb: MultiLabelBinarizer, n_trials: int = 20) -> Dict:
        study = optuna.create_study(
            direction="maximize",
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.MedianPruner()
        )

        study.optimize(
            lambda trial: HyperparameterOptimizer.objective(trial, df, mlb),
            n_trials=n_trials
        )

        logging.info("\nOptimization Results:")
        logging.info(f"Best trial:")
        logging.info(f"  Value: {study.best_trial.value:.4f}")
        logging.info("  Params:")
        for key, value in study.best_trial.params.items():
            logging.info(f"    {key}: {value}")

        results_file = Config.METRICS_DIR / 'optuna_results.json'
        with open(results_file, 'w', encoding='utf-8') as f:
            json.dump({
                'best_trial': {
                    'number': study.best_trial.number,
                    'value': study.best_trial.value,
                    'params': study.best_trial.params
                },
                'all_trials': [{
                    'number': trial.number,
                    'value': trial.value,
                    'params': trial.params
                } for trial in study.trials if trial.value is not None]
            }, f, indent=4)

        return study.best_trial.params

In [10]:
class Args:
    def __init__(self):
        self.sample_size = None
        self.epochs = Config.MODEL_PARAMS['EPOCHS']
        self.batch_size = Config.MODEL_PARAMS['BATCH_SIZE']
        self.learning_rate = Config.MODEL_PARAMS['LEARNING_RATE']
        self.max_length = Config.MODEL_PARAMS['MAX_LENGTH']
        self.test_size = Config.MODEL_PARAMS['TEST_SIZE']
        self.weight_decay = Config.MODEL_PARAMS['WEIGHT_DECAY']
        self.mixup_prob = Config.MODEL_PARAMS['MIXUP_PROB']
        self.patience = Config.MODEL_PARAMS['PATIENCE']
        self.smoothing = Config.MODEL_PARAMS['SMOOTHING']
        self.n_trials = 20
        self.no_cuda = False
        self.seed = 42

def main():
    args = Args()
    
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    try:
        if not check_environment():
            raise RuntimeError("Environment check failed!")

        logging.info("Starting movie genre classification")
        logging.info(f"Using device: {Config.DEVICE}")

        df = DataProcessor.load_and_preprocess_data(Config.DATA_PATH, Config.SAMPLE_SIZE)
        mlb = MultiLabelBinarizer()

        # Hyperparameter optimization
        logging.info("\nStarting hyperparameter optimization...")
        best_params = HyperparameterOptimizer.optimize(df, mlb, args.n_trials)

        # Update config with best parameters
        for param, value in best_params.items():
            if param in Config.MODEL_PARAMS:
                Config.MODEL_PARAMS[param] = value
                logging.info(f"Updated {param}: {value}")

        # Final training with best parameters
        X_train, X_test, y_train, y_test = DataProcessor.prepare_data(df, mlb)
        
        with ModelManager(*ModelSetup.setup_model_and_tokenizer(len(mlb.classes_))) as (model, tokenizer):
            train_loader, val_loader = ModelSetup.setup_dataloaders(
                X_train, X_test, y_train, y_test,
                tokenizer, Config.MODEL_PARAMS['BATCH_SIZE']
            )

            history, best_metrics = ModelTrainer.train(
                model, train_loader, val_loader, mlb, Config.MODEL_PARAMS['EPOCHS']
            )

            # Save final results
            results = {
                'config': Config.MODEL_PARAMS,
                'best_metrics': best_metrics,
                'history': history,
                'classes': mlb.classes_.tolist()
            }

            with open(Config.METRICS_DIR / 'final_results.json', 'w') as f:
                json.dump(results, f, indent=4)

        logging.info("\nTraining completed successfully!")

    except Exception as e:
        logging.error(f"Training failed: {str(e)}")
        raise
    finally:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

if __name__ == "__main__":
    main()


GPU tersedia: Tesla T4
GPU Memory: 15.83 GB
2025-02-10 22:03:31,980 - INFO - Starting movie genre classification
2025-02-10 22:03:31,981 - INFO - Using device: cuda
2025-02-10 22:03:31,982 - INFO - Loading and preprocessing data...
2025-02-10 22:03:31,984 - INFO - Memory usage after start: 635.15 MB
2025-02-10 22:03:32,048 - INFO - Successfully loaded data using utf-8 encoding
2025-02-10 22:03:32,049 - INFO - Memory usage after data loading: 637.70 MB
2025-02-10 22:03:32,050 - INFO - Using full dataset with 1738 samples
2025-02-10 22:03:32,051 - INFO - 
Sample data:
2025-02-10 22:03:32,052 - INFO - 
Sample 1:
2025-02-10 22:03:32,057 - INFO - Synopsis: Setelah kematian yang tampak, Siena mampu melihat tanda-tanda bahwa orang-orang akan meninggal. Namu...
2025-02-10 22:03:32,057 - INFO - Genre: Horor
2025-02-10 22:03:32,058 - INFO - 
Sample 2:
2025-02-10 22:03:32,060 - INFO - Synopsis: Alfi (Al Ghazali) bertemu dengan Alana (Caitlin Halderman), seorang siswa baru di sekolahnya. Ternya...

100%|██████████| 1738/1738 [00:00<00:00, 14018.25it/s]

2025-02-10 22:03:32,205 - INFO - Calculating dataset statistics...
2025-02-10 22:03:32,208 - INFO - Analyzing genre combinations...





tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/229k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

2025-02-10 22:03:32,975 - INFO - Calculating text statistics...


Analyzing texts: 100%|██████████| 1738/1738 [00:00<00:00, 2841.14it/s]

2025-02-10 22:03:33,596 - INFO - 
Dataset Statistics:
2025-02-10 22:03:33,597 - INFO - Total samples: 1738
2025-02-10 22:03:33,598 - INFO - Unique genres: 5
2025-02-10 22:03:33,600 - INFO - Memory usage after preprocessing: 649.95 MB
2025-02-10 22:03:33,601 - INFO - 
Starting hyperparameter optimization...



[I 2025-02-10 22:03:33,603] A new study created in memory with name: no-name-d8832ea2-9a13-4b18-8461-49fea63a2406


2025-02-10 22:03:33,605 - INFO - 
Trial 0 Parameters:
2025-02-10 22:03:33,606 - INFO - batch_size: 16
2025-02-10 22:03:33,606 - INFO - learning_rate: 1e-05
2025-02-10 22:03:33,607 - INFO - weight_decay: 0.02
2025-02-10 22:03:33,608 - INFO - mixup_prob: 0.3
2025-02-10 22:03:33,608 - INFO - smoothing: 0.15
2025-02-10 22:03:33,617 - INFO - Setting up model and tokenizer...


pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:03:52,145 - INFO - Model and tokenizer setup completed
2025-02-10 22:03:52,147 - INFO - Setting up data loaders...
2025-02-10 22:03:52,148 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:03:52,151 - INFO - Created sampler with 1477 weights
2025-02-10 22:03:52,153 - INFO - Created data loaders with batch size 16
2025-02-10 22:03:52,154 - INFO - Training batches: 93
2025-02-10 22:03:52,155 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:01<00:00,  1.31s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.26it/s]


2025-02-10 22:06:03,814 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:07<00:00,  1.37s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.22it/s]


2025-02-10 22:08:21,053 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.40s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 22:10:41,434 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:10:41,893] Trial 0 finished with value: 0.46768909798416186 and parameters: {'batch_size': 16, 'learning_rate': 1e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 0 with value: 0.46768909798416186.


2025-02-10 22:10:41,895 - INFO - 
Trial 1 Parameters:
2025-02-10 22:10:41,896 - INFO - batch_size: 8
2025-02-10 22:10:41,897 - INFO - learning_rate: 3e-05
2025-02-10 22:10:41,897 - INFO - weight_decay: 0.01
2025-02-10 22:10:41,898 - INFO - mixup_prob: 0.2
2025-02-10 22:10:41,899 - INFO - smoothing: 0.15
2025-02-10 22:10:41,905 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:10:42,906 - INFO - Model and tokenizer setup completed
2025-02-10 22:10:42,907 - INFO - Setting up data loaders...
2025-02-10 22:10:42,908 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:10:42,911 - INFO - Created sampler with 1477 weights
2025-02-10 22:10:42,912 - INFO - Created data loaders with batch size 8
2025-02-10 22:10:42,913 - INFO - Training batches: 185
2025-02-10 22:10:42,913 - INFO - Validation batches: 33


Epoch 1/3: 100%|██████████| 185/185 [02:14<00:00,  1.38it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.33it/s]


2025-02-10 22:13:06,679 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 185/185 [02:14<00:00,  1.37it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.34it/s]


2025-02-10 22:15:30,639 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 185/185 [02:14<00:00,  1.38it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.33it/s]


2025-02-10 22:17:54,619 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:17:55,104] Trial 1 finished with value: 0.49078045350690325 and parameters: {'batch_size': 8, 'learning_rate': 3e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.15}. Best is trial 1 with value: 0.49078045350690325.


2025-02-10 22:17:55,106 - INFO - 
Trial 2 Parameters:
2025-02-10 22:17:55,107 - INFO - batch_size: 16
2025-02-10 22:17:55,108 - INFO - learning_rate: 2e-05
2025-02-10 22:17:55,109 - INFO - weight_decay: 0.01
2025-02-10 22:17:55,110 - INFO - mixup_prob: 0.3
2025-02-10 22:17:55,111 - INFO - smoothing: 0.1
2025-02-10 22:17:55,120 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:17:55,991 - INFO - Model and tokenizer setup completed
2025-02-10 22:17:55,992 - INFO - Setting up data loaders...
2025-02-10 22:17:55,993 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:17:55,996 - INFO - Created sampler with 1477 weights
2025-02-10 22:17:55,997 - INFO - Created data loaders with batch size 16
2025-02-10 22:17:55,998 - INFO - Training batches: 93
2025-02-10 22:17:55,999 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.40s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 22:20:16,018 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.18it/s]


2025-02-10 22:22:36,247 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.40s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 22:24:56,343 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:24:56,899] Trial 2 finished with value: 0.4930665075070929 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.3, 'smoothing': 0.1}. Best is trial 2 with value: 0.4930665075070929.


2025-02-10 22:24:56,902 - INFO - 
Trial 3 Parameters:
2025-02-10 22:24:56,903 - INFO - batch_size: 32
2025-02-10 22:24:56,904 - INFO - learning_rate: 3e-05
2025-02-10 22:24:56,905 - INFO - weight_decay: 0.02
2025-02-10 22:24:56,906 - INFO - mixup_prob: 0.3
2025-02-10 22:24:56,906 - INFO - smoothing: 0.15
2025-02-10 22:24:56,916 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:24:57,790 - INFO - Model and tokenizer setup completed
2025-02-10 22:24:57,790 - INFO - Setting up data loaders...
2025-02-10 22:24:57,791 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:24:57,793 - INFO - Created sampler with 1477 weights
2025-02-10 22:24:57,794 - INFO - Created data loaders with batch size 32
2025-02-10 22:24:57,795 - INFO - Training batches: 47
2025-02-10 22:24:57,795 - INFO - Validation batches: 9


Epoch 1/3: 100%|██████████| 47/47 [02:09<00:00,  2.74s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 22:27:16,394 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 22:29:35,061 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.16it/s]


2025-02-10 22:31:53,786 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:31:54,421] Trial 3 finished with value: 0.45861383892833524 and parameters: {'batch_size': 32, 'learning_rate': 3e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 2 with value: 0.4930665075070929.


2025-02-10 22:31:54,423 - INFO - 
Trial 4 Parameters:
2025-02-10 22:31:54,424 - INFO - batch_size: 32
2025-02-10 22:31:54,425 - INFO - learning_rate: 2e-05
2025-02-10 22:31:54,427 - INFO - weight_decay: 0.02
2025-02-10 22:31:54,427 - INFO - mixup_prob: 0.3
2025-02-10 22:31:54,428 - INFO - smoothing: 0.15
2025-02-10 22:31:54,433 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:31:55,348 - INFO - Model and tokenizer setup completed
2025-02-10 22:31:55,350 - INFO - Setting up data loaders...
2025-02-10 22:31:55,351 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:31:55,353 - INFO - Created sampler with 1477 weights
2025-02-10 22:31:55,353 - INFO - Created data loaders with batch size 32
2025-02-10 22:31:55,354 - INFO - Training batches: 47
2025-02-10 22:31:55,355 - INFO - Validation batches: 9


Epoch 1/3: 100%|██████████| 47/47 [02:08<00:00,  2.74s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 22:34:13,949 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.16it/s]


2025-02-10 22:36:32,564 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 22:38:51,445 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:38:52,109] Trial 4 finished with value: 0.5059288514665811 and parameters: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.02, 'mixup_prob': 0.3, 'smoothing': 0.15}. Best is trial 4 with value: 0.5059288514665811.


2025-02-10 22:38:52,111 - INFO - 
Trial 5 Parameters:
2025-02-10 22:38:52,112 - INFO - batch_size: 32
2025-02-10 22:38:52,113 - INFO - learning_rate: 3e-05
2025-02-10 22:38:52,114 - INFO - weight_decay: 0.02
2025-02-10 22:38:52,114 - INFO - mixup_prob: 0.3
2025-02-10 22:38:52,115 - INFO - smoothing: 0.1
2025-02-10 22:38:52,121 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:38:53,021 - INFO - Model and tokenizer setup completed
2025-02-10 22:38:53,022 - INFO - Setting up data loaders...
2025-02-10 22:38:53,023 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:38:53,024 - INFO - Created sampler with 1477 weights
2025-02-10 22:38:53,026 - INFO - Created data loaders with batch size 32
2025-02-10 22:38:53,026 - INFO - Training batches: 47
2025-02-10 22:38:53,028 - INFO - Validation batches: 9


Epoch 1/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 22:41:11,763 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:41:12,425] Trial 5 pruned. 


2025-02-10 22:41:12,428 - INFO - 
Trial 6 Parameters:
2025-02-10 22:41:12,428 - INFO - batch_size: 16
2025-02-10 22:41:12,429 - INFO - learning_rate: 2e-05
2025-02-10 22:41:12,430 - INFO - weight_decay: 0.01
2025-02-10 22:41:12,431 - INFO - mixup_prob: 0.2
2025-02-10 22:41:12,431 - INFO - smoothing: 0.1
2025-02-10 22:41:12,437 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:41:13,667 - INFO - Model and tokenizer setup completed
2025-02-10 22:41:13,668 - INFO - Setting up data loaders...
2025-02-10 22:41:13,669 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:41:13,671 - INFO - Created sampler with 1477 weights
2025-02-10 22:41:13,673 - INFO - Created data loaders with batch size 16
2025-02-10 22:41:13,673 - INFO - Training batches: 93
2025-02-10 22:41:13,674 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 22:43:34,165 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 22:45:54,660 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.40s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 22:48:14,966 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:48:15,663] Trial 6 finished with value: 0.5081454955309341 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 6 with value: 0.5081454955309341.


2025-02-10 22:48:15,666 - INFO - 
Trial 7 Parameters:
2025-02-10 22:48:15,666 - INFO - batch_size: 32
2025-02-10 22:48:15,667 - INFO - learning_rate: 2e-05
2025-02-10 22:48:15,668 - INFO - weight_decay: 0.02
2025-02-10 22:48:15,668 - INFO - mixup_prob: 0.2
2025-02-10 22:48:15,669 - INFO - smoothing: 0.1
2025-02-10 22:48:15,674 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:48:16,575 - INFO - Model and tokenizer setup completed
2025-02-10 22:48:16,576 - INFO - Setting up data loaders...
2025-02-10 22:48:16,577 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:48:16,579 - INFO - Created sampler with 1477 weights
2025-02-10 22:48:16,581 - INFO - Created data loaders with batch size 32
2025-02-10 22:48:16,581 - INFO - Training batches: 47
2025-02-10 22:48:16,582 - INFO - Validation batches: 9


Epoch 1/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 22:50:35,222 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:50:35,912] Trial 7 pruned. 


2025-02-10 22:50:35,915 - INFO - 
Trial 8 Parameters:
2025-02-10 22:50:35,916 - INFO - batch_size: 8
2025-02-10 22:50:35,916 - INFO - learning_rate: 3e-05
2025-02-10 22:50:35,917 - INFO - weight_decay: 0.02
2025-02-10 22:50:35,919 - INFO - mixup_prob: 0.2
2025-02-10 22:50:35,920 - INFO - smoothing: 0.15
2025-02-10 22:50:35,925 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:50:36,840 - INFO - Model and tokenizer setup completed
2025-02-10 22:50:36,841 - INFO - Setting up data loaders...
2025-02-10 22:50:36,842 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:50:36,844 - INFO - Created sampler with 1477 weights
2025-02-10 22:50:36,845 - INFO - Created data loaders with batch size 8
2025-02-10 22:50:36,846 - INFO - Training batches: 185
2025-02-10 22:50:36,847 - INFO - Validation batches: 33


Epoch 1/3: 100%|██████████| 185/185 [02:14<00:00,  1.37it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.33it/s]


2025-02-10 22:53:00,838 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 185/185 [02:14<00:00,  1.37it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.32it/s]


2025-02-10 22:55:24,898 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 185/185 [02:14<00:00,  1.37it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.34it/s]


2025-02-10 22:57:48,812 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 22:57:49,453] Trial 8 finished with value: 0.5039305386522426 and parameters: {'batch_size': 8, 'learning_rate': 3e-05, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.15}. Best is trial 6 with value: 0.5081454955309341.


2025-02-10 22:57:49,455 - INFO - 
Trial 9 Parameters:
2025-02-10 22:57:49,457 - INFO - batch_size: 32
2025-02-10 22:57:49,458 - INFO - learning_rate: 2e-05
2025-02-10 22:57:49,459 - INFO - weight_decay: 0.02
2025-02-10 22:57:49,459 - INFO - mixup_prob: 0.2
2025-02-10 22:57:49,460 - INFO - smoothing: 0.1
2025-02-10 22:57:49,465 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 22:57:50,375 - INFO - Model and tokenizer setup completed
2025-02-10 22:57:50,376 - INFO - Setting up data loaders...
2025-02-10 22:57:50,377 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 22:57:50,379 - INFO - Created sampler with 1477 weights
2025-02-10 22:57:50,380 - INFO - Created data loaders with batch size 32
2025-02-10 22:57:50,381 - INFO - Training batches: 47
2025-02-10 22:57:50,382 - INFO - Validation batches: 9


Epoch 1/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.16it/s]


2025-02-10 23:00:09,060 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 23:02:27,617 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 23:04:46,156 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:04:47,014] Trial 9 finished with value: 0.5077956186041643 and parameters: {'batch_size': 32, 'learning_rate': 2e-05, 'weight_decay': 0.02, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 6 with value: 0.5081454955309341.


2025-02-10 23:04:47,028 - INFO - 
Trial 10 Parameters:
2025-02-10 23:04:47,029 - INFO - batch_size: 16
2025-02-10 23:04:47,029 - INFO - learning_rate: 1e-05
2025-02-10 23:04:47,031 - INFO - weight_decay: 0.01
2025-02-10 23:04:47,031 - INFO - mixup_prob: 0.2
2025-02-10 23:04:47,033 - INFO - smoothing: 0.1
2025-02-10 23:04:47,037 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:04:47,954 - INFO - Model and tokenizer setup completed
2025-02-10 23:04:47,955 - INFO - Setting up data loaders...
2025-02-10 23:04:47,956 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:04:47,958 - INFO - Created sampler with 1477 weights
2025-02-10 23:04:47,959 - INFO - Created data loaders with batch size 16
2025-02-10 23:04:47,960 - INFO - Training batches: 93
2025-02-10 23:04:47,961 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:07:08,090 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:09:28,280 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:11:48,612 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:11:49,326] Trial 10 pruned. 


2025-02-10 23:11:49,332 - INFO - 
Trial 11 Parameters:
2025-02-10 23:11:49,332 - INFO - batch_size: 16
2025-02-10 23:11:49,333 - INFO - learning_rate: 2e-05
2025-02-10 23:11:49,333 - INFO - weight_decay: 0.01
2025-02-10 23:11:49,334 - INFO - mixup_prob: 0.2
2025-02-10 23:11:49,335 - INFO - smoothing: 0.1
2025-02-10 23:11:49,340 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:11:50,221 - INFO - Model and tokenizer setup completed
2025-02-10 23:11:50,222 - INFO - Setting up data loaders...
2025-02-10 23:11:50,223 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:11:50,225 - INFO - Created sampler with 1477 weights
2025-02-10 23:11:50,226 - INFO - Created data loaders with batch size 16
2025-02-10 23:11:50,226 - INFO - Training batches: 93
2025-02-10 23:11:50,227 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.40s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.18it/s]


2025-02-10 23:14:10,357 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:16:30,589 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.40s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:18:50,800 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:18:51,649] Trial 11 finished with value: 0.5063139944895106 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 6 with value: 0.5081454955309341.


2025-02-10 23:18:51,656 - INFO - 
Trial 12 Parameters:
2025-02-10 23:18:51,657 - INFO - batch_size: 32
2025-02-10 23:18:51,658 - INFO - learning_rate: 2e-05
2025-02-10 23:18:51,659 - INFO - weight_decay: 0.01
2025-02-10 23:18:51,660 - INFO - mixup_prob: 0.2
2025-02-10 23:18:51,661 - INFO - smoothing: 0.1
2025-02-10 23:18:51,667 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:18:52,567 - INFO - Model and tokenizer setup completed
2025-02-10 23:18:52,568 - INFO - Setting up data loaders...
2025-02-10 23:18:52,569 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:18:52,571 - INFO - Created sampler with 1477 weights
2025-02-10 23:18:52,572 - INFO - Created data loaders with batch size 32
2025-02-10 23:18:52,573 - INFO - Training batches: 47
2025-02-10 23:18:52,574 - INFO - Validation batches: 9


Epoch 1/3: 100%|██████████| 47/47 [02:09<00:00,  2.75s/it]
Evaluating: 100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


2025-02-10 23:21:11,500 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:21:12,321] Trial 12 pruned. 


2025-02-10 23:21:12,327 - INFO - 
Trial 13 Parameters:
2025-02-10 23:21:12,328 - INFO - batch_size: 16
2025-02-10 23:21:12,328 - INFO - learning_rate: 2e-05
2025-02-10 23:21:12,329 - INFO - weight_decay: 0.01
2025-02-10 23:21:12,329 - INFO - mixup_prob: 0.2
2025-02-10 23:21:12,330 - INFO - smoothing: 0.1
2025-02-10 23:21:12,336 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:21:13,193 - INFO - Model and tokenizer setup completed
2025-02-10 23:21:13,194 - INFO - Setting up data loaders...
2025-02-10 23:21:13,195 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:21:13,198 - INFO - Created sampler with 1477 weights
2025-02-10 23:21:13,199 - INFO - Created data loaders with batch size 16
2025-02-10 23:21:13,199 - INFO - Training batches: 93
2025-02-10 23:21:13,201 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:23:33,932 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:25:54,644 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:28:15,449 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:28:16,370] Trial 13 finished with value: 0.515370293250627 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-10 23:28:16,378 - INFO - 
Trial 14 Parameters:
2025-02-10 23:28:16,378 - INFO - batch_size: 16
2025-02-10 23:28:16,379 - INFO - learning_rate: 2e-05
2025-02-10 23:28:16,381 - INFO - weight_decay: 0.01
2025-02-10 23:28:16,382 - INFO - mixup_prob: 0.2
2025-02-10 23:28:16,384 - INFO - smoothing: 0.1
2025-02-10 23:28:16,388 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:28:17,343 - INFO - Model and tokenizer setup completed
2025-02-10 23:28:17,344 - INFO - Setting up data loaders...
2025-02-10 23:28:17,345 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:28:17,347 - INFO - Created sampler with 1477 weights
2025-02-10 23:28:17,348 - INFO - Created data loaders with batch size 16
2025-02-10 23:28:17,348 - INFO - Training batches: 93
2025-02-10 23:28:17,349 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:11<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:30:38,322 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:32:59,016 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:35:19,772 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:35:20,679] Trial 14 finished with value: 0.5070196760216448 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-10 23:35:20,687 - INFO - 
Trial 15 Parameters:
2025-02-10 23:35:20,688 - INFO - batch_size: 16
2025-02-10 23:35:20,689 - INFO - learning_rate: 2e-05
2025-02-10 23:35:20,690 - INFO - weight_decay: 0.01
2025-02-10 23:35:20,690 - INFO - mixup_prob: 0.2
2025-02-10 23:35:20,691 - INFO - smoothing: 0.1
2025-02-10 23:35:20,697 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:35:21,664 - INFO - Model and tokenizer setup completed
2025-02-10 23:35:21,665 - INFO - Setting up data loaders...
2025-02-10 23:35:21,666 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:35:21,669 - INFO - Created sampler with 1477 weights
2025-02-10 23:35:21,670 - INFO - Created data loaders with batch size 16
2025-02-10 23:35:21,671 - INFO - Training batches: 93
2025-02-10 23:35:21,671 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:37:42,409 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:40:03,052 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:11<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.18it/s]


2025-02-10 23:42:24,163 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:42:25,055] Trial 15 finished with value: 0.4946466454699829 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-10 23:42:25,063 - INFO - 
Trial 16 Parameters:
2025-02-10 23:42:25,064 - INFO - batch_size: 16
2025-02-10 23:42:25,064 - INFO - learning_rate: 1e-05
2025-02-10 23:42:25,065 - INFO - weight_decay: 0.01
2025-02-10 23:42:25,066 - INFO - mixup_prob: 0.2
2025-02-10 23:42:25,067 - INFO - smoothing: 0.1
2025-02-10 23:42:25,072 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:42:26,292 - INFO - Model and tokenizer setup completed
2025-02-10 23:42:26,293 - INFO - Setting up data loaders...
2025-02-10 23:42:26,294 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:42:26,296 - INFO - Created sampler with 1477 weights
2025-02-10 23:42:26,297 - INFO - Created data loaders with batch size 16
2025-02-10 23:42:26,298 - INFO - Training batches: 93
2025-02-10 23:42:26,299 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:44:46,590 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:44:47,456] Trial 16 pruned. 


2025-02-10 23:44:47,461 - INFO - 
Trial 17 Parameters:
2025-02-10 23:44:47,462 - INFO - batch_size: 16
2025-02-10 23:44:47,463 - INFO - learning_rate: 2e-05
2025-02-10 23:44:47,463 - INFO - weight_decay: 0.01
2025-02-10 23:44:47,464 - INFO - mixup_prob: 0.2
2025-02-10 23:44:47,465 - INFO - smoothing: 0.1
2025-02-10 23:44:47,470 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:44:48,383 - INFO - Model and tokenizer setup completed
2025-02-10 23:44:48,384 - INFO - Setting up data loaders...
2025-02-10 23:44:48,385 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:44:48,387 - INFO - Created sampler with 1477 weights
2025-02-10 23:44:48,388 - INFO - Created data loaders with batch size 16
2025-02-10 23:44:48,389 - INFO - Training batches: 93
2025-02-10 23:44:48,389 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:47:08,558 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:49:28,777 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 3/3: 100%|██████████| 93/93 [02:10<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.20it/s]


2025-02-10 23:51:49,033 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:51:50,092] Trial 17 finished with value: 0.5064335798437446 and parameters: {'batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'mixup_prob': 0.2, 'smoothing': 0.1}. Best is trial 13 with value: 0.515370293250627.


2025-02-10 23:51:50,099 - INFO - 
Trial 18 Parameters:
2025-02-10 23:51:50,100 - INFO - batch_size: 8
2025-02-10 23:51:50,100 - INFO - learning_rate: 2e-05
2025-02-10 23:51:50,101 - INFO - weight_decay: 0.01
2025-02-10 23:51:50,103 - INFO - mixup_prob: 0.2
2025-02-10 23:51:50,104 - INFO - smoothing: 0.1
2025-02-10 23:51:50,108 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:51:51,077 - INFO - Model and tokenizer setup completed
2025-02-10 23:51:51,078 - INFO - Setting up data loaders...
2025-02-10 23:51:51,079 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:51:51,081 - INFO - Created sampler with 1477 weights
2025-02-10 23:51:51,082 - INFO - Created data loaders with batch size 8
2025-02-10 23:51:51,083 - INFO - Training batches: 185
2025-02-10 23:51:51,084 - INFO - Validation batches: 33


Epoch 1/3: 100%|██████████| 185/185 [02:14<00:00,  1.38it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.33it/s]


2025-02-10 23:54:14,883 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


Epoch 2/3: 100%|██████████| 185/185 [02:14<00:00,  1.37it/s]
Evaluating: 100%|██████████| 33/33 [00:07<00:00,  4.34it/s]


2025-02-10 23:56:38,766 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:56:39,630] Trial 18 pruned. 


2025-02-10 23:56:39,637 - INFO - 
Trial 19 Parameters:
2025-02-10 23:56:39,638 - INFO - batch_size: 16
2025-02-10 23:56:39,639 - INFO - learning_rate: 1e-05
2025-02-10 23:56:39,640 - INFO - weight_decay: 0.01
2025-02-10 23:56:39,641 - INFO - mixup_prob: 0.2
2025-02-10 23:56:39,642 - INFO - smoothing: 0.1
2025-02-10 23:56:39,647 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:56:40,592 - INFO - Model and tokenizer setup completed
2025-02-10 23:56:40,593 - INFO - Setting up data loaders...
2025-02-10 23:56:40,594 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:56:40,596 - INFO - Created sampler with 1477 weights
2025-02-10 23:56:40,597 - INFO - Created data loaders with batch size 16
2025-02-10 23:56:40,598 - INFO - Training batches: 93
2025-02-10 23:56:40,599 - INFO - Validation batches: 17


Epoch 1/3: 100%|██████████| 93/93 [02:11<00:00,  1.41s/it]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.19it/s]


2025-02-10 23:59:01,072 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png


[I 2025-02-10 23:59:02,013] Trial 19 pruned. 


2025-02-10 23:59:02,014 - INFO - 
Optimization Results:
2025-02-10 23:59:02,015 - INFO - Best trial:
2025-02-10 23:59:02,016 - INFO -   Value: 0.5154
2025-02-10 23:59:02,017 - INFO -   Params:
2025-02-10 23:59:02,019 - INFO -     batch_size: 16
2025-02-10 23:59:02,019 - INFO -     learning_rate: 2e-05
2025-02-10 23:59:02,020 - INFO -     weight_decay: 0.01
2025-02-10 23:59:02,021 - INFO -     mixup_prob: 0.2
2025-02-10 23:59:02,021 - INFO -     smoothing: 0.1
2025-02-10 23:59:02,029 - INFO - Setting up model and tokenizer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-02-10 23:59:02,916 - INFO - Model and tokenizer setup completed
2025-02-10 23:59:02,917 - INFO - Setting up data loaders...
2025-02-10 23:59:02,918 - INFO - Creating weighted sampler for balanced batch sampling...
2025-02-10 23:59:02,920 - INFO - Created sampler with 1477 weights
2025-02-10 23:59:02,921 - INFO - Created data loaders with batch size 16
2025-02-10 23:59:02,922 - INFO - Training batches: 93
2025-02-10 23:59:02,923 - INFO - Validation batches: 17


Training Epoch 1: 100%|██████████| 93/93 [02:19<00:00,  1.50s/it, loss=1.3835, avg_loss=1.5325, lr=1.90e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.26it/s]


2025-02-11 00:01:31,882 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:01:32,546 - INFO - Model saved successfully to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/model/best_accuracy
2025-02-11 00:01:33,219 - INFO - Model saved successfully to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/model/best_loss
2025-02-11 00:01:33,220 - INFO - Epoch 1/20 - Time: 150.29s - Train Loss: 1.5325, Val Loss: 0.0518, Val F1: 0.4668, Val Precision: 0.3382, Val Recall: 0.8325
2025-02-11 00:01:34,526 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 2: 100%|██████████| 93/93 [02:19<00:00,  1.50s/it, loss=0.7769, avg_loss=1.3438, lr=1.80e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.27it/s]


2025-02-11 00:04:04,366 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:04:05,581 - INFO - Model saved successfully to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/model/best_accuracy
2025-02-11 00:04:05,583 - INFO - Epoch 2/20 - Time: 150.10s - Train Loss: 1.3438, Val Loss: 0.0525, Val F1: 0.4788, Val Precision: 0.3538, Val Recall: 0.7805
2025-02-11 00:04:06,979 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 3: 100%|██████████| 93/93 [02:19<00:00,  1.51s/it, loss=0.7898, avg_loss=1.1738, lr=1.70e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.27it/s]


2025-02-11 00:06:37,100 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:06:38,121 - INFO - Model saved successfully to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/model/best_accuracy
2025-02-11 00:06:38,124 - INFO - Epoch 3/20 - Time: 150.16s - Train Loss: 1.1738, Val Loss: 0.0565, Val F1: 0.4930, Val Precision: 0.3606, Val Recall: 0.7932
2025-02-11 00:06:39,580 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 4: 100%|██████████| 93/93 [02:20<00:00,  1.51s/it, loss=0.6067, avg_loss=1.0497, lr=1.60e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.27it/s]


2025-02-11 00:09:09,787 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:09:10,880 - INFO - Model saved successfully to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/model/best_accuracy
2025-02-11 00:09:10,881 - INFO - Epoch 4/20 - Time: 150.29s - Train Loss: 1.0497, Val Loss: 0.0657, Val F1: 0.5257, Val Precision: 0.3991, Val Recall: 0.8363
2025-02-11 00:09:12,322 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 5: 100%|██████████| 93/93 [02:20<00:00,  1.51s/it, loss=0.9857, avg_loss=0.9882, lr=1.50e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.27it/s]


2025-02-11 00:11:42,614 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:11:42,616 - INFO - Epoch 5/20 - Time: 149.28s - Train Loss: 0.9882, Val Loss: 0.0624, Val F1: 0.5051, Val Precision: 0.3871, Val Recall: 0.7478
2025-02-11 00:11:44,164 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 6: 100%|██████████| 93/93 [02:20<00:00,  1.51s/it, loss=1.8860, avg_loss=0.9523, lr=1.40e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.26it/s]


2025-02-11 00:14:14,688 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:14:14,689 - INFO - Epoch 6/20 - Time: 149.50s - Train Loss: 0.9523, Val Loss: 0.0692, Val F1: 0.4775, Val Precision: 0.3819, Val Recall: 0.7078
2025-02-11 00:14:16,176 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 7: 100%|██████████| 93/93 [02:20<00:00,  1.51s/it, loss=0.4812, avg_loss=0.8976, lr=1.30e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.26it/s]


2025-02-11 00:16:47,185 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:16:47,186 - INFO - Epoch 7/20 - Time: 149.98s - Train Loss: 0.8976, Val Loss: 0.0705, Val F1: 0.5053, Val Precision: 0.4084, Val Recall: 0.7064
2025-02-11 00:16:48,742 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 8: 100%|██████████| 93/93 [02:20<00:00,  1.51s/it, loss=1.2877, avg_loss=0.9759, lr=1.20e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.26it/s]


2025-02-11 00:19:19,807 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:19:19,809 - INFO - Epoch 8/20 - Time: 149.98s - Train Loss: 0.9759, Val Loss: 0.0714, Val F1: 0.5207, Val Precision: 0.4215, Val Recall: 0.7136
2025-02-11 00:19:21,361 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/training_history.png


Training Epoch 9: 100%|██████████| 93/93 [02:21<00:00,  1.52s/it, loss=0.4953, avg_loss=0.8092, lr=1.10e-05]
Evaluating: 100%|██████████| 17/17 [00:07<00:00,  2.27it/s]


2025-02-11 00:21:52,781 - INFO - Training history plots saved to /kaggle/working/genrematics-optuna-app/logs/experiments/20250210_220331/plots/confusion_matrices/confusion_matrix_Romantis.png
2025-02-11 00:21:52,783 - INFO - Early stopping triggered after 9 epochs
2025-02-11 00:21:52,784 - INFO - Training completed in 0:22:49.858228
2025-02-11 00:21:53,935 - INFO - 
Training completed successfully!
