In [None]:
# ============================================================================
# SECTION 1: INSTALLATION AND IMPORTS
# ============================================================================

# Install required packages
!pip install -q transformers datasets accelerate scikit-learn pandas numpy matplotlib seaborn emoji
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q scipy openpyxl xlsxwriter

print("✓ Packages installed successfully!")

In [None]:
# ============================================================================
# SECTION 2: IMPORTS
# ============================================================================

import warnings
warnings.filterwarnings('ignore')

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import time
import re
import emoji
import json
from collections import Counter

# Sklearn imports
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix,
    classification_report
)

# Scipy for random distributions
from scipy.stats import uniform, randint

# Hugging Face imports
from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)

# Set random seed for reproducibility
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_SEED)

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n{'='*80}")
print(f"SYSTEM CONFIGURATION")
print(f"{'='*80}")
print(f"Device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("WARNING: No GPU detected. Training will be very slow!")
print(f"Random Seed: {RANDOM_SEED}")
print(f"{'='*80}\n")


This initial cell prepares the execution environment by importing all essential libraries and establishing key configuration settings necessary for the entire machine learning workflow. It imports core scientific computing libraries: PyTorch (torch) for tensor operations, Pandas (pd) and NumPy (np) for data manipulation, and Matplotlib (plt) and Seaborn (sns) for data visualization. Specialized tools for Natural Language Processing (NLP) are loaded from the Hugging Face transformers library, including AutoTokenizer and the Trainer class for model implementation and training management. Furthermore, the Scikit-learn (sklearn) library provides utilities for experiment control, such as ParameterGrid for hyperparameter searching and train_test_split for dividing the data, along with essential performance evaluation metrics like confusion_matrix and F1 score. Together, these imports establish a robust framework for data processing, model training, and performance analysis.

In [None]:

# ============================================================================
# SECTION 3: DATA LOADING AND PREPROCESSING
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 3: DATA LOADING AND PREPROCESSING")
print(f"{'='*80}\n")

def clean_taglish_text(text):
    """Clean and normalize Taglish text"""
    if pd.isna(text) or text == "":
        return ""

    text = str(text)

    # Handle emojis
    text = emoji.demojize(text)

    # Remove URLs
    text = re.sub(r'http\S+|www\S+', '', text)

    # Handle repeated characters (e.g., "sooooo" -> "soo")
    text = re.sub(r'(.)\1{3,}', r'\1\1', text)

    # Handle excessive punctuation
    text = re.sub(r'[!?]{2,}', '!', text)
    text = re.sub(r'\.{2,}', '.', text)

    # Remove extra whitespaces
    text = ' '.join(text.split())

    return text.strip()

def preprocess_dataset(dataset):
    """Preprocess the entire dataset"""
    processed_data = []

    for item in dataset:
        # Find text field (dataset uses 'review')
        text = None
        for field in ['review', 'text', 'review_text', 'content']:
            if field in item:
                text = item[field]
                break

        # Find label field (dataset uses 'sentiment')
        label = None
        for field in ['sentiment', 'label', 'sentiment_label']:
            if field in item:
                label = item[field]
                break

        if text and label is not None:
            cleaned_text = clean_taglish_text(text)

            # Skip empty texts
            if not cleaned_text:
                continue

            # Convert sentiment labels (1-4) to binary complaint labels
            # 1 = Negative → Complaint (1)
            # 2 = Neutral  → Non-Complaint (0)
            # 3 = Positive → Non-Complaint (0)
            # 4 = Mixed    → Complaint (1)
            try:
                label = int(label)
                if label in [1, 4]:  # Negative or Mixed → Complaint
                    binary_label = 1
                elif label in [2, 3]:  # Neutral or Positive → Non-Complaint
                    binary_label = 0
                else:
                    binary_label = 0
            except:
                # If labels are strings
                binary_label = 1 if str(label).lower() in ['negative', 'mixed', '1', '4'] else 0

            processed_data.append({
                'text': cleaned_text,
                'label': binary_label
            })

    return processed_data

# Load the SentiTaglish dataset from HuggingFace
print("Loading SentiTaglish Products & Services dataset...")
print("Source: ccosme/SentiTaglishProductsAndServices\n")

try:
    dataset = load_dataset("ccosme/SentiTaglishProductsAndServices")
    print("✓ Dataset loaded successfully from HuggingFace!")
    print(f"  Available splits: {list(dataset.keys())}")

    if 'train' in dataset:
        print(f"  Total samples: {len(dataset['train'])}")

except Exception as e:
    print(f"✗ Error loading dataset: {e}")
    print("\nPlease check your internet connection or try again later.")
    raise

# Preprocess the data
print("\nPreprocessing data...")
if 'train' in dataset:
    processed_data = preprocess_dataset(dataset['train'])
else:
    # Fallback if structure is different
    processed_data = preprocess_dataset(dataset)

print(f"✓ Processed {len(processed_data)} samples")

# Convert to DataFrame
df = pd.DataFrame(processed_data)

# Check label distribution
label_counts = df['label'].value_counts()
print(f"\nLabel Distribution:")
print(f"  Complaints (1): {label_counts.get(1, 0)} ({label_counts.get(1, 0)/len(df)*100:.1f}%)")
print(f"  Non-complaints (0): {label_counts.get(0, 0)} ({label_counts.get(0, 0)/len(df)*100:.1f}%)")

# Split data: 70% train, 15% validation, 15% test
print(f"\nSplitting data (70% train, 15% val, 15% test)...")

# First split: separate test set (15%)
train_val_df, test_df = train_test_split(
    df,
    test_size=0.15,
    stratify=df['label'],
    random_state=RANDOM_SEED
)

# Second split: separate validation from training (15% of total ≈ 17.6% of remaining)
train_df, val_df = train_test_split(
    train_val_df,
    test_size=0.176,  # 0.176 ≈ 15% of original dataset
    stratify=train_val_df['label'],
    random_state=RANDOM_SEED
)

print(f"✓ Data split completed:")
print(f"  Training set: {len(train_df)} samples ({len(train_df)/len(df)*100:.1f}%)")
print(f"  Validation set: {len(val_df)} samples ({len(val_df)/len(df)*100:.1f}%)")
print(f"  Test set: {len(test_df)} samples ({len(test_df)/len(df)*100:.1f}%)")

# Verify stratification
print(f"\nVerifying stratification:")
for name, split_df in [("Train", train_df), ("Validation", val_df), ("Test", test_df)]:
    complaint_ratio = split_df['label'].mean()
    print(f"  {name:12s}: {complaint_ratio:.2%} complaints")

# Display sample reviews
print(f"\nSample reviews:")
for i in range(3):
    sample = train_df.iloc[i]
    label_text = "COMPLAINT" if sample['label'] == 1 else "NON-COMPLAINT"
    print(f"\n  [{label_text}] {sample['text'][:100]}...")

print(f"\n✓ Data preparation complete!")

This cell defines the clean_taglish_text function for necessary text normalization. Such as removing URLs and standardizing emojis and punctuation and the preprocess_dataset function, which loads the SentiTaglish data and performs a crucial binary classification conversion where Negative (1) and Mixed (4) sentiment reviews are re-mapped to Complaint (1), while Neutral (2) and Positive (3) become Non-Complaint (0), thus re-framing the task. After the data is converted to a DataFrame, the label distribution is confirmed, and the data is finally split into approximately 70% training, 15% validation, and 15% testing sets using stratification to ensure the complaint/non-complaint ratios are identical across all subsets.

In [None]:
# ============================================================================
# SECTION 4: DEFINE HYPERPARAMETER SEARCH SPACES
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 4: DEFINING REDUCED HYPERPARAMETER SEARCH SPACES")
print(f"{'='*80}\n")

# *** NOTE: per_device_train_batch_size is REMOVED from the search ***
# *** A fixed batch size of 16 will be used instead ***

# Grid Search - A much smaller, pruned space
grid_search_space = {
    'learning_rate': [1e-5, 2e-5, 3e-5],        # (3 options) - Still important to test
    'num_train_epochs': [2, 3],                # (2 options) - Pruned from [2, 3, 4]
    'warmup_steps': [100],                     # (1 option)  - Pruned from [50, 100, 200]
    'weight_decay': [0.01, 0.1]                # (2 options) - Pruned from [0.01, 0.05, 0.1]
}
# Total combinations: 3 * 2 * 1 * 2 = 12

# Random Search - A comparable space for a fair comparison
random_search_space = {
    'learning_rate': uniform(1e-5, 4e-5),  # Uniform dist roughly covering the grid's range
    'num_train_epochs': [2, 3],             # Discrete choice matching the grid
    'warmup_steps': randint(50, 150),       # Random ints in a sensible, narrow range
    'weight_decay': uniform(0.01, 0.1)      # Uniform dist matching the grid's range
}

total_grid_combinations = len(list(ParameterGrid(grid_search_space)))
n_random_iterations = total_grid_combinations # We will match the grid count

print(f"Grid Search Configuration (Reduced):")
print(f"  Parameter space: {grid_search_space}")
print(f"  Total combinations: {total_grid_combinations}")
print(f"  Estimated time (at ~2-3 min/exp): {total_grid_combinations * 2:.0f}-{total_grid_combinations * 3:.0f} minutes")

print(f"\nRandom Search Configuration (Reduced):")
print(f"  Parameter distributions: (see code)")
print(f"  Number of samples: {n_random_iterations} (matching Grid Search for fair comparison)")
print(f"  Estimated time (at ~2-3 min/exp): {n_random_iterations * 2:.0f}-{n_random_iterations * 3:.0f} minutes")

print(f"\n{'='*80}")
print(f"TOTAL ESTIMATED RUNTIME: {(total_grid_combinations + n_random_iterations) * 2 / 60:.1f} - {(total_grid_combinations + n_random_iterations) * 3 / 60:.1f} hours")
print(f"{'='*80}\n")
print(f"✓ Search spaces defined!")


This cell establishes the hyperparameter search configurations for both Grid Search and Random Search optimization strategies, operating under a fixed training batch size of 16 to reduce complexity and allow for a more direct comparison between the two methods. The Grid Search space is deliberately restricted to 12 total combinations for efficiency, discretely sampling the learning_rate (1e-5, 2e-5, 3e-5), the num_train_epochs (2, 3), a fixed warmup_steps of 100, and the weight_decay (0.01, 0.1). To ensure a fair comparison, the Random Search space is configured to run for the same number of iterations (12), but it samples continuous hyperparameters from statistical distributions defined by scipy.stats, specifically using uniform distributions for learning_rate and weight_decay, while sampling warmup_steps from a randint distribution and keeping num_train_epochs as a discrete choice. The cell concludes by calculating and printing the total number of experiments and providing a practical estimate of the total runtime for the combined optimization process, setting expectations for the forthcoming model training phase.

In [None]:
# ============================================================================
# SECTION 5: HELPER FUNCTIONS
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 5: DEFINING HELPER FUNCTIONS")
print(f"{'='*80}\n")

# *** THIS IS THE FIXED BATCH SIZE THAT WILL BE USED FOR ALL EXPERIMENTS ***
FIXED_BATCH_SIZE = 16

def compute_metrics(eval_pred):
    """Compute evaluation metrics for the model"""
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='macro'
    )

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

def prepare_dataset_for_transformer(df, tokenizer, max_length=128):
    """Prepare dataset for transformer training"""
    def tokenize_function(examples):
        return tokenizer(
            examples['text'],
            padding='max_length',
            truncation=True,
            max_length=max_length
        )

    # Convert DataFrame to HuggingFace Dataset
    dataset = Dataset.from_pandas(df.reset_index(drop=True))

    # Tokenize
    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    # Rename label column to labels (required by Trainer)
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

    # Set format for PyTorch
    tokenized_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

    return tokenized_dataset

def train_and_evaluate_model(params, train_df, val_df, test_df, tokenizer,
                             experiment_id, search_type, total_experiments):
    """
    Train model with given hyperparameters and return comprehensive results
    """
    print(f"\n{'='*80}")
    print(f"{search_type} - Experiment {experiment_id}/{total_experiments}")
    print(f"{'='*80}")
    print(f"Hyperparameters:")
    # Print the fixed batch size
    print(f"  {'per_device_train_batch_size':30s}: {FIXED_BATCH_SIZE} (Fixed)")
    # Print the hyperparameters being tuned
    for key, value in params.items():
        if isinstance(value, float) and value < 0.001:
            print(f"  {key:30s}: {value:.2e}")
        else:
            print(f"  {key:30s}: {value}")

    start_time = time.time()

    try:
        # Load fresh model for each experiment
        print(f"\nLoading model...")
        model = AutoModelForSequenceClassification.from_pretrained(
            "jcblaise/roberta-tagalog-base",
            num_labels=2
        ).to(device)

        # Prepare datasets
        print(f"Preparing datasets...")
        train_dataset = prepare_dataset_for_transformer(train_df, tokenizer, max_length=128)
        val_dataset = prepare_dataset_for_transformer(val_df, tokenizer, max_length=128)
        test_dataset = prepare_dataset_for_transformer(test_df, tokenizer, max_length=128)

        # Training arguments
        training_args = TrainingArguments(
            output_dir=f"./results/{search_type.lower().replace(' ', '_')}/exp_{experiment_id}",
            num_train_epochs=params['num_train_epochs'],
            per_device_train_batch_size=FIXED_BATCH_SIZE, # Use fixed batch size
            per_device_eval_batch_size=8,
            learning_rate=params['learning_rate'],
            warmup_steps=params['warmup_steps'],
            weight_decay=params['weight_decay'],
            logging_dir=f'./logs/{search_type.lower()}/exp_{experiment_id}',
            logging_steps=50,
            eval_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            metric_for_best_model="f1",
            greater_is_better=True,
            fp16=torch.cuda.is_available(),
            report_to="none",
            seed=RANDOM_SEED,
            save_total_limit=1,  # Only keep best model to save space
            disable_tqdm=False
        )

        # Create trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
        )

        # Train
        print(f"\nStarting training...")
        train_result = trainer.train()
        training_time = time.time() - start_time

        # Evaluate on validation set
        print(f"Evaluating on validation set...")
        val_results = trainer.evaluate(eval_dataset=val_dataset)

        # Evaluate on test set
        print(f"Evaluating on test set...")
        test_results = trainer.evaluate(eval_dataset=test_dataset)

        # Compile results
        result = {
            'search_type': search_type,
            'experiment_id': experiment_id,
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),

            # Hyperparameters
            'learning_rate': params['learning_rate'],
            'batch_size': FIXED_BATCH_SIZE, # Log the fixed batch size
            'epochs': params['num_train_epochs'],
            'warmup_steps': params['warmup_steps'],
            'weight_decay': params['weight_decay'],

            # Training metrics
            'training_time_seconds': training_time,
            'training_time_minutes': training_time / 60,
            'training_time_hours': training_time / 3600,
            'total_train_steps': train_result.global_step,
            'train_loss': train_result.training_loss,

            # Validation metrics
            'val_loss': val_results['eval_loss'],
            'val_accuracy': val_results['eval_accuracy'],
            'val_precision': val_results['eval_precision'],
            'val_recall': val_results['eval_recall'],
            'val_f1': val_results['eval_f1'],

            # Test metrics
            'test_loss': test_results['eval_loss'],
            'test_accuracy': test_results['eval_accuracy'],
            'test_precision': test_results['eval_precision'],
            'test_recall': test_results['eval_recall'],
            'test_f1': test_results['eval_f1'],

            # Status
            'status': 'SUCCESS'
        }

        print(f"\n{'='*80}")
        print(f"✓ Experiment {experiment_id} completed successfully!")
        print(f"  Training time: {training_time/60:.2f} minutes")
        print(f"  Validation F1: {val_results['eval_f1']:.4f}")
        print(f"  Test F1: {test_results['eval_f1']:.4f}")
        print(f"{'='*80}")

        # Clean up to save memory
        del model
        del trainer
        del train_dataset
        del val_dataset
        del test_dataset
        torch.cuda.empty_cache()

        return result

    except Exception as e:
        error_time = time.time() - start_time
        print(f"\n{'='*80}")
        print(f"✗ Experiment {experiment_id} FAILED")
        print(f"  Error: {str(e)}")
        print(f"  Time before failure: {error_time/60:.2f} minutes")
        print(f"{'='*80}")

        result = {
            'search_type': search_type,
            'experiment_id': experiment_id,
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'learning_rate': params['learning_rate'],
            'batch_size': FIXED_BATCH_SIZE,
            'epochs': params['num_train_epochs'],
            'warmup_steps': params['warmup_steps'],
            'weight_decay': params['weight_decay'],
            'training_time_seconds': error_time,
            'training_time_minutes': error_time / 60,
            'training_time_hours': error_time / 3600,
            'status': 'FAILED',
            'error_message': str(e)[:500]  # Limit error message length
        }

        # Clean up
        torch.cuda.empty_cache()

        return result

print("✓ Helper functions defined!")

In [None]:
# ============================================================================
# SECTION 6: GRID SEARCH IMPLEMENTATION
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 6: GRID SEARCH IMPLEMENTATION")
print(f"{'='*80}\n")

def run_grid_search(train_df, val_df, test_df, tokenizer):
    """
    Exhaustive Grid Search over all parameter combinations
    """
    print(f"{'='*80}")
    print(f"STARTING GRID SEARCH")
    print(f"{'='*80}\n")

    # Generate all parameter combinations
    param_grid = list(ParameterGrid(grid_search_space))
    total_experiments = len(param_grid)

    print(f"Configuration:")
    print(f"  Total combinations: {total_experiments}")
    print(f"  Estimated time: {total_experiments * 2:.0f}-{total_experiments * 3:.0f} minutes")
    print(f"  (This is now much faster!)\n")

    user_input = input(f"Proceed with {total_experiments} experiments? (yes/no): ")
    if user_input.lower() not in ['yes', 'y']:
        print("Grid Search cancelled.")
        return [], 0

    grid_results = []
    overall_start = time.time()

    for idx, params in enumerate(param_grid, 1):
        result = train_and_evaluate_model(
            params=params,
            train_df=train_df,
            val_df=val_df,
            test_df=test_df,
            tokenizer=tokenizer,
            experiment_id=idx,
            search_type="Grid Search",
            total_experiments=total_experiments
        )

        grid_results.append(result)

        # Save intermediate results after each experiment
        intermediate_df = pd.DataFrame(grid_results)
        intermediate_df.to_excel('grid_search_results_intermediate.xlsx', index=False)

        # Progress update
        elapsed = time.time() - overall_start
        avg_time = elapsed / idx
        remaining_experiments = total_experiments - idx
        eta = remaining_experiments * avg_time

        print(f"\nProgress Summary:")
        print(f"  Completed: {idx}/{total_experiments} ({idx/total_experiments*100:.1f}%)")
        print(f"  Elapsed time: {elapsed/60:.1f} minutes")
        print(f"  Avg time/experiment: {avg_time/60:.1f} minutes")
        print(f"  ETA: {eta/60:.1f} minutes")

        # Show best so far
        if idx > 0:
            successful = [r for r in grid_results if r.get('status') == 'SUCCESS']
            if successful:
                best_so_far = max(successful, key=lambda x: x.get('test_f1', 0))
                print(f"  Best F1 so far: {best_so_far.get('test_f1', 0):.4f} (Experiment #{best_so_far['experiment_id']})")

    total_time = time.time() - overall_start

    print(f"\n{'='*80}")
    print(f"GRID SEARCH COMPLETED")
    print(f"{'='*80}")
    print(f"Total time: {total_time/3600:.2f} hours ({total_time/60:.2f} minutes)")
    print(f"Average time per experiment: {total_time/total_experiments/60:.2f} minutes")
    print(f"Success rate: {sum(1 for r in grid_results if r.get('status')=='SUCCESS')}/{total_experiments}")
    print(f"{'='*80}\n")

    return grid_results, total_time

print("✓ Grid Search function defined!")

This cell defines the run_grid_search function, which systematically executes the Grid Search hyperparameter optimization process by exhaustively testing every predefined combination of hyperparameters from the grid search space, using a tool called ParameterGrid to generate all combinations. The function requires a user confirmation via input before starting the process. Inside the loop, an external function called train_and_evaluate_model is called for each combination. Critically, after each experiment, the results are immediately saved to an intermediate Excel file (grid_search_results_intermediate.xlsx) for fault tolerance and progress tracking. The function also provides a detailed progress summary, including elapsed time, estimated time remaining (ETA), and the performance of the best F1 score found so far, concluding with a final summary of the total execution time and success rate.

In [None]:
# ============================================================================
# SECTION 7: RANDOM SEARCH IMPLEMENTATION
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 7: RANDOM SEARCH IMPLEMENTATION")
print(f"{'='*80}\n")

def sample_random_params(search_space, n_samples=20):
    """
    Sample random parameter combinations from distributions
    """
    random_params = []

    for _ in range(n_samples):
        params = {}
        for key, value in search_space.items():
            if isinstance(value, list):
                # Discrete choice
                params[key] = np.random.choice(value)
            elif hasattr(value, 'rvs'):  # scipy distribution
                sampled = value.rvs()
                if key in ['learning_rate', 'weight_decay']:
                    params[key] = float(sampled)
                else:
                    params[key] = int(sampled)
            else:
                params[key] = value

        random_params.append(params)

    return random_params

def run_random_search(train_df, val_df, test_df, tokenizer, n_iterations=20):
    """
    Random Search with specified number of iterations
    """
    print(f"{'='*80}")
    print(f"STARTING RANDOM SEARCH")
    print(f"{'='*80}\n")

    # Sample random parameter combinations
    random_params = sample_random_params(random_search_space, n_samples=n_iterations)

    print(f"Configuration:")
    print(f"  Random samples: {n_iterations}")
    print(f"  Estimated time: {n_iterations * 2:.0f}-{n_iterations * 3:.0f} minutes\n")

    user_input = input(f"Proceed with {n_iterations} random experiments? (yes/no): ")
    if user_input.lower() not in ['yes', 'y']:
        print("Random Search cancelled.")
        return [], 0

    random_results = []
    overall_start = time.time()

    for idx, params in enumerate(random_params, 1):
        result = train_and_evaluate_model(
            params=params,
            train_df=train_df,
            val_df=val_df,
            test_df=test_df,
            tokenizer=tokenizer,
            experiment_id=idx,
            search_type="Random Search",
            total_experiments=n_iterations
        )

        random_results.append(result)

        # Save intermediate results
        intermediate_df = pd.DataFrame(random_results)
        intermediate_df.to_excel('random_search_results_intermediate.xlsx', index=False)

        # Progress update
        elapsed = time.time() - overall_start
        avg_time = elapsed / idx
        remaining_experiments = n_iterations - idx
        eta = remaining_experiments * avg_time

        print(f"\nProgress Summary:")
        print(f"  Completed: {idx}/{n_iterations} ({idx/n_iterations*100:.1f}%)")
        print(f"  Elapsed time: {elapsed/60:.1f} minutes")
        print(f"  Avg time/experiment: {avg_time/60:.1f} minutes")
        print(f"  ETA: {eta/60:.1f} minutes")

        # Show best so far
        if idx > 0:
            successful = [r for r in random_results if r.get('status') == 'SUCCESS']
            if successful:
                best_so_far = max(successful, key=lambda x: x.get('test_f1', 0))
                print(f"  Best F1 so far: {best_so_far.get('test_f1', 0):.4f} (Experiment #{best_so_far['experiment_id']})")

    total_time = time.time() - overall_start

    print(f"\n{'='*80}")
    print(f"RANDOM SEARCH COMPLETED")
    print(f"{'='*80}")
    print(f"Total time: {total_time/3600:.2f} hours ({total_time/60:.2f} minutes)")
    print(f"Average time per experiment: {total_time/n_iterations/60:.2f} minutes")
    print(f"Success rate: {sum(1 for r in random_results if r.get('status')=='SUCCESS')}/{n_iterations}")
    print(f"{'='*80}\n")

    return random_results, total_time

print("✓ Random Search function defined!")

This cell defines two functions to implement the Random Search hyperparameter optimization. The sample_random_params function first takes the defined random search space and randomly selects a specified number of parameter combinations; for discrete settings (like epochs) it picks from a list, and for continuous settings (like learning rate) it samples a floating-point number from the defined scipy distributions. The run_random_search function then executes the optimization by iterating over these randomly sampled parameters, calling the external train_and_evaluate_model for each run. Similar to the Grid Search function, it requires user input to confirm the start, saves all intermediate results to an Excel file (random_search_results_intermediate.xlsx) after every experiment for progress safety, and provides a detailed summary of the progress, best F1 score so far, and the final total execution time.

In [None]:
# ============================================================================
# SECTION 8: LOAD TOKENIZER
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 8: LOADING TOKENIZER")
print(f"{'='*80}\n")

model_name = "jcblaise/roberta-tagalog-base"
print(f"Loading tokenizer: {model_name}")

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Add padding token if needed
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    print("  Added padding token")

print(f"✓ Tokenizer loaded successfully!")
print(f"  Vocabulary size: {len(tokenizer)}")
print(f"  Model max length: {tokenizer.model_max_length}")

This cell is responsible for initializing the tokenizer that will convert the cleaned Taglish text into a format the model can understand. The model name chosen is jcblaise/roberta-tagalog-base, which is an adapted RoBERTa model for the Tagalog language. The AutoTokenizer.from_pretrained function automatically downloads the necessary vocabulary and rules associated with the specified model_name. Crucially, the code checks if a padding token (pad_token) is already defined and, if not, assigns the end-of-sequence token (eos_token) as the padding token; this is necessary because all input texts must be padded to the same length for batch processing during model training. The cell concludes by printing confirmation of the successful load and displaying key tokenizer properties, such as the vocabulary size and the model's maximum input length.

In [None]:
# ============================================================================
# SECTION 9: EXECUTE EXPERIMENTS
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 9: EXECUTING HYPERPARAMETER OPTIMIZATION")
print(f"{'='*80}\n")

print("IMPORTANT NOTES:")
print("  - This will take approx 1-1.5 hours to complete")
print("  - Results are saved after each experiment")
print("  - You can stop and resume if needed (check intermediate files)")
print("  - Make sure you have stable internet connection")
print("  - Don't close this notebook during execution\n")

# Run Grid Search
print("\n" + "="*80)
print("PHASE 1: GRID SEARCH")
print("="*80 + "\n")

grid_results, grid_time = run_grid_search(train_df, val_df, test_df, tokenizer)

# Run Random Search (same number of iterations for fair comparison)
print("\n" + "="*80)
print("PHASE 2: RANDOM SEARCH")
print("="*80 + "\n")

# Use the number of grid combinations (12) as the number of random iterations
n_random = total_grid_combinations
if n_random == 0:
    print("Grid search was skipped or failed. Setting Random Search to 12 iterations.")
    n_random = 12 # Fallback just in case

random_results, random_time = run_random_search(train_df, val_df, test_df, tokenizer, n_iterations=n_random)

This cell is the main execution block for the hyperparameter optimization, starting with the Grid Search (Phase 1) and immediately followed by the Random Search (Phase 2). Before execution, the cell prints important warnings regarding the estimated 1 to 1.5 hour runtime, the importance of a stable internet connection, and the feature of saving intermediate results for potential resumption. The run_grid_search function is called first to exhaustively test all combinations, and the resulting run time (grid_time) and results (grid_results) are captured. Following this, the run_random_search function is called, explicitly using the same number of iterations as the Grid Search (total_grid_combinations) to ensure a fair comparison of the two search methodologies, capturing its respective results (random_results) and time (random_time).

In [3]:
# ============================================================================
# SECTION 10: COMBINE AND SAVE RESULTS
# ============================================================================

print(f"\n{'='*80}")
print(f"SECTION 10: SAVING FINAL RESULTS")
print(f"{'='*80}\n")

# Combine results
all_results = grid_results + random_results
results_df = pd.DataFrame(all_results)

# Save comprehensive results
results_df.to_excel('hyperparameter_optimization_results_FINAL.xlsx', index=False)
results_df.to_csv('hyperparameter_optimization_results_FINAL.csv', index=False)

print(f"✓ Results saved:")
print(f"  - hyperparameter_optimization_results_FINAL.xlsx")
print(f"  - hyperparameter_optimization_results_FINAL.csv")
print(f"  - grid_search_results_intermediate.xlsx")
print(f"  - random_search_results_intermediate.xlsx")

print(f"\n✓ ALL EXPERIMENTS COMPLETED!")
print(f"\nTotal experiments: {len(all_results)}")
print(f"Total time: {(grid_time + random_time)/3600:.2f} hours")

✓ Packages installed successfully!

SYSTEM CONFIGURATION
Device: cuda
GPU: Tesla T4
GPU Memory: 15.83 GB
Random Seed: 42


SECTION 3: DATA LOADING AND PREPROCESSING

Loading SentiTaglish Products & Services dataset...
Source: ccosme/SentiTaglishProductsAndServices

✓ Dataset loaded successfully from HuggingFace!
  Available splits: ['train']
  Total samples: 10510

Preprocessing data...
✓ Processed 10510 samples

Label Distribution:
  Complaints (1): 6805 (64.7%)
  Non-complaints (0): 3705 (35.3%)

Splitting data (70% train, 15% val, 15% test)...
✓ Data split completed:
  Training set: 7360 samples (70.0%)
  Validation set: 1573 samples (15.0%)
  Test set: 1577 samples (15.0%)

Verifying stratification:
  Train       : 64.76% complaints
  Validation  : 64.72% complaints
  Test        : 64.74% complaints

Sample reviews:

  [COMPLAINT] maganda xa kung sa maganda. kaya lang na dismaya tlga aq. kc ung enexpect ko na magagamit ko kahit w...

  [COMPLAINT] ang dami ko order 6pcs bench brief

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2472,0.281869,0.889383,0.887344,0.867418,0.87587
2,0.2096,0.286309,0.895741,0.896403,0.872739,0.882558


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 1 completed successfully!
  Training time: 2.53 minutes
  Validation F1: 0.8826
  Test F1: 0.9025

Progress Summary:
  Completed: 1/12 (8.3%)
  Elapsed time: 2.7 minutes
  Avg time/experiment: 2.7 minutes
  ETA: 29.2 minutes
  Best F1 so far: 0.9025 (Experiment #1)

Grid Search - Experiment 2/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 1.00e-05
  num_train_epochs              : 2
  warmup_steps                  : 100
  weight_decay                  : 0.1

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2505,0.281624,0.890019,0.88672,0.869548,0.876986
2,0.2115,0.288856,0.891291,0.889748,0.869301,0.877953


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 2 completed successfully!
  Training time: 2.49 minutes
  Validation F1: 0.8780
  Test F1: 0.9008

Progress Summary:
  Completed: 2/12 (16.7%)
  Elapsed time: 5.3 minutes
  Avg time/experiment: 2.6 minutes
  ETA: 26.3 minutes
  Best F1 so far: 0.9025 (Experiment #1)

Grid Search - Experiment 3/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 1.00e-05
  num_train_epochs              : 3
  warmup_steps                  : 100
  weight_decay                  : 0.01

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2521,0.28187,0.891291,0.888177,0.87094,0.878408
2,0.2063,0.292047,0.895741,0.894681,0.874378,0.883004
3,0.1451,0.347866,0.898919,0.902232,0.874376,0.885641


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 3 completed successfully!
  Training time: 3.46 minutes
  Validation F1: 0.8856
  Test F1: 0.9002

Progress Summary:
  Completed: 3/12 (25.0%)
  Elapsed time: 8.8 minutes
  Avg time/experiment: 2.9 minutes
  ETA: 26.5 minutes
  Best F1 so far: 0.9025 (Experiment #1)

Grid Search - Experiment 4/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 1.00e-05
  num_train_epochs              : 3
  warmup_steps                  : 100
  weight_decay                  : 0.1

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2504,0.282682,0.891926,0.890688,0.869792,0.878609
2,0.2066,0.293318,0.897648,0.897521,0.875852,0.884981
3,0.1451,0.349351,0.904005,0.908724,0.879534,0.891288


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 4 completed successfully!
  Training time: 3.77 minutes
  Validation F1: 0.8913
  Test F1: 0.9033

Progress Summary:
  Completed: 4/12 (33.3%)
  Elapsed time: 12.7 minutes
  Avg time/experiment: 3.2 minutes
  ETA: 25.5 minutes
  Best F1 so far: 0.9033 (Experiment #4)

Grid Search - Experiment 5/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 2.00e-05
  num_train_epochs              : 2
  warmup_steps                  : 100
  weight_decay                  : 0.01

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2342,0.263806,0.900191,0.897979,0.881094,0.888466
2,0.1756,0.307024,0.902098,0.902454,0.880929,0.890034


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 5 completed successfully!
  Training time: 2.44 minutes
  Validation F1: 0.8900
  Test F1: 0.9056

Progress Summary:
  Completed: 5/12 (41.7%)
  Elapsed time: 15.3 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 21.4 minutes
  Best F1 so far: 0.9056 (Experiment #5)

Grid Search - Experiment 6/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 2.00e-05
  num_train_epochs              : 2
  warmup_steps                  : 100
  weight_decay                  : 0.1

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.231,0.261365,0.902098,0.899574,0.883797,0.890748
2,0.175,0.301119,0.902734,0.90297,0.88183,0.8908


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 6 completed successfully!
  Training time: 2.26 minutes
  Validation F1: 0.8908
  Test F1: 0.9031

Progress Summary:
  Completed: 6/12 (50.0%)
  Elapsed time: 17.7 minutes
  Avg time/experiment: 2.9 minutes
  ETA: 17.7 minutes
  Best F1 so far: 0.9056 (Experiment #5)

Grid Search - Experiment 7/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 2.00e-05
  num_train_epochs              : 3
  warmup_steps                  : 100
  weight_decay                  : 0.01

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2299,0.257676,0.902734,0.900493,0.884288,0.891408
2,0.1891,0.301953,0.904641,0.906766,0.882484,0.892584
3,0.0897,0.400258,0.907819,0.908847,0.887398,0.89651


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 7 completed successfully!
  Training time: 3.60 minutes
  Validation F1: 0.8965
  Test F1: 0.8991

Progress Summary:
  Completed: 7/12 (58.3%)
  Elapsed time: 21.4 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 15.3 minutes
  Best F1 so far: 0.9056 (Experiment #5)

Grid Search - Experiment 8/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 2.00e-05
  num_train_epochs              : 3
  warmup_steps                  : 100
  weight_decay                  : 0.1

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2337,0.258934,0.900826,0.89851,0.881995,0.889228
2,0.1822,0.315224,0.903369,0.90982,0.877404,0.890187
3,0.0957,0.394921,0.904641,0.904096,0.884942,0.893192


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 8 completed successfully!
  Training time: 3.45 minutes
  Validation F1: 0.8932
  Test F1: 0.9000

Progress Summary:
  Completed: 8/12 (66.7%)
  Elapsed time: 25.0 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 12.5 minutes
  Best F1 so far: 0.9056 (Experiment #5)

Grid Search - Experiment 9/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.00e-05
  num_train_epochs              : 2
  warmup_steps                  : 100
  weight_decay                  : 0.01

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2262,0.249565,0.909727,0.910379,0.890101,0.898794
2,0.1608,0.309011,0.909727,0.907871,0.892559,0.899353


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 9 completed successfully!
  Training time: 2.49 minutes
  Validation F1: 0.8994
  Test F1: 0.9023

Progress Summary:
  Completed: 9/12 (75.0%)
  Elapsed time: 27.6 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 9.2 minutes
  Best F1 so far: 0.9056 (Experiment #5)

Grid Search - Experiment 10/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.00e-05
  num_train_epochs              : 2
  warmup_steps                  : 100
  weight_decay                  : 0.1

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2253,0.249855,0.908455,0.909807,0.887889,0.897175
2,0.1601,0.305235,0.910362,0.910028,0.891821,0.89974


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 10 completed successfully!
  Training time: 2.51 minutes
  Validation F1: 0.8997
  Test F1: 0.9033

Progress Summary:
  Completed: 10/12 (83.3%)
  Elapsed time: 30.2 minutes
  Avg time/experiment: 3.0 minutes
  ETA: 6.0 minutes
  Best F1 so far: 0.9056 (Experiment #5)

Grid Search - Experiment 11/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.00e-05
  num_train_epochs              : 3
  warmup_steps                  : 100
  weight_decay                  : 0.01

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2263,0.246942,0.904641,0.904954,0.884123,0.892992
2,0.1671,0.307121,0.902734,0.90774,0.877732,0.889739
3,0.0763,0.389342,0.909091,0.907746,0.891249,0.898505


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 11 completed successfully!
  Training time: 3.46 minutes
  Validation F1: 0.8985
  Test F1: 0.8989

Progress Summary:
  Completed: 11/12 (91.7%)
  Elapsed time: 33.8 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 3.1 minutes
  Best F1 so far: 0.9056 (Experiment #5)

Grid Search - Experiment 12/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.00e-05
  num_train_epochs              : 3
  warmup_steps                  : 100
  weight_decay                  : 0.1

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2292,0.246524,0.908455,0.90682,0.890757,0.897842
2,0.1727,0.306353,0.908455,0.912683,0.885431,0.896581
3,0.0858,0.393028,0.914812,0.915787,0.896079,0.904585


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 12 completed successfully!
  Training time: 3.70 minutes
  Validation F1: 0.9046
  Test F1: 0.8997

Progress Summary:
  Completed: 12/12 (100.0%)
  Elapsed time: 37.6 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 0.0 minutes
  Best F1 so far: 0.9056 (Experiment #5)

GRID SEARCH COMPLETED
Total time: 0.63 hours (37.62 minutes)
Average time per experiment: 3.14 minutes
Success rate: 12/12


PHASE 2: RANDOM SEARCH

STARTING RANDOM SEARCH

Configuration:
  Random samples: 12
  Estimated time: 24-36 minutes

Proceed with 12 random experiments? (yes/no): yes

Random Search - Experiment 1/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 2.50e-05
  num_train_epochs              : 2
  warmup_steps                  : 64
  weight_decay                  : 0.0831993941811405

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2232,0.255723,0.909727,0.907871,0.892559,0.899353
2,0.1635,0.309134,0.907819,0.907115,0.889037,0.896896


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 1 completed successfully!
  Training time: 2.49 minutes
  Validation F1: 0.8994
  Test F1: 0.8950

Progress Summary:
  Completed: 1/12 (8.3%)
  Elapsed time: 2.6 minutes
  Avg time/experiment: 2.6 minutes
  ETA: 28.7 minutes
  Best F1 so far: 0.8950 (Experiment #1)

Random Search - Experiment 2/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.39e-05
  num_train_epochs              : 2
  warmup_steps                  : 132
  weight_decay                  : 0.019997491581800288

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2268,0.248635,0.905277,0.908222,0.882565,0.893146
2,0.1588,0.306978,0.91227,0.911168,0.894934,0.902099


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 2 completed successfully!
  Training time: 2.33 minutes
  Validation F1: 0.9021
  Test F1: 0.9035

Progress Summary:
  Completed: 2/12 (16.7%)
  Elapsed time: 5.1 minutes
  Avg time/experiment: 2.5 minutes
  ETA: 25.3 minutes
  Best F1 so far: 0.9035 (Experiment #2)

Random Search - Experiment 3/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 2.84e-05
  num_train_epochs              : 2
  warmup_steps                  : 149
  weight_decay                  : 0.02428668179219408

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2204,0.249487,0.907184,0.90745,0.887317,0.895943
2,0.1621,0.300291,0.91227,0.910764,0.895343,0.902188


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 3 completed successfully!
  Training time: 2.50 minutes
  Validation F1: 0.9022
  Test F1: 0.9024

Progress Summary:
  Completed: 3/12 (25.0%)
  Elapsed time: 7.7 minutes
  Avg time/experiment: 2.6 minutes
  ETA: 23.0 minutes
  Best F1 so far: 0.9035 (Experiment #2)

Random Search - Experiment 4/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.60e-05
  num_train_epochs              : 2
  warmup_steps                  : 51
  weight_decay                  : 0.08219987722668247

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2199,0.24899,0.905277,0.908222,0.882565,0.893146
2,0.1445,0.317968,0.907184,0.906179,0.888546,0.896233


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 4 completed successfully!
  Training time: 2.59 minutes
  Validation F1: 0.8962
  Test F1: 0.9024

Progress Summary:
  Completed: 4/12 (33.3%)
  Elapsed time: 10.4 minutes
  Avg time/experiment: 2.6 minutes
  ETA: 20.8 minutes
  Best F1 so far: 0.9035 (Experiment #2)

Random Search - Experiment 5/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 4.75e-05
  num_train_epochs              : 3
  warmup_steps                  : 113
  weight_decay                  : 0.10922115592912175

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.249,0.252972,0.898284,0.900759,0.874294,0.88509
2,0.18,0.334439,0.902734,0.913994,0.873225,0.888501
3,0.0787,0.393106,0.907184,0.90745,0.887317,0.895943


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 5 completed successfully!
  Training time: 3.68 minutes
  Validation F1: 0.8959
  Test F1: 0.9080

Progress Summary:
  Completed: 5/12 (41.7%)
  Elapsed time: 14.2 minutes
  Avg time/experiment: 2.8 minutes
  ETA: 19.9 minutes
  Best F1 so far: 0.9080 (Experiment #5)

Random Search - Experiment 6/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.47e-05
  num_train_epochs              : 3
  warmup_steps                  : 71
  weight_decay                  : 0.01070663052197174

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.225,0.246222,0.908455,0.907635,0.889938,0.897654
2,0.1655,0.314494,0.905912,0.911761,0.881008,0.89329
3,0.0797,0.406448,0.914812,0.916695,0.895259,0.904406


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 6 completed successfully!
  Training time: 3.68 minutes
  Validation F1: 0.9044
  Test F1: 0.8959

Progress Summary:
  Completed: 6/12 (50.0%)
  Elapsed time: 18.0 minutes
  Avg time/experiment: 3.0 minutes
  ETA: 18.0 minutes
  Best F1 so far: 0.9080 (Experiment #5)

Random Search - Experiment 7/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 1.09e-05
  num_train_epochs              : 2
  warmup_steps                  : 108
  weight_decay                  : 0.04998609717152555

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2495,0.281213,0.893198,0.891746,0.871594,0.880151
2,0.2041,0.287007,0.895741,0.895527,0.873559,0.882783


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 7 completed successfully!
  Training time: 2.51 minutes
  Validation F1: 0.8828
  Test F1: 0.9016

Progress Summary:
  Completed: 7/12 (58.3%)
  Elapsed time: 20.6 minutes
  Avg time/experiment: 2.9 minutes
  ETA: 14.7 minutes
  Best F1 so far: 0.9080 (Experiment #5)

Random Search - Experiment 8/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 1.19e-05
  num_train_epochs              : 3
  warmup_steps                  : 129
  weight_decay                  : 0.033277134043030426

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2491,0.278334,0.894469,0.892405,0.873806,0.881799
2,0.1991,0.29318,0.899555,0.899512,0.878145,0.887178
3,0.1278,0.36138,0.898919,0.901266,0.875195,0.885864


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 8 completed successfully!
  Training time: 3.58 minutes
  Validation F1: 0.8872
  Test F1: 0.9056

Progress Summary:
  Completed: 8/12 (66.7%)
  Elapsed time: 24.3 minutes
  Avg time/experiment: 3.0 minutes
  ETA: 12.2 minutes
  Best F1 so far: 0.9080 (Experiment #5)

Random Search - Experiment 9/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 1.36e-05
  num_train_epochs              : 3
  warmup_steps                  : 100
  weight_decay                  : 0.04824619912671628

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2419,0.271385,0.898919,0.897317,0.878883,0.886836
2,0.1893,0.307518,0.902098,0.904272,0.87929,0.889613
3,0.1165,0.375356,0.903369,0.903924,0.882321,0.891462


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 9 completed successfully!
  Training time: 3.88 minutes
  Validation F1: 0.8915
  Test F1: 0.9052

Progress Summary:
  Completed: 9/12 (75.0%)
  Elapsed time: 28.3 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 9.4 minutes
  Best F1 so far: 0.9080 (Experiment #5)

Random Search - Experiment 10/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 4.93e-05
  num_train_epochs              : 2
  warmup_steps                  : 52
  weight_decay                  : 0.09599404067363206

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2186,0.250028,0.905912,0.904319,0.887563,0.894908
2,0.1676,0.311447,0.909727,0.906351,0.894198,0.899715


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 10 completed successfully!
  Training time: 2.48 minutes
  Validation F1: 0.8997
  Test F1: 0.9075

Progress Summary:
  Completed: 10/12 (83.3%)
  Elapsed time: 31.0 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 6.2 minutes
  Best F1 so far: 0.9080 (Experiment #5)

Random Search - Experiment 11/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 3.72e-05
  num_train_epochs              : 2
  warmup_steps                  : 88
  weight_decay                  : 0.011326496115986653

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2249,0.246046,0.904005,0.909242,0.879124,0.891181
2,0.1581,0.305885,0.910998,0.908923,0.894361,0.900861


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 11 completed successfully!
  Training time: 2.40 minutes
  Validation F1: 0.9009
  Test F1: 0.8988

Progress Summary:
  Completed: 11/12 (91.7%)
  Elapsed time: 33.5 minutes
  Avg time/experiment: 3.0 minutes
  ETA: 3.0 minutes
  Best F1 so far: 0.9080 (Experiment #5)

Random Search - Experiment 12/12
Hyperparameters:
  per_device_train_batch_size   : 16 (Fixed)
  learning_rate                 : 4.77e-05
  num_train_epochs              : 3
  warmup_steps                  : 58
  weight_decay                  : 0.01159662522202142

Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing datasets...


Map:   0%|          | 0/7360 [00:00<?, ? examples/s]

Map:   0%|          | 0/1573 [00:00<?, ? examples/s]

Map:   0%|          | 0/1577 [00:00<?, ? examples/s]


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2431,0.248813,0.900191,0.899188,0.879865,0.888155
2,0.174,0.308973,0.900191,0.903237,0.876177,0.88719
3,0.0713,0.408957,0.911634,0.912353,0.892394,0.900978


Evaluating on validation set...


Evaluating on test set...

✓ Experiment 12 completed successfully!
  Training time: 3.67 minutes
  Validation F1: 0.9010
  Test F1: 0.9048

Progress Summary:
  Completed: 12/12 (100.0%)
  Elapsed time: 37.3 minutes
  Avg time/experiment: 3.1 minutes
  ETA: 0.0 minutes
  Best F1 so far: 0.9080 (Experiment #5)

RANDOM SEARCH COMPLETED
Total time: 0.62 hours (37.26 minutes)
Average time per experiment: 3.10 minutes
Success rate: 12/12


SECTION 10: SAVING FINAL RESULTS

✓ Results saved:
  - hyperparameter_optimization_results_FINAL.xlsx
  - hyperparameter_optimization_results_FINAL.csv
  - grid_search_results_intermediate.xlsx
  - random_search_results_intermediate.xlsx

✓ ALL EXPERIMENTS COMPLETED!

Total experiments: 24
Total time: 1.25 hours


This cell finalizes the hyperparameter optimization process by first combining all results from both the grid_results and random_results lists into a single list (all_results), which is then converted into a Pandas DataFrame (results_df). This DataFrame contains the performance metrics and parameters of every single experiment conducted. The cell concludes by displaying the total number of experiments conducted and the combined total time taken for both search methodologies.