In [1]:
!pip install transformers datasets accelerate optuna -U



In [2]:
# This script performs Random Search for hyperparameter optimization
# on a clickbait detection dataset using DistilBERT multilingual model.
# NOTE: This process will involve multiple full training runs and may take
# several hours to complete on Google Colab.

# 1. SETUP AND INSTALLATION
# Run this command first in your Colab notebook:
# !pip install transformers datasets accelerate ray[tune] optuna -U

import torch
import os
import numpy as np
import pandas as pd
from datasets import Dataset
from transformers import (
    DistilBertForSequenceClassification,
    DistilBertTokenizerFast,
    TrainingArguments,
    Trainer,
    set_seed
)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from google.colab import files, drive

# Set a consistent seed for reproducibility across runs
set_seed(42)

# Ensure GPU is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("GPU not available, using CPU.")

# Mount Google Drive to save results
drive.mount('/content/drive')

# --- 2. DATA PREPARATION ---

print("\n--- Loading Dataset ---")
# Upload the CSV file
uploaded = files.upload()

# Load the clickbait dataset
file_name = list(uploaded.keys())[0]
df = pd.read_csv(file_name)

print(f"\nDataset shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(df.head())

# Check label distribution
print(f"\nLabel Distribution:")
print(df['clickbait'].value_counts())

# Limit data size to make Grid Search feasible
# Using 3000 training samples and 600 evaluation samples
from sklearn.model_selection import train_test_split

train_df, eval_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['clickbait']
)

# Further limit for faster experiments
train_df = train_df.sample(n=min(3000, len(train_df)), random_state=42)
eval_df = eval_df.sample(n=min(600, len(eval_df)), random_state=42)

print(f"\nTrain size: {len(train_df)}")
print(f"Eval size: {len(eval_df)}")

# Convert to Hugging Face Dataset format
train_dataset = Dataset.from_pandas(train_df[['headline', 'clickbait']].reset_index(drop=True))
eval_dataset = Dataset.from_pandas(eval_df[['headline', 'clickbait']].reset_index(drop=True))

# Rename 'clickbait' to 'labels' for compatibility
train_dataset = train_dataset.rename_column("clickbait", "labels")
eval_dataset = eval_dataset.rename_column("clickbait", "labels")

# Initialize Tokenizer
MODEL_NAME = "distilbert-base-multilingual-cased"
tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["headline"], truncation=True, padding=True, max_length=128)

# Apply tokenization
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_eval = eval_dataset.map(tokenize_function, batched=True)

# Set format to PyTorch tensors
tokenized_train.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])
tokenized_eval.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])

print("\n--- Data Preparation Complete ---")

# --- 3. MODEL, METRICS, AND HYPERPARAMETER DEFINITION ---

# Function to initialize a fresh model for each grid search run
def model_init():
    return DistilBertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2).to(device)

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    acc = accuracy_score(p.label_ids, preds)
    precision = precision_score(p.label_ids, preds, average="binary", zero_division=0)
    recall = recall_score(p.label_ids, preds, average="binary", zero_division=0)
    f1 = f1_score(p.label_ids, preds, average="binary", zero_division=0)
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# --- HYPERPARAMETER RANDOM SEARCH DEFINITION ---
def tune_hp(trial):
    """
    This function defines the hyperparameter space for RANDOM SEARCH.
    Random Search samples randomly from continuous and categorical distributions.

    Based on your assigned hyperparameters:
    - num_train_epochs: Random integer between 2-5
    - per_device_train_batch_size: Random choice from [16, 32, 64]
    - weight_decay: Random float between 0.01-0.1

    This allows more flexibility than Grid Search!
    """

    # 1. Number of training epochs (random integer)
    num_train_epochs = trial.suggest_int("num_train_epochs", 2, 5)

    # 2. Batch Size (random categorical)
    per_device_train_batch_size = trial.suggest_categorical("per_device_train_batch_size", [16, 32, 64])

    # 3. Weight Decay (random float - continuous)
    weight_decay = trial.suggest_float("weight_decay", 0.01, 0.1)

    return {
        "num_train_epochs": num_train_epochs,
        "per_device_train_batch_size": per_device_train_batch_size,
        "weight_decay": weight_decay,
        "learning_rate": 2e-5,  # Fixed learning rate
    }


# --- 4. TRAINING ARGUMENTS (Fixed for all runs) ---
training_args = TrainingArguments(
    output_dir="./random_search_results",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    fp16=torch.cuda.is_available(),
    report_to="none",
    warmup_steps=100,
    logging_steps=50,
    seed=42,
)

# Initialize the Trainer
trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

# --- 5. EXECUTION OF RANDOM SEARCH ---
print("\n" + "="*70)
print("STARTING RANDOM SEARCH")
print("="*70)
print("\nOptimizing for 'f1' score...")
print("Total random trials: 10")
print("Random Search samples randomly from the hyperparameter space.\n")

import time
start_time = time.time()

best_trial = trainer.hyperparameter_search(
    backend="optuna",
    hp_space=tune_hp,
    direction="maximize",
    n_trials=10,
)

end_time = time.time()
duration = end_time - start_time

print("\n" + "="*70)
print("RANDOM SEARCH COMPLETE")
print("="*70)
print(f"\nTotal time taken: {duration:.2f} seconds ({duration/60:.2f} minutes)")

# --- 6. RESULTS ---
print("\n--- BEST HYPERPARAMETERS FOUND ---")

if best_trial:
    print(f"\nBest Trial Object: {best_trial}")

    best_hps = best_trial.hyperparameters
    best_f1 = best_trial.objective

    print("\n📊 Best Hyperparameters:")
    for key, value in best_hps.items():
        print(f"  {key}: {value}")

    print(f"\n🎯 Best F1 Score: {best_f1:.4f}")

    # Save results to Excel
    results_data = {
        "Hyperparameter": list(best_hps.keys()),
        "Value": list(best_hps.values())
    }
    results_df = pd.DataFrame(results_data)
    results_df.loc[len(results_df)] = ["Best F1 Score", best_f1]
    results_df.loc[len(results_df)] = ["Total Time (minutes)", duration/60]

    excel_path = "/content/drive/MyDrive/random_search_results.xlsx"
    results_df.to_excel(excel_path, index=False)

    print(f"\n✅ Results saved to: {excel_path}")
else:
    print("❌ Search failed or no best trial found.")

print("\n" + "="*70)
print("To train final model with best hyperparameters:")
print("Use the best_hps dictionary shown above in a new TrainingArguments.")
print("="*70)

Using GPU: Tesla T4
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

--- Loading Dataset ---


Saving clickbait_data.csv to clickbait_data (2).csv

Dataset shape: (32000, 2)
Columns: ['headline', 'clickbait']
                                            headline  clickbait
0                                 Should I Get Bings          1
1      Which TV Female Friend Group Do You Belong In          1
2  The New "Star Wars: The Force Awakens" Trailer...          1
3  This Vine Of New York On "Celebrity Big Brothe...          1
4  A Couple Did A Stunning Photo Shoot With Their...          1

Label Distribution:
clickbait
0    16001
1    15999
Name: count, dtype: int64

Train size: 3000
Eval size: 600


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]


--- Data Preparation Complete ---


  trainer = Trainer(
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2025-11-08 15:28:11,486] A new study created in memory with name: no-name-a199b853-544e-4340-ad23-effc10263ada



STARTING RANDOM SEARCH

Optimizing for 'f1' score...
Total random trials: 10
Random Search samples randomly from the hyperparameter space.



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.36381,0.946667,0.924765,0.973597,0.948553
2,0.587200,0.056655,0.983333,0.989967,0.976898,0.983389
3,0.113500,0.057931,0.985,0.99,0.980198,0.985075


[I 2025-11-08 15:29:14,199] Trial 0 finished with value: 3.9402726466676516 and parameters: {'num_train_epochs': 3, 'per_device_train_batch_size': 64, 'weight_decay': 0.04629549006389165}. Best is trial 0 with value: 3.9402726466676516.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.363797,0.946667,0.924765,0.973597,0.948553
2,0.587200,0.056596,0.983333,0.989967,0.976898,0.983389
3,0.113500,0.054319,0.985,0.99,0.980198,0.985075
4,0.028200,0.054396,0.985,0.99,0.980198,0.985075


[I 2025-11-08 15:30:53,133] Trial 1 finished with value: 3.9402726466676516 and parameters: {'num_train_epochs': 4, 'per_device_train_batch_size': 64, 'weight_decay': 0.031246550560741798}. Best is trial 0 with value: 3.9402726466676516.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.0606,0.042797,0.99,0.996656,0.983498,0.990033
2,0.0288,0.063405,0.988333,0.996644,0.980198,0.988353
3,0.0012,0.04548,0.991667,0.996667,0.986799,0.991708


[I 2025-11-08 15:32:19,431] Trial 2 finished with value: 3.9668401392378043 and parameters: {'num_train_epochs': 3, 'per_device_train_batch_size': 16, 'weight_decay': 0.07445647733106477}. Best is trial 2 with value: 3.9668401392378043.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.363775,0.946667,0.924765,0.973597,0.948553
2,0.587200,0.06089,0.983333,0.989967,0.976898,0.983389
3,0.113100,0.050699,0.985,0.99,0.980198,0.985075
4,0.028500,0.051614,0.986667,0.993311,0.980198,0.986711


[I 2025-11-08 15:33:46,588] Trial 3 finished with value: 3.946886686713094 and parameters: {'num_train_epochs': 4, 'per_device_train_batch_size': 64, 'weight_decay': 0.013086416480385783}. Best is trial 2 with value: 3.9668401392378043.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5926,0.10817,0.965,0.946203,0.986799,0.966074
2,0.0515,0.064138,0.986667,0.996633,0.976898,0.986667
3,0.0211,0.041627,0.986667,0.990033,0.983498,0.986755


[I 2025-11-08 15:35:10,314] Trial 4 finished with value: 3.9469532059804298 and parameters: {'num_train_epochs': 3, 'per_device_train_batch_size': 32, 'weight_decay': 0.0877512281305692}. Best is trial 2 with value: 3.9668401392378043.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.363892,0.946667,0.924765,0.973597,0.948553
2,0.587200,0.060852,0.983333,0.989967,0.976898,0.983389
3,0.113100,0.056112,0.985,0.99,0.980198,0.985075


[I 2025-11-08 15:36:18,921] Trial 5 finished with value: 3.9402726466676516 and parameters: {'num_train_epochs': 3, 'per_device_train_batch_size': 64, 'weight_decay': 0.030766522306489665}. Best is trial 2 with value: 3.9668401392378043.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.0605,0.044879,0.988333,0.996644,0.980198,0.988353
2,0.0258,0.052758,0.988333,0.993333,0.983498,0.988391
3,0.0043,0.047872,0.991667,0.996667,0.986799,0.991708
4,0.0005,0.043502,0.991667,0.996667,0.986799,0.991708
5,0.0004,0.061108,0.99,0.996656,0.983498,0.990033


[I 2025-11-08 15:39:31,480] Trial 6 finished with value: 3.9601870908209946 and parameters: {'num_train_epochs': 5, 'per_device_train_batch_size': 16, 'weight_decay': 0.032521659485126087}. Best is trial 2 with value: 3.9668401392378043.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.0606,0.044163,0.99,0.996656,0.983498,0.990033
2,0.0335,0.056142,0.99,0.996656,0.983498,0.990033
3,0.0009,0.041079,0.991667,0.993377,0.990099,0.991736
4,0.0007,0.054677,0.99,0.996656,0.983498,0.990033


[I 2025-11-08 15:41:40,693] Trial 7 finished with value: 3.9601870908209946 and parameters: {'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'weight_decay': 0.07934836724101123}. Best is trial 2 with value: 3.9668401392378043.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5926,0.108165,0.965,0.946203,0.986799,0.966074


[I 2025-11-08 15:41:48,773] Trial 8 pruned. 
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5926,0.10836,0.965,0.946203,0.986799,0.966074


[I 2025-11-08 15:41:56,605] Trial 9 pruned. 



RANDOM SEARCH COMPLETE

Total time taken: 825.20 seconds (13.75 minutes)

--- BEST HYPERPARAMETERS FOUND ---

Best Trial Object: BestRun(run_id='2', objective=3.9668401392378043, hyperparameters={'num_train_epochs': 3, 'per_device_train_batch_size': 16, 'weight_decay': 0.07445647733106477}, run_summary=None)

📊 Best Hyperparameters:
  num_train_epochs: 3
  per_device_train_batch_size: 16
  weight_decay: 0.07445647733106477

🎯 Best F1 Score: 3.9668

✅ Results saved to: /content/drive/MyDrive/random_search_results.xlsx

To train final model with best hyperparameters:
Use the best_hps dictionary shown above in a new TrainingArguments.
