## Hyperparameter Tuning

# DeBERTa Hyperparameter Tuning – Cybersecurity News Classification

## Objective
Tune key training hyperparameters for a fine-tuned DeBERTa model to improve classification performance on cybersecurity threat categories.

## What this notebook covers
- Hyperparameters explored (learning rate, epochs, warmup ratio, weight decay); batch size is held fixed
- Training runs and comparison logic
- Metric tracking (F1, precision/recall)
- Best-configuration selection

## Output
A selected hyperparameter configuration with performance results and rationale.

In [None]:
import torch
from transformers import AutoTokenizer, TrainingArguments, Trainer, DebertaV2ForSequenceClassification, AutoConfig
from datasets import DatasetDict
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils.class_weight import compute_class_weight
import torch.nn as nn
import torch.nn.functional as F
import json

In [None]:
# Read the pre-tokenized dataset from disk and pull out its three splits
tokenized = DatasetDict.load_from_disk('../processed/tokenized_DeBERTa_ds')

train_tokenized, val_tokenized, test_tokenized = (
    tokenized[split_name] for split_name in ('train', 'validation', 'test')
)

In [None]:
# Load the tokenizer for the checkpoint named by MODEL_NAME.
# MODEL_NAME is also the checkpoint the model weights are loaded from in model_init.
MODEL_NAME = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [None]:
# Reload label mappings from disk
with open('../processed/label2id.json', encoding='utf-8') as f:
    label2id = json.load(f)
with open('../processed/id2label.json', encoding='utf-8') as f:
    # JSON object keys are always strings, so restore the integer class ids
    # and sort by id: later cells iterate id2label.values() and rely on the
    # iteration order matching the class index.
    id2label = dict(
        sorted((int(idx), name) for idx, name in json.load(f).items())
    )
# Number of output classes for the classification head
num_labels = len(label2id)

In [None]:
# Create focal loss class for better classification for imbalanced classes
class FocalLoss(nn.Module):
    """Multi-class focal loss with optional per-class weighting.

    Per sample the loss is ``w[t] * (1 - p_t) ** gamma * CE``, where ``p_t``
    is the softmax probability of the target class, ``CE = -log(p_t)`` and
    ``w[t]`` is the weight of the target class (1 when no weights are given).
    The per-sample losses are averaged over the batch.

    Args:
        gamma: Focusing exponent; 0 reduces the unweighted loss to plain
            cross-entropy.
        weight: Optional 1-D tensor of per-class weights, indexed by class id.
    """

    def __init__(self, gamma=2.0, weight=None):
        super().__init__()
        self.gamma = gamma
        self.weight = weight

    def forward(self, logits, targets):
        """Return the mean focal loss for ``logits`` against ``targets``.

        Args:
            logits: Raw class scores, one row per sample.
            targets: Integer class id per sample.

        Returns:
            Scalar tensor holding the batch-mean focal loss.
        """
        # Cross-entropy is taken WITHOUT class weights so that exp(-ce) is the
        # true target-class probability. Folding the weights in here would
        # turn pt into p_t ** w and distort the (1 - pt) ** gamma factor.
        ce_loss = F.cross_entropy(logits, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt) ** self.gamma * ce_loss

        if self.weight is not None:
            # Follow the loss tensor's device/dtype so the weights need not be
            # pre-placed on the model's device.
            weight = self.weight.to(device=ce_loss.device, dtype=ce_loss.dtype)
            # clamp keeps ignore_index (-100) targets indexable; their
            # cross-entropy is already 0, so the weight picked is irrelevant.
            focal_loss = focal_loss * weight[targets.clamp(min=0)]

        return focal_loss.mean()

class DebertaForFocalLoss(DebertaV2ForSequenceClassification):
    """DeBERTa sequence classifier whose loss is computed with FocalLoss.

    The parent forward pass is always invoked with ``labels=None`` so that it
    produces logits only; the loss is then derived from those logits here.

    Args:
        config: Model configuration forwarded to the parent constructor.
        gamma: Focusing exponent handed to FocalLoss.
        class_weights: Optional per-class weights handed to FocalLoss.
    """

    def __init__(self, config, gamma=2.0, class_weights=None):
        super().__init__(config)
        self.focal = FocalLoss(gamma=gamma, weight=class_weights)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        labels=None,
        **kwargs
    ):
        """Run the classifier and return ``{"loss": ..., "logits": ...}``.

        ``loss`` is None when no labels are supplied.
        """
        # NOTE(review): **kwargs is forwarded verbatim to the parent forward;
        # confirm the installed Trainer does not inject extra keys that the
        # parent signature rejects.
        backbone_out = super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=None,   # keep the parent from computing its own loss
            **kwargs
        )
        logits = backbone_out.logits

        # Focal loss replaces the parent's loss whenever labels are present
        loss = self.focal(logits, labels) if labels is not None else None

        # Plain dict carrying the loss and logits entries for the Trainer
        return {"loss": loss, "logits": logits}

In [None]:
# Create evaluation metrics function for trainer
def compute_metrics(eval_preds):
    """Compute weighted classification metrics for the Trainer.

    Args:
        eval_preds: Pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` along axis 1.

    Returns:
        Dict with ``f1``, ``precision``, ``recall`` (support-weighted
        averages, undefined values reported as 0) and ``accuracy``.
    """
    # accuracy_score is only imported in a later notebook cell, after the
    # Trainer has already called this function during tuning; import it here
    # so evaluation does not fail with a NameError.
    from sklearn.metrics import accuracy_score

    logits, labels = eval_preds
    preds = np.argmax(logits, axis=1)

    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )

    return {
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'accuracy': acc
    }

In [None]:
# Derive balanced class weights from the training labels
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

y_train = np.array(train_tokenized['labels'], dtype=int)
classes = np.unique(y_train)

# NOTE(review): the weight vector is ordered by the sorted unique training
# labels; it lines up with the logits only if every class id occurs in the
# training split - confirm.
class_weights_np = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
print('Class weights:', class_weights_np)

# Float tensor placed on DEVICE, consumed by model_init
class_weights = torch.tensor(class_weights_np, dtype=torch.float, device=DEVICE)

In [None]:
# Create function to reinitialize a new model on each hyperparam tune
def model_init():
    """Build a freshly initialised focal-loss DeBERTa classifier.

    Passed to the Trainer as ``model_init`` so that each training run starts
    from the pretrained checkpoint rather than from previously tuned weights.

    Returns:
        A DebertaForFocalLoss loaded from MODEL_NAME with the label mappings,
        class weights and gamma=2.0.
    """
    # Config and weights both come from MODEL_NAME so the two cannot drift
    # apart if the checkpoint name is changed.
    config = AutoConfig.from_pretrained(
        MODEL_NAME,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id
    )

    # Recreate model with custom class; the extra keyword arguments are the
    # ones DebertaForFocalLoss.__init__ declares.
    model = DebertaForFocalLoss.from_pretrained(
        MODEL_NAME,
        config=config,
        class_weights=class_weights,
        gamma=2.0
    )
    return model


In [None]:
# Baseline training arguments used by the tuning trainer
base_args = TrainingArguments(
    output_dir='../models/tuning_outputs',

    # Mixed precision: bfloat16 on, float16 off
    bf16=True,
    fp16=False,

    # 2 samples per step with 8 accumulation steps per optimizer update
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    per_device_eval_batch_size=2,

    # Evaluate and checkpoint every epoch; reload the best-F1 checkpoint at the end
    eval_strategy='epoch',
    save_strategy='epoch',
    metric_for_best_model='f1',
    load_best_model_at_end=True,

    # Log every 50 steps
    logging_strategy='steps',
    logging_steps=50
)

In [None]:
# Trainer used for the hyperparameter search: model_init supplies a fresh
# model, training uses the train split and evaluation uses the validation split
trainer = Trainer(
    args=base_args,
    model_init=model_init,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized
)

In [None]:
# Hyperparam tuning grid
def hp_grid(trial):
    """Sample one hyperparameter configuration from the tuning grid.

    Args:
        trial: Object exposing ``suggest_categorical(name, choices)``.

    Returns:
        Dict mapping each hyperparameter name to the value the trial chose.
    """
    # (name, candidate values) pairs, sampled in this order
    search_space = (
        ('learning_rate', [1e-5, 2e-5, 3e-5]),
        ('num_train_epochs', [3, 4, 5]),
        ('warmup_ratio', [0.0, 0.1]),
        ('weight_decay', [0.01, 0.05]),
    )
    return {
        name: trial.suggest_categorical(name, choices)
        for name, choices in search_space
    }

In [None]:
# Run tuner
best_run = trainer.hyperparameter_search(
    direction='maximize',
    # hp_grid uses the Optuna trial API (trial.suggest_categorical), so pin
    # the backend instead of relying on whichever one is auto-detected.
    backend='optuna',
    hp_space=hp_grid,
    n_trials=12,
    # Rank trials on validation F1, the same metric used for checkpoint
    # selection. Without this the default objective is the sum of every
    # metric returned by compute_metrics.
    compute_objective=lambda metrics: metrics['eval_f1']
)
# Display the winning trial
best_run

## Evaluation

In [None]:
# Show the hyperparameters of the best trial.
# Values noted in the notebook (presumably from an earlier run - they may
# differ when the search is re-executed):
# {'learning_rate': 1e-05,
#  'num_train_epochs': 4,
#  'warmup_ratio': 0.0,
#  'weight_decay': 0.05}
best_run.hyperparameters

In [None]:
# Unpack the winning trial's hyperparameters into named variables
best_lr, best_epochs, best_decay, best_warmup = (
    best_run.hyperparameters[param]
    for param in ('learning_rate', 'num_train_epochs', 'weight_decay', 'warmup_ratio')
)

In [None]:
# Training arguments for the tuned model: same setup as the tuning runs,
# plus the hyperparameters picked by the search
final_args = TrainingArguments(
    output_dir="../models/DeBERTa_tuned",

    # Tuned values
    learning_rate=best_lr,
    num_train_epochs=best_epochs,
    warmup_ratio=best_warmup,
    weight_decay=best_decay,

    # Mixed precision: bfloat16 on, float16 off
    bf16=True,
    fp16=False,

    # 2 samples per step with 8 accumulation steps per optimizer update
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    per_device_eval_batch_size=2,

    # Evaluate and checkpoint every epoch; reload the best-F1 checkpoint at the end
    eval_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model="f1",
    load_best_model_at_end=True,

    logging_steps=50
)


In [None]:
# Fit the tuned configuration on the train split (validated on the
# validation split), then write the resulting model to disk
final_trainer = Trainer(
    args=final_args,
    model_init=model_init,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized
)

final_trainer.train()
final_trainer.save_model("../models/DeBERTa_tuned")

In [None]:
# Evaluate the tuned model on final_trainer's eval_dataset (the validation
# split) and display the metrics returned by compute_metrics
final_trainer.evaluate()

In [None]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datasets import concatenate_datasets
import torch

In [None]:
# Cyberpunk chart theme: reset to matplotlib defaults, then apply dark
# backgrounds with neon line colors
plt.style.use('default')

# Neon colors cycled through by successive plot elements
_neon_cycle = plt.cycler(color=['#ff2e2e', '#00eaff', '#40ffb3', '#ff9f1c', '#d11aff'])

_cyberpunk_rc = {
    # Canvas and axes
    'figure.facecolor': 'black',
    'axes.facecolor': '#0d0d0d',
    'axes.edgecolor': 'white',
    'axes.labelcolor': 'white',
    'axes.titlecolor': 'white',

    # Ticks
    'xtick.color': 'white',
    'ytick.color': 'white',
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,

    # Grid
    'axes.grid': True,
    'grid.color': '#333333',
    'grid.linestyle': '--',
    'grid.linewidth': 0.4,

    # Color cycle
    'axes.prop_cycle': _neon_cycle,

    # Text
    'text.color': 'white',

    # Legend
    'legend.facecolor': '#1a1a1a',
    'legend.edgecolor': 'white',
    'legend.fontsize': 10,

    # Lines and markers
    'lines.linewidth': 2.0,
    'lines.markersize': 6,

    # Saved figures keep the black background
    'savefig.facecolor': 'black',
    'savefig.edgecolor': 'black',
}

# Apply every override globally
plt.rcParams.update(_cyberpunk_rc)


In [None]:
# Merge the training and validation splits into a single training set for the
# production model; the test split stays separate for the final evaluation
full_train = concatenate_datasets([train_tokenized, val_tokenized])

In [None]:
# Re-display the best trial's hyperparameters before building the production
# training arguments
best_run.hyperparameters

In [None]:
# Create production args with tuned params
production_args = TrainingArguments(
    output_dir='../models/DeBERTa_production',
    learning_rate=best_lr,
    num_train_epochs=best_epochs,
    weight_decay=best_decay,
    warmup_ratio=best_warmup,

    # The production run trains on train+validation, so the test split is the
    # only held-out data left. Evaluating on it every epoch and reloading the
    # best-F1 checkpoint would select the model on the test set and inflate
    # the reported test metrics. In-training evaluation and best-checkpoint
    # loading are therefore off: the model trains for the tuned number of
    # epochs and is scored on the test split once, afterwards.
    eval_strategy='no',
    save_strategy='epoch',
    load_best_model_at_end=False,

    # 2 samples per step with 8 accumulation steps per optimizer update
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,

    logging_steps=50,
    # Mixed precision: bfloat16 on, float16 off
    fp16=False,
    bf16=True
)

In [None]:
# Production trainer: trains on the merged train+validation data.
# NOTE(review): eval_dataset is the held-out test split; whether it is
# consulted during train() depends on the evaluation settings in
# production_args - confirm it is not used for checkpoint selection.
production_trainer = Trainer(
    args=production_args,
    model_init=model_init,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=full_train,
    eval_dataset=test_tokenized
)

In [None]:
# Train the production model, then save it to the same directory that
# production_args uses as its output_dir
production_trainer.train()
production_trainer.save_model('../models/DeBERTa_production')

In [None]:
# Run the production model over the test split
prod_output = production_trainer.predict(test_tokenized)

# Raw scores and gold labels; the predicted class is the highest-scoring column
logits, y_true = prod_output.predictions, prod_output.label_ids
y_pred = np.argmax(logits, axis=1)

In [None]:
# Compute the headline test metrics first, then print them together
test_accuracy = accuracy_score(y_true, y_pred)
test_f1_macro = f1_score(y_true, y_pred, average='macro')
test_f1_weighted = f1_score(y_true, y_pred, average='weighted')
test_report = classification_report(y_true, y_pred, digits=4)

print('Test Accuracy:', test_accuracy)
print('Test F1 - Macro:', test_f1_macro)
print('Test F1 - Weighted:', test_f1_weighted)
print('\nClassification Report\n')
print(test_report)

In [None]:
# Plot test per class confusion matrix (each row normalized over the true class)
# NOTE(review): assumes id2label.values() iterates in class-index order and
# that class ids are 0..n-1 - confirm against the saved mapping.
class_names = list(id2label.values())

# Pin the label set so the matrix always has one row/column per class.
# Otherwise a class missing from both y_true and y_pred would shrink the
# matrix and shift the tick labels onto the wrong rows/columns.
cm = confusion_matrix(
    y_true, y_pred,
    labels=np.arange(len(class_names)),
    normalize='true'
)

plt.figure(figsize=(8,6))
sns.heatmap(
    cm, annot=True, fmt='.2f', cmap='viridis',
    xticklabels=class_names,
    yticklabels=class_names
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix: Test set')
plt.show()

In [None]:
# Plot per class eval metrics as grouped bars (precision / recall / F1)
# NOTE(review): assumes id2label.values() iterates in class-index order and
# that class ids are 0..n-1 - confirm against the saved mapping.
labels = list(id2label.values())
x = np.arange(len(labels))
width = 0.25

# Pin the label set so each metric array has exactly one entry per class.
# Otherwise a class absent from the test data would make the arrays shorter
# than the tick positions and break (or mislabel) the bars.
precision, recall, f1, support = precision_recall_fscore_support(
    y_true, y_pred, labels=x, zero_division=0
)

plt.figure(figsize=(12,6))
# Three bars per class, offset by one bar width around the class position
plt.bar(x - width, precision, width, label="Precision")
plt.bar(x, recall, width, label="Recall")
plt.bar(x + width, f1, width, label="F1")

plt.xticks(x, labels)
plt.ylabel("Score")
plt.title("Per Class Precision, Recall, and F1: Test-set")
plt.ylim(0, 1)
plt.legend()
plt.show()
