# LoRA Fine-tuning Benchmark: DistilBERT on SST-2

This notebook benchmarks LoRA fine-tuning of DistilBERT on the SST-2 sentiment classification task, comparing several LoRA ranks under otherwise identical settings.

**Configuration:**
- Base Model: DistilBERT
- Dataset: SST-2 (Stanford Sentiment Treebank)
- Split Ratio: Train:Val:Test = 8:1:1 (contiguous slices, in original order, of the GLUE SST-2 `train` split)
- LoRA Ranks: [2, 4, 8, 16]
- Training Epochs: up to 30, with early stopping after 3 consecutive epochs without an improvement in validation F1
- Random Seed: 42

**Metrics Tracked:**
- Final Accuracy
- Running time per epoch
- Total training time
- Time to convergence
- GPU memory used
- Total parameters
- Trainable parameters
- Convergence epoch

In [1]:
# Import necessary libraries
import torch
import numpy as np
import random
import time
import json
import warnings
import os
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score
import gc

# Silence UserWarnings raised from torch.nn.parallel._functions only;
# all other warnings are still shown.
warnings.filterwarnings('ignore', category=UserWarning, module='torch.nn.parallel._functions')

# Restrict this process to a single GPU: the device with index 1 in the
# system-wide CUDA ordering. Inside the process that device is the only one
# visible, so the code below addresses it as device 0.
# NOTE(review): an earlier version of this comment said "GPU 0" while the value
# is "1" -- confirm which physical GPU is intended.
# NOTE(review): this only takes effect if it runs before CUDA is first
# initialized in the process -- keep it ahead of any torch.cuda call.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

def is_main_process():
    """Tell whether the current process should be the one that prints/logs.

    The decision is based on the ``LOCAL_RANK`` environment variable:

    * variable not set  -> ``True`` (treated as a single, main process)
    * variable set      -> ``True`` only when its integer value is 0

    Raises:
        ValueError: if ``LOCAL_RANK`` is set to something ``int()`` cannot parse.
    """
    rank_text = os.environ.get("LOCAL_RANK")
    return rank_text is None or int(rank_text) == 0

# Pick the compute device: CUDA when PyTorch reports it as available, else CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Report the choice once (main process only); GPU details are shown only
# when the selected device is a CUDA device.
if is_main_process():
    print(f"Using device: {device}")
    if device.type == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"Number of GPUs available: {torch.cuda.device_count()}")
        # memory_allocated() is divided by 1024**2 to display the value in MB
        print(f"Initial GPU Memory: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
GPU: NVIDIA RTX 5000 Ada Generation
Number of GPUs available: 1
Initial GPU Memory: 0.00 MB


In [2]:
# Seed every random number generator used in this notebook
def set_seed(seed=42):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices).

    Also switches cuDNN to deterministic mode and turns off its benchmark
    mode.

    Args:
        seed: integer seed handed to every generator (default 42).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)
print("Random seed set to 42")

Random seed set to 42


In [3]:
# Load SST-2 and carve train/validation/test subsets out of its "train" split
print("Loading SST-2 dataset...")
dataset = load_dataset("glue", "sst2")

# Only the "train" split of the loaded dataset is used from here on
train_data = dataset["train"]
total_samples = len(train_data)

# 8:1:1 split sizes; the test subset absorbs the rounding remainder
train_size = int(0.8 * total_samples)
val_size = int(0.1 * total_samples)
test_size = total_samples - train_size - val_size

# The three subsets are contiguous index ranges taken in the dataset's
# original order (no shuffling happens here).
val_end = train_size + val_size
train_dataset, val_dataset, test_dataset = (
    train_data.select(range(start, stop))
    for start, stop in (
        (0, train_size),
        (train_size, val_end),
        (val_end, total_samples),
    )
)

print("Dataset split complete:")
for split_label, split_data in (
    ("Train", train_dataset),
    ("Validation", val_dataset),
    ("Test", test_dataset),
):
    print(f"  {split_label}: {len(split_data)} samples")

Loading SST-2 dataset...
Dataset split complete:
  Train: 53879 samples
  Validation: 6734 samples
  Test: 6736 samples


In [4]:
# Tokenize the three subsets with the DistilBERT tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_function(examples):
    """Tokenize the ``"sentence"`` field of a batch of examples.

    Every sequence is truncated and padded to exactly 128 tokens
    (``padding="max_length"``), so all tokenized rows have the same length.
    """
    sentences = examples["sentence"]
    return tokenizer(
        sentences,
        padding="max_length",
        truncation=True,
        max_length=128,
    )

print("Tokenizing datasets...")
# Same batched mapping applied to train, validation and test, in that order
tokenized_train, tokenized_val, tokenized_test = (
    subset.map(tokenize_function, batched=True)
    for subset in (train_dataset, val_dataset, test_dataset)
)

print("Tokenization complete!")

Loading tokenizer...
Tokenizing datasets...
Tokenization complete!


In [5]:
# Define compute metrics function
from sklearn.metrics import f1_score

def compute_metrics(eval_pred):
    """Turn raw model outputs into accuracy and binary F1.

    Args:
        eval_pred: a pair ``(predictions, labels)``. The predicted class is the
            argmax of ``predictions`` along axis 1, so ``predictions`` must
            have at least two dimensions.

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    scores, labels = eval_pred
    predicted_classes = np.argmax(scores, axis=1)

    accuracy = accuracy_score(labels, predicted_classes)
    f1 = f1_score(labels, predicted_classes, average="binary")
    return {"accuracy": accuracy, "f1": f1}

In [6]:
# Custom Trainer to track metrics per epoch with early stopping
from transformers.trainer_callback import TrainerCallback

class MetricsCallback(TrainerCallback):
    """Trainer callback that records per-epoch metrics and applies early stopping.

    Recorded data (all plain Python lists / scalars, read by the caller after
    training):

    * ``epoch_times``      -- wall-clock seconds between ``on_epoch_begin`` and
      ``on_epoch_end`` of each epoch.
    * ``epoch_accuracies`` / ``epoch_f1s`` -- ``eval_accuracy`` / ``eval_f1``
      of the first evaluation seen for each integer epoch.
    * ``best_f1`` / ``best_f1_epoch`` -- highest F1 seen so far and its epoch.
    * ``early_stopped``    -- whether this callback requested a stop.

    Early stopping: training is asked to stop once the recorded F1 has failed
    to exceed ``best_f1`` for ``early_stop_patience`` consecutive epochs.

    Args:
        early_stop_patience: number of consecutive non-improving epochs that
            triggers the stop request (default 3).
    """

    def __init__(self, early_stop_patience=3):
        self.epoch_times = []
        self.epoch_accuracies = []
        self.epoch_f1s = []
        self.epoch_start_time = None
        self.best_f1 = 0.0
        self.best_f1_epoch = None
        # Integer epochs already recorded; a later evaluation reported for the
        # same epoch number is ignored instead of being appended again.
        self.logged_epochs = set()

        # Early stopping: stop when validation F1 fails to improve over best_f1 for `early_stop_patience` consecutive epochs
        self.early_stop_patience = early_stop_patience
        self.epochs_without_improvement = 0
        self.early_stopped = False

    def on_epoch_begin(self, args, state, control, **kwargs):
        """Remember when the epoch started."""
        self.epoch_start_time = time.time()

    def on_epoch_end(self, args, state, control, **kwargs):
        """Append the elapsed time of the epoch that just finished."""
        epoch_time = time.time() - self.epoch_start_time
        self.epoch_times.append(epoch_time)

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        """Record evaluation metrics once per epoch and update early stopping.

        Evaluations are ignored when they carry no ``eval_accuracy``, when no
        epoch progress has been made yet, or when the integer epoch has
        already been recorded.
        """
        if 'eval_accuracy' not in metrics:
            return
        # state.epoch can be None when evaluate() is called before any
        # training; comparing None with 0 would raise TypeError.
        if state.epoch is None or not state.epoch > 0:
            return

        current_epoch = int(state.epoch)
        # Only log data once per epoch
        if current_epoch in self.logged_epochs:
            return
        self.logged_epochs.add(current_epoch)

        accuracy = metrics['eval_accuracy']
        f1 = metrics.get('eval_f1', 0.0)
        self.epoch_accuracies.append(accuracy)
        self.epoch_f1s.append(f1)

        # Update best F1 and reset counter if improved (strictly greater)
        if f1 > self.best_f1:
            self.best_f1 = f1
            self.best_f1_epoch = current_epoch
            self.epochs_without_improvement = 0
        else:
            # F1 did not improve over best_f1
            self.epochs_without_improvement += 1

        # Trigger early stopping if no improvement for patience epochs
        if self.epochs_without_improvement >= self.early_stop_patience and not self.early_stopped:
            self.early_stopped = True
            # Request Trainer to stop training after this evaluation
            control.should_training_stop = True
            if is_main_process():
                print(f"\n[Early Stopping] No improvement over best F1 for {self.epochs_without_improvement} consecutive epochs. Stopping training at epoch {current_epoch}.")
                print(f"[Early Stopping Info] Best F1: {self.best_f1:.4f} at epoch {self.best_f1_epoch}\n")

In [7]:
# Function to train and evaluate LoRA model with a specific rank
def _find_latest_checkpoint(output_dir):
    """Return the ``checkpoint-<number>`` sub-directory with the highest number, or None.

    Entries whose number part is not purely numeric are skipped instead of
    raising, and ``None`` is returned when ``output_dir`` is not a directory or
    holds no usable checkpoint.
    """
    if not os.path.isdir(output_dir):
        return None

    numbered = []
    for entry in os.listdir(output_dir):
        if not entry.startswith("checkpoint-"):
            continue
        number_text = entry.split("-")[1]
        candidate = os.path.join(output_dir, entry)
        if number_text.isdigit() and os.path.isdir(candidate):
            numbered.append((int(number_text), candidate))

    if not numbered:
        return None
    # The numeric suffix is compared as an integer so that e.g. 1000 > 999.
    return max(numbered, key=lambda item: item[0])[1]


def train_lora_model(rank, epochs=30, resume_from_checkpoint=False):
    """Fine-tune DistilBERT with LoRA at the given rank and collect benchmark metrics.

    The run re-seeds all RNGs with 42, wraps a freshly loaded
    ``distilbert-base-uncased`` classifier in a LoRA adapter
    (``lora_alpha = 2 * rank``, dropout 0.1, targets ``q_lin``/``v_lin``),
    trains it on ``tokenized_train`` with per-epoch evaluation on
    ``tokenized_val`` and early stopping via ``MetricsCallback``, and finally
    evaluates on ``tokenized_test``.

    Args:
        rank: LoRA rank ``r``.
        epochs: maximum number of training epochs (early stopping may end the
            run sooner).
        resume_from_checkpoint: when true, continue from the highest-numbered
            ``checkpoint-*`` directory under ``./results_rank_<rank>`` if one
            exists; otherwise training starts from scratch.

    Returns:
        dict of metrics: test/validation accuracy and F1, parameter counts,
        timings (``total_training_time``, ``time_to_convergence``,
        ``average_epoch_time``, ``epoch_times``), per-epoch validation
        histories, peak GPU memory in MB, and the early-stopping summary.

    Note:
        ``final_val_accuracy`` / ``final_val_f1`` are the values recorded at
        the LAST evaluated epoch. The test metrics are computed with whatever
        model the Trainer holds after ``train()``; because
        ``load_best_model_at_end=True`` is set, that is presumably the best-F1
        checkpoint rather than the last epoch -- compare with ``best_val_f1``
        when relating validation and test numbers.
    """
    print(f"\n{'='*80}")
    print(f"Training LoRA model with rank = {rank}")
    print(f"{'='*80}\n")

    # Reset seed for each run
    set_seed(42)

    # Free leftovers from a previous run. Garbage is collected FIRST so that
    # unreachable tensors are released before the CUDA cache is emptied.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Check for existing checkpoint
    output_dir = f"./results_rank_{rank}"
    checkpoint_dir = _find_latest_checkpoint(output_dir) if resume_from_checkpoint else None
    if checkpoint_dir:
        print(f"Found existing checkpoint: {checkpoint_dir}")
        print(f"Resuming training from this checkpoint...\n")

    # Load base model
    print("Loading base DistilBERT model...")
    base_model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=2
    )

    # Configure LoRA
    lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=rank,
        lora_alpha=rank * 2,  # Common practice: alpha = 2 * r
        lora_dropout=0.1,
        target_modules=["q_lin", "v_lin"],  # DistilBERT attention modules
    )

    # Apply LoRA to the model
    model = get_peft_model(base_model, lora_config)

    # Print model parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"\nModel Parameters:")
    print(f"  Total parameters: {total_params:,}")
    print(f"  Trainable parameters: {trainable_params:,}")
    print(f"  Trainable %: {100 * trainable_params / total_params:.2f}%\n")

    # Move model to device
    model.to(device)

    # Record GPU memory before training (print once using is_main_process).
    # Peak statistics are reset here so the peak reported later covers only
    # what happens from this point on.
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()
        initial_memory = torch.cuda.memory_allocated() / 1024**2
        if is_main_process():
            print(f"GPU Memory before training: {initial_memory:.2f} MB")

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        learning_rate=5e-4,
        weight_decay=0.01,
        logging_steps=100,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        save_total_limit=2,  # Keep 2 checkpoints to prevent data loss
        seed=42,
        report_to="none",
        disable_tqdm=False,
    )

    # Create metrics callback
    metrics_callback = MetricsCallback()

    # Create trainer with callback
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        compute_metrics=compute_metrics,
        callbacks=[metrics_callback],
    )

    # Print starting message
    if is_main_process():
        if checkpoint_dir:
            print(f"Resuming training from checkpoint...")
        else:
            print("Starting training from scratch...")

    start_time = time.time()
    # checkpoint_dir is None unless a checkpoint to resume from was found
    trainer.train(resume_from_checkpoint=checkpoint_dir)
    total_training_time = time.time() - start_time

    # Get peak GPU memory
    if torch.cuda.is_available():
        peak_memory = torch.cuda.max_memory_allocated() / 1024**2
        print(f"\nPeak GPU Memory during training: {peak_memory:.2f} MB")
    else:
        peak_memory = 0

    # Evaluate on test set
    print("\nEvaluating on test set...")
    test_results = trainer.evaluate(tokenized_test)
    test_accuracy = test_results['eval_accuracy']
    test_f1 = test_results.get('eval_f1', 0.0)

    # Calculate time to convergence / time to early stop
    # When early stopped, the training has already stopped, so use total_training_time
    time_to_convergence = total_training_time

    # Get final validation accuracy and F1 (values of the last recorded epoch)
    if len(metrics_callback.epoch_accuracies) > 0:
        final_val_accuracy = metrics_callback.epoch_accuracies[-1]
        final_val_f1 = metrics_callback.epoch_f1s[-1]
    else:
        # If epoch_accuracies is empty, evaluate manually
        val_results = trainer.evaluate(tokenized_val)
        final_val_accuracy = val_results['eval_accuracy']
        final_val_f1 = val_results.get('eval_f1', 0.0)

    # Compile results
    results = {
        "rank": rank,
        "final_test_accuracy": test_accuracy,
        "final_test_f1": test_f1,
        "final_val_accuracy": final_val_accuracy,
        "final_val_f1": final_val_f1,
        "total_parameters": total_params,
        "trainable_parameters": trainable_params,
        "trainable_percentage": 100 * trainable_params / total_params,
        "total_training_time": total_training_time,
        "time_to_convergence": time_to_convergence,
        "average_epoch_time": np.mean(metrics_callback.epoch_times) if len(metrics_callback.epoch_times) > 0 else 0,
        "epoch_times": metrics_callback.epoch_times,
        "epoch_accuracies": metrics_callback.epoch_accuracies,
        "epoch_f1s": metrics_callback.epoch_f1s,
        "peak_gpu_memory_mb": peak_memory,
        "early_stopped": metrics_callback.early_stopped,
        "best_f1_epoch": metrics_callback.best_f1_epoch,
        "best_val_f1": metrics_callback.best_f1,
    }

    print(f"\n{'='*80}")
    print(f"Results for rank = {rank}:")
    print(f"  Test Accuracy: {test_accuracy:.4f}")
    print(f"  Test F1: {test_f1:.4f}")
    print(f"  Validation Accuracy: {final_val_accuracy:.4f}")
    print(f"  Validation F1: {final_val_f1:.4f}")
    print(f"  Total Training Time: {total_training_time:.2f}s")
    print(f"  Average Epoch Time: {results['average_epoch_time']:.2f}s")
    print(f"  Best F1 Epoch: {metrics_callback.best_f1_epoch}")
    print(f"  Best Val F1: {metrics_callback.best_f1:.4f}")
    print(f"  Early Stopped: {metrics_callback.early_stopped}")
    if metrics_callback.early_stopped:
        print(f"  Stop Info: epochs_without_improvement={metrics_callback.epochs_without_improvement}")
    print(f"  Peak GPU Memory: {peak_memory:.2f} MB")
    print(f"{'='*80}\n")

    # Clean up: drop every local reference to the model, collect garbage, and
    # only then empty the CUDA cache so the freed blocks can be returned.
    del model
    del base_model
    del trainer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return results

In [8]:
# Train one LoRA model per rank and persist each run's metrics as it finishes
ranks_to_test = [2, 4, 8, 16]
all_results = []

print(
    "Starting LoRA benchmark experiments...",
    f"Testing ranks: {ranks_to_test}",
    "Training epochs: 30",
    "Random seed: 42\n",
    sep="\n",
)

for rank in ranks_to_test:
    # Pass resume_from_checkpoint=True to continue an interrupted run
    results = train_lora_model(rank, epochs=30)
    all_results.append(results)

    # One JSON file per rank, written right after that rank finishes
    result_path = f"lora_rank_{rank}_results.json"
    with open(result_path, "w") as f:
        json.dump(results, f, indent=2)

separator = "=" * 80
print(f"\n{separator}")
print("All experiments completed!")
print(separator)

Starting LoRA benchmark experiments...
Testing ranks: [2, 4, 8, 16]
Training epochs: 30
Random seed: 42


Training LoRA model with rank = 2

Loading base DistilBERT model...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Model Parameters:
  Total parameters: 67,584,004
  Trainable parameters: 628,994
  Trainable %: 0.93%

GPU Memory before training: 258.90 MB
Starting training from scratch...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2531,0.271836,0.901099,0.910843
2,0.2008,0.221725,0.916394,0.924723
3,0.1804,0.245791,0.916246,0.924275
4,0.1591,0.197385,0.925601,0.932959
5,0.1637,0.206367,0.92768,0.934393
6,0.1248,0.239759,0.931838,0.938414
7,0.1503,0.223102,0.932581,0.938365
8,0.1484,0.226806,0.933472,0.940314
9,0.1334,0.241443,0.932729,0.939186
10,0.1103,0.238845,0.936887,0.942991



[Early Stopping] No improvement over best F1 for 3 consecutive epochs. Stopping training at epoch 14.
[Early Stopping Info] Best F1: 0.9438 at epoch 11


Peak GPU Memory during training: 794.18 MB

Evaluating on test set...



Results for rank = 2:
  Test Accuracy: 0.9418
  Test F1: 0.9480
  Validation Accuracy: 0.9384
  Validation F1: 0.9435
  Total Training Time: 1462.70s
  Average Epoch Time: 99.09s
  Best F1 Epoch: 11
  Best Val F1: 0.9438
  Early Stopped: True
  Stop Info: epochs_without_improvement=3
  Peak GPU Memory: 794.18 MB


Training LoRA model with rank = 4



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading base DistilBERT model...

Model Parameters:
  Total parameters: 67,620,868
  Trainable parameters: 665,858
  Trainable %: 0.98%

GPU Memory before training: 275.41 MB
Starting training from scratch...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2572,0.267721,0.905999,0.913842
2,0.1762,0.216547,0.919661,0.926684
3,0.162,0.243183,0.921443,0.929476
4,0.1534,0.200002,0.927977,0.935221
5,0.1487,0.211919,0.930799,0.937146
6,0.1315,0.229582,0.932581,0.939386
7,0.152,0.210265,0.93763,0.943043
8,0.1472,0.244456,0.934363,0.941208
9,0.1367,0.237709,0.935254,0.940873
10,0.1196,0.227235,0.938075,0.943854



[Early Stopping] No improvement over best F1 for 3 consecutive epochs. Stopping training at epoch 16.
[Early Stopping Info] Best F1: 0.9449 at epoch 13


Peak GPU Memory during training: 794.91 MB

Evaluating on test set...



Results for rank = 4:
  Test Accuracy: 0.9464
  Test F1: 0.9521
  Validation Accuracy: 0.9391
  Validation F1: 0.9445
  Total Training Time: 1669.98s
  Average Epoch Time: 99.18s
  Best F1 Epoch: 13
  Best Val F1: 0.9449
  Early Stopped: True
  Stop Info: epochs_without_improvement=3
  Peak GPU Memory: 794.91 MB


Training LoRA model with rank = 8



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading base DistilBERT model...

Model Parameters:
  Total parameters: 67,694,596
  Trainable parameters: 739,586
  Trainable %: 1.09%

GPU Memory before training: 275.69 MB
Starting training from scratch...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.255,0.262409,0.909712,0.918477
2,0.176,0.203041,0.923671,0.930877
3,0.1575,0.25381,0.923819,0.931682
4,0.1308,0.197821,0.930205,0.937149
5,0.1367,0.206793,0.936293,0.94216
6,0.1374,0.227144,0.935996,0.942062
7,0.1424,0.226479,0.937333,0.943447
8,0.1331,0.229308,0.936442,0.942918
9,0.1183,0.245132,0.939263,0.944856
10,0.1226,0.244274,0.937778,0.943841



[Early Stopping] No improvement over best F1 for 3 consecutive epochs. Stopping training at epoch 17.
[Early Stopping Info] Best F1: 0.9455 at epoch 14


Peak GPU Memory during training: 796.13 MB

Evaluating on test set...



Results for rank = 8:
  Test Accuracy: 0.9461
  Test F1: 0.9514
  Validation Accuracy: 0.9393
  Validation F1: 0.9445
  Total Training Time: 1771.07s
  Average Epoch Time: 98.81s
  Best F1 Epoch: 14
  Best Val F1: 0.9455
  Early Stopped: True
  Stop Info: epochs_without_improvement=3
  Peak GPU Memory: 796.13 MB


Training LoRA model with rank = 16

Loading base DistilBERT model...


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Model Parameters:
  Total parameters: 67,842,052
  Trainable parameters: 887,042
  Trainable %: 1.31%

GPU Memory before training: 276.26 MB
Starting training from scratch...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2461,0.25184,0.914315,0.921078
2,0.1762,0.20893,0.923671,0.930144
3,0.1601,0.269493,0.923522,0.93167
4,0.1429,0.191613,0.928868,0.93643
5,0.1633,0.211275,0.934066,0.940049
6,0.1382,0.260194,0.932878,0.939973
7,0.1256,0.204543,0.940303,0.945749
8,0.1479,0.250768,0.934957,0.941864
9,0.1211,0.245914,0.940006,0.945917
10,0.1123,0.227265,0.93956,0.945435



[Early Stopping] No improvement over best F1 for 3 consecutive epochs. Stopping training at epoch 12.
[Early Stopping Info] Best F1: 0.9459 at epoch 9


Peak GPU Memory during training: 798.57 MB

Evaluating on test set...



Results for rank = 16:
  Test Accuracy: 0.9423
  Test F1: 0.9488
  Validation Accuracy: 0.9369
  Validation F1: 0.9424
  Total Training Time: 1251.68s
  Average Epoch Time: 99.02s
  Best F1 Epoch: 9
  Best Val F1: 0.9459
  Early Stopped: True
  Stop Info: epochs_without_improvement=3
  Peak GPU Memory: 798.57 MB


All experiments completed!


In [9]:
# Optional: Load saved results from individual JSON files
# Use this cell if you want to regenerate the summary from saved results
import json

# NOTE: this cell replaces any `all_results` already in memory with whatever
# can be read back from the per-rank JSON files.
all_results = []
ranks_to_load = [2, 4, 8, 16]

print("Loading saved results...")
for rank in ranks_to_load:
    try:
        with open(f"lora_rank_{rank}_results.json", "r") as f:
            result = json.load(f)
    except FileNotFoundError:
        # A missing file is reported and skipped; other errors still propagate
        print(f"✗ No results file found for rank {rank}")
    else:
        all_results.append(result)
        print(f"✓ Loaded results for rank {rank}")

print(f"\nTotal results loaded: {len(all_results)}")

Loading saved results...
✓ Loaded results for rank 2
✓ Loaded results for rank 4
✓ Loaded results for rank 8
✓ Loaded results for rank 16

Total results loaded: 4


In [10]:
# Save all results to a summary file
import pandas as pd

# Build one pre-formatted (string-valued) summary row per result
summary_data = []
for result in all_results:
    if 'best_val_f1' in result:
        best_val_f1_str = f"{result['best_val_f1']:.4f}"
        best_f1_epoch_str = str(result['best_f1_epoch'])
    else:
        # Older result files may lack the best-F1 fields; show placeholders
        print(f"Warning: Result for rank {result['rank']} is missing 'best_val_f1'. Please re-run training.")
        best_val_f1_str = "N/A"
        best_f1_epoch_str = "N/A"

    early_stopped_label = "Yes" if result.get('early_stopped', False) else "No"
    row = {
        "Rank": result["rank"],
        "Test Acc": f"{result['final_test_accuracy']:.4f}",
        "Test F1": f"{result['final_test_f1']:.4f}",
        "Val Acc": f"{result['final_val_accuracy']:.4f}",
        "Val F1": f"{result['final_val_f1']:.4f}",
        "Best Val F1": best_val_f1_str,
        "Best F1 Epoch": best_f1_epoch_str,
        "Trainable Params": f"{result['trainable_parameters']:,}",
        "Trainable %": f"{result['trainable_percentage']:.2f}%",
        "Total Time (s)": f"{result['total_training_time']:.2f}",
        "Avg Epoch (s)": f"{result['average_epoch_time']:.2f}",
        "Early Stopped": early_stopped_label,
        "Peak GPU (MB)": f"{result['peak_gpu_memory_mb']:.2f}",
    }
    summary_data.append(row)

summary_df = pd.DataFrame(summary_data)

# Print the table between two 80-character rules
rule = "=" * 80
print(f"\n{rule}")
print("BENCHMARK SUMMARY")
print(rule)
print(summary_df.to_string(index=False))

# Save summary to CSV
summary_df.to_csv("lora_benchmark_summary.csv", index=False)
print("\n✓ Summary saved to 'lora_benchmark_summary.csv'")

# Save the complete, unformatted results next to the summary
with open("lora_all_results.json", "w") as f:
    json.dump(all_results, f, indent=2)
print("✓ Complete results saved to 'lora_all_results.json'")


BENCHMARK SUMMARY
 Rank Test Acc Test F1 Val Acc Val F1 Best Val F1 Best F1 Epoch Trainable Params Trainable % Total Time (s) Avg Epoch (s) Early Stopped Peak GPU (MB)
    2   0.9418  0.9480  0.9384 0.9435      0.9438            11          628,994       0.93%        1462.70         99.09           Yes        794.18
    4   0.9464  0.9521  0.9391 0.9445      0.9449            13          665,858       0.98%        1669.98         99.18           Yes        794.91
    8   0.9461  0.9514  0.9393 0.9445      0.9455            14          739,586       1.09%        1771.07         98.81           Yes        796.13
   16   0.9423  0.9488  0.9369 0.9424      0.9459             9          887,042       1.31%        1251.68         99.02           Yes        798.57

✓ Summary saved to 'lora_benchmark_summary.csv'
✓ Complete results saved to 'lora_all_results.json'
