# Benchmarking QSiLUApprox in YOLOv5
**Objective:** Evaluate the impact on accuracy (mAP) and inference latency of replacing the SiLU activation with QSiLUApprox in YOLOv5.


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from ultralytics import YOLO

# Custom modules
from quantization.quantization_tools import QuantizeActivation, test_quantization, get_qstat
from approximation.act_approximation_tools import SiluApproximation, test_silu_approximation
from QSiLUApprox.QSiLUApprox import QSiLUApprox
from utils.module_replacer import replace_module

# Pick the compute device: CUDA when PyTorch reports one, CPU otherwise
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

## Load YOLOv5 Model and Evaluate Baseline

In [None]:
# Load the pre-trained YOLOv5n model.
#
# NOTE: `.eval()` and `.to(device)` are deliberately NOT chained on the YOLO
# wrapper:
#   * torch.nn.Module.eval() is implemented as `self.train(False)`, and the
#     Ultralytics wrapper uses `train()` as its training entry point, so on
#     releases where the wrapper does not override `eval()` the chained call
#     can launch a training run instead of switching modes.
#   * on older Ultralytics releases `to()` returns None, which would leave
#     `model` bound to None after chaining.
# Setting eval mode on the underlying network and calling `to()` as a plain
# statement behaves the same on current releases and avoids both pitfalls.
model = YOLO('yolov5n.pt')
model.model.eval()
model.to(device)

# Run baseline evaluation (without modification)
print("\n--- Evaluating Original Model ---\n")
baseline_results = model.val(
    data='coco.yaml',
    batch=32,
    imgsz=640,
    device=device,
    half=True,
    workers=14
)

# Store baseline metrics: mAP@0.5, mAP@0.5:0.95 and the "inference" entry of
# the speed report (printed as milliseconds in the results cell).
baseline_map50 = baseline_results.box.map50
baseline_map = baseline_results.box.map
baseline_latency = baseline_results.speed["inference"]

## Replace SiLU with QSiLUApprox

In [None]:
# Swap the activation modules inside the underlying network (model.model).
# NOTE(review): ["act"] is presumably the list of attribute names that
# replace_module should match - confirm against utils.module_replacer.
print("\n--- Replacing SiLU with QSiLUApprox ---\n")
replace_module(
    model.model,
    nn.SiLU,
    QSiLUApprox,
    ["act"],
)

## Evaluate Modified Model

In [None]:
print("\n--- Evaluating Modified Model ---\n")

# Validation settings for the modified model (COCO, 640 px images, batch 32,
# half precision requested).
modified_val_settings = dict(
    data='coco.yaml',
    batch=32,
    imgsz=640,
    device=device,
    half=True,
    workers=14,
)
modified_results = model.val(**modified_val_settings)

# Keep the metrics needed for the comparison: mAP@0.5, mAP@0.5:0.95 and the
# "inference" entry of the speed report.
modified_map50, modified_map = modified_results.box.map50, modified_results.box.map
modified_latency = modified_results.speed["inference"]

## Results Comparison

In [None]:
print("\n--- Benchmark Results ---\n")

# Textual report: one line per (metric, model variant) pair.
report_lines = [
    f"mAP@0.5 (Baseline): {baseline_map50:.4f}",
    f"mAP@0.5 (Modified): {modified_map50:.4f}",
    f"mAP@0.5:0.95 (Baseline): {baseline_map:.4f}",
    f"mAP@0.5:0.95 (Modified): {modified_map:.4f}",
    f"Inference Latency (Baseline): {baseline_latency:.2f} ms",
    f"Inference Latency (Modified): {modified_latency:.2f} ms",
]
for report_line in report_lines:
    print(report_line)

# Side-by-side bar charts: mAP@0.5 on the left, inference latency on the right.
variant_labels = ["Baseline", "Modified"]
bar_colors = ['blue', 'orange']
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

# Each entry: (subplot index, bar heights, title, y-axis label)
panel_specs = [
    (0, [baseline_map50, modified_map50], "mAP@0.5 Comparison", "mAP@0.5"),
    (1, [baseline_latency, modified_latency], "Inference Latency Comparison", "Latency (ms)"),
]
for panel_index, bar_heights, panel_title, panel_ylabel in panel_specs:
    ax[panel_index].bar(variant_labels, bar_heights, color=bar_colors)
    ax[panel_index].set_title(panel_title)
    ax[panel_index].set_ylabel(panel_ylabel)

plt.show()

## Conclusion

In [None]:
print("\n--- Summary ---\n")

# Accuracy verdict, based on mAP@0.5. When accuracy regresses, report the
# measured drop instead of labelling it "slight": nothing here bounds the
# size of the regression, so a fixed adjective could badly understate it.
if modified_map50 >= baseline_map50:
    print("✅ The modified model maintains or improves accuracy.")
else:
    map50_drop = baseline_map50 - modified_map50
    print(f"⚠️ The modified model has an accuracy drop of {map50_drop:.4f} mAP@0.5.")

# Latency verdict, based on the per-run inference latency stored earlier.
if modified_latency <= baseline_latency:
    print("✅ The modified model is more efficient.")
else:
    print("⚠️ The modified model has increased latency.")

print("\nFurther optimizations may be needed for better trade-offs between accuracy and efficiency.")