# Phase 5: Model Optimization & Acceleration
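Export the trained YOLOv8 model to ONNX, build a TensorRT FP16 engine, and benchmark inference speed. INT8 quantization is part of the phase goal, but no cell in this notebook performs it yet.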

**Goal**: Maximize inference speed while minimizing accuracy loss
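**Benchmark**: FP32 vs FP16 vs INT8 (the cells below measure PyTorch FP32, ONNX Runtime, and TensorRT FP16; INT8 is not benchmarked here)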

In [None]:
# Mount Google Drive at /content/drive; every project path below lives under it.
from google.colab import drive
drive.mount('/content/drive')

import os
# Project root on the mounted Drive.
PROJECT_DIR = '/content/drive/MyDrive/computer_vision'
# Output directory for the files this notebook writes (exported models, CSV).
RESULTS_DIR = f'{PROJECT_DIR}/results/phase5'
# Phase 3 results directory; the export cell looks for trained weights under it.
MODELS_DIR = f'{PROJECT_DIR}/results/phase3'
# exist_ok=True makes re-running this cell safe when the directory already exists.
os.makedirs(RESULTS_DIR, exist_ok=True)

# IPython shell escape (not Python syntax): quietly install the packages that
# the following cells import.
!pip install -q ultralytics onnx onnxruntime-gpu

## 5.1 Export YOLOv8 to ONNX

In [None]:
from ultralytics import YOLO
import shutil

# Phase 3 checkpoints in order of preference: outdoor-augmented first, then
# baseline, then raw.
weight_candidates = (
    f'{MODELS_DIR}/yolov8n_outdoor_aug/weights/best.pt',
    f'{MODELS_DIR}/yolov8n_baseline/weights/best.pt',
    f'{MODELS_DIR}/yolov8n_raw/weights/best.pt',
)

# Pick the first checkpoint present on disk. When none is present the last
# candidate is kept, so best_pt is always a path string that the
# os.path.exists(best_pt) checks in this notebook can be applied to.
best_pt = next(
    (path for path in weight_candidates if os.path.exists(path)),
    weight_candidates[-1],
)

if not os.path.exists(best_pt):
    print(f"No trained model found.")
    print("Run Phase 3 first to train the model.")
else:
    print(f"Using model: {best_pt}")
    model = YOLO(best_pt)

    # Export to ONNX for a 640-pixel input with graph simplification enabled.
    onnx_path = model.export(format='onnx', imgsz=640, simplify=True)
    print(f"ONNX model exported: {onnx_path}")

    # Keep a copy under the Phase 5 results directory, where the benchmark
    # cells look for it.
    results_copy = f'{RESULTS_DIR}/yolov8n_best.onnx'
    shutil.copy(onnx_path, results_copy)

## 5.2 Export to TensorRT (FP16)

In [None]:
# Build a TensorRT FP16 engine from the selected checkpoint and copy it into
# RESULTS_DIR. A failed export is reported instead of aborting the notebook:
# the comparison cell only benchmarks the engine when the file exists.
if os.path.exists(best_pt):
    model = YOLO(best_pt)

    try:
        # half=True requests an FP16 engine. TensorRT engines are built on an
        # NVIDIA GPU, so this call raises on a CPU-only runtime or when the
        # TensorRT toolchain cannot be installed.
        trt_path = model.export(format='engine', imgsz=640, half=True)
    except Exception as e:
        # Same reporting style as the ONNX Runtime benchmark cell.
        print(f"TensorRT export failed - {e}")
        print("Skipping TensorRT export - check that a GPU runtime is selected")
    else:
        print(f"TensorRT FP16 model exported: {trt_path}")

        shutil.copy(trt_path, f'{RESULTS_DIR}/yolov8n_fp16.engine')
else:
    print("Skipping TensorRT export - model not found")

## 5.3 ONNX Runtime Inference Benchmark

In [None]:
import onnxruntime as ort
import numpy as np
import time
import cv2

# Benchmark the exported ONNX model with ONNX Runtime, once per execution
# provider, and print mean latency and throughput for each.
onnx_model_path = f'{RESULTS_DIR}/yolov8n_best.onnx'

if os.path.exists(onnx_model_path):
    # Label -> provider priority list. The GPU entry lists CPU as a fallback.
    providers_list = {
        'ONNX_CPU': ['CPUExecutionProvider'],
        'ONNX_GPU': ['CUDAExecutionProvider', 'CPUExecutionProvider'],
    }

    # Random float32 input in the 1x3x640x640 layout used for the export.
    dummy = np.random.randn(1, 3, 640, 640).astype(np.float32)

    for name, providers in providers_list.items():
        try:
            session = ort.InferenceSession(onnx_model_path, providers=providers)

            # ONNX Runtime falls back to the next provider in the list when
            # the preferred one cannot be loaded. Without this check a CPU
            # measurement would be printed under the GPU label.
            active_providers = session.get_providers()
            if providers[0] not in active_providers:
                print(f"{name}: skipped - {providers[0]} unavailable "
                      f"(active: {active_providers})")
                continue

            input_name = session.get_inputs()[0].name

            # Warmup: the first runs include one-time initialisation cost.
            for _ in range(5):
                session.run(None, {input_name: dummy})

            # Benchmark with perf_counter, a monotonic high-resolution clock;
            # time.time() can jump and has coarse resolution on some systems.
            times = []
            for _ in range(50):
                start = time.perf_counter()
                session.run(None, {input_name: dummy})
                times.append((time.perf_counter() - start) * 1000)

            avg = np.mean(times)
            fps = 1000 / avg
            print(f"{name}: {avg:.1f} ms/img ({fps:.1f} FPS)")
        except Exception as e:
            # Report and move on so one failing provider does not hide the
            # results of the others.
            print(f"{name}: failed - {e}")
else:
    print("ONNX model not found. Export it first (section 5.1)")

## 5.4 PyTorch vs ONNX vs TensorRT Comparison

In [None]:
import pandas as pd

def _mean_latency_ms(run_once, warmup=5, runs=50):
    """Return the mean wall-clock time of ``run_once()`` in milliseconds.

    ``run_once`` is called ``warmup`` times untimed, then ``runs`` times
    under ``time.perf_counter`` (monotonic, high resolution).
    """
    for _ in range(warmup):
        run_once()

    times = []
    for _ in range(runs):
        start = time.perf_counter()
        run_once()
        times.append((time.perf_counter() - start) * 1000)
    return np.mean(times)


def _benchmark_row(label, latency_ms, model_path):
    """Build one result row: label, latency, derived FPS and file size in MB."""
    return {
        'Format': label,
        'Latency_ms': round(latency_ms, 1),
        'FPS': round(1000 / latency_ms, 1),
        'Model_Size_MB': round(os.path.getsize(model_path) / 1e6, 1),
    }


# Compare mean latency, FPS and file size across PyTorch, ONNX Runtime and
# TensorRT, print the table and save it as CSV. Each format is benchmarked
# only when its model file exists.
# NOTE(review): the PyTorch and TensorRT timings call the model through the
# YOLO wrapper on a raw image, while the ONNX timing is a bare session.run on
# a prepared tensor - presumably the wrapper adds pre/post-processing, so the
# rows are not strictly like-for-like; confirm before quoting speedups.
benchmark_results = []

# Defined unconditionally: both the PyTorch and the TensorRT benchmarks use
# it, and the engine file may exist even when the .pt checkpoint does not.
dummy_img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

# PyTorch benchmark
if os.path.exists(best_pt):
    model = YOLO(best_pt)
    avg_pt = _mean_latency_ms(lambda: model(dummy_img, verbose=False))
    benchmark_results.append(_benchmark_row('PyTorch FP32', avg_pt, best_pt))

# ONNX benchmark
onnx_path = f'{RESULTS_DIR}/yolov8n_best.onnx'
if os.path.exists(onnx_path):
    session = ort.InferenceSession(onnx_path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name
    dummy = np.random.randn(1, 3, 640, 640).astype(np.float32)

    # ONNX Runtime falls back to CPU when the CUDA provider cannot be
    # loaded; label the row with the provider that actually ran.
    if 'CUDAExecutionProvider' in session.get_providers():
        onnx_label = 'ONNX Runtime GPU'
    else:
        onnx_label = 'ONNX Runtime CPU'

    avg_onnx = _mean_latency_ms(lambda: session.run(None, {input_name: dummy}))
    benchmark_results.append(_benchmark_row(onnx_label, avg_onnx, onnx_path))

# TensorRT benchmark
trt_path = f'{RESULTS_DIR}/yolov8n_fp16.engine'
if os.path.exists(trt_path):
    model_trt = YOLO(trt_path)
    avg_trt = _mean_latency_ms(lambda: model_trt(dummy_img, verbose=False))
    benchmark_results.append(_benchmark_row('TensorRT FP16', avg_trt, trt_path))

if benchmark_results:
    df = pd.DataFrame(benchmark_results)
    print("\n" + "=" * 60)
    print("OPTIMIZATION BENCHMARK")
    print("=" * 60)
    print(df.to_string(index=False))
    df.to_csv(f'{RESULTS_DIR}/optimization_benchmark.csv', index=False)
else:
    print("No models found. Run Phases 3 & 5.1 first.")

In [None]:
# Closing summary: where the results were written and which notebook is next.
closing_lines = (
    f"\nPhase 5 results saved to: {RESULTS_DIR}",
    "Optimized models ready for deployment.",
    "Next: Open Phase6_Deployment.ipynb",
)
for closing_line in closing_lines:
    print(closing_line)