# Model Conversion to TensorRT and ONNX

This notebook implements the conversion of our trained model to ONNX and TensorRT formats for optimized inference on NVIDIA GPUs.

In [None]:
import logging
import time
from pathlib import Path

import numpy as np
import onnx
import tensorrt as trt
import torch

# Notebook-wide settings shared by the export, conversion and benchmark cells.
CONFIG = {
    # Checkpoint produced by Part 2.
    'model_path': '../part2/outputs/best_model.pth',
    # Directory that receives the exported ONNX file and TensorRT engine.
    'output_dir': Path('./outputs'),
    'batch_size': 32,
    'image_size': 64,
    # Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
    'device': (
        torch.device('cuda')
        if torch.cuda.is_available()
        else torch.device('cpu')
    ),
}

# Make sure the output directory is there before any cell writes into it.
CONFIG['output_dir'].mkdir(exist_ok=True)
logging.basicConfig(level=logging.INFO)

## 1. ONNX Export

In [None]:
class ONNXExporter:
    """Load a trained PyTorch checkpoint and export it as an ONNX graph.

    The checkpoint is read as a mapping with a ``'config'`` entry (handed to
    the ``CNN`` constructor) and a ``'model_state_dict'`` entry.
    """

    def __init__(self, model_path):
        """Store the checkpoint location and load the model right away.

        Args:
            model_path: Path to the ``.pth`` checkpoint file.
        """
        self.model_path = model_path
        self.model = self._load_model()

    def _load_model(self):
        """Rebuild the network from the checkpoint and switch it to eval mode.

        Returns:
            The ``CNN`` instance with the checkpoint weights loaded.
        """
        # map_location='cpu' lets a checkpoint that was saved from a GPU be
        # read on a machine without CUDA; load_state_dict copies the values
        # into the freshly constructed model's own parameters afterwards.
        checkpoint = torch.load(self.model_path, map_location='cpu')
        model = CNN(checkpoint['config'])  # CNN class from Part 2
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()
        return model

    def export(self, output_path):
        """Export ``self.model`` to ONNX and validate the written file.

        The batch dimension (axis 0) of both the input and the output is
        declared dynamic, so the exported graph accepts any batch size.

        Args:
            output_path: Destination of the ``.onnx`` file (``str`` or
                ``pathlib.Path``).

        Returns:
            ``output_path``, unchanged, for convenient chaining.
        """
        # The export only needs a correctly shaped sample; batch size 1 is
        # enough because axis 0 is marked dynamic below.
        side = CONFIG['image_size']
        dummy_input = torch.randn(1, 3, side, side)

        # Hand the libraries a plain string: that is the documented type for
        # the export target, while callers may pass a pathlib.Path.
        onnx_file = str(output_path)

        torch.onnx.export(
            self.model,
            dummy_input,
            onnx_file,
            input_names=['input'],
            output_names=['output'],
            dynamic_axes={
                'input': {0: 'batch_size'},
                'output': {0: 'batch_size'}
            },
            opset_version=13
        )

        # Verify ONNX model: reload from disk and run the structural checker.
        onnx_model = onnx.load(onnx_file)
        onnx.checker.check_model(onnx_model)

        logging.info(f"ONNX model exported and verified: {output_path}")
        return output_path

## 2. TensorRT Conversion

In [None]:
class TensorRTConverter:
    """Build a TensorRT engine from an ONNX model file and save it to disk."""

    def __init__(self):
        """Create the TensorRT logger (warnings and above) used for the build."""
        self.logger = trt.Logger(trt.Logger.WARNING)

    def build_engine(self, onnx_path, engine_path):
        """Parse an ONNX file, build a TensorRT engine and serialize it.

        Args:
            onnx_path: Path of the ONNX model to convert.
            engine_path: Path the serialized engine is written to.

        Returns:
            The built TensorRT engine.

        Raises:
            RuntimeError: If the ONNX model cannot be parsed or TensorRT
                fails to build an engine from it.
        """
        builder = trt.Builder(self.logger)
        network = builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        )
        parser = trt.OnnxParser(network, self.logger)

        # Parse ONNX. parse() reports failure through its return value, so it
        # has to be checked; otherwise the build below runs on an empty
        # network and fails much later with an unrelated error.
        with open(onnx_path, 'rb') as model:
            parsed = parser.parse(model.read())
        if not parsed:
            details = '; '.join(
                str(parser.get_error(i)) for i in range(parser.num_errors)
            )
            raise RuntimeError(
                f"Failed to parse ONNX model {onnx_path}: {details}"
            )

        config = builder.create_builder_config()
        workspace_bytes = 1 << 30  # 1GB
        # Newer TensorRT releases replaced max_workspace_size with
        # set_memory_pool_limit; use whichever this installation offers.
        if hasattr(config, 'set_memory_pool_limit'):
            config.set_memory_pool_limit(
                trt.MemoryPoolType.WORKSPACE, workspace_bytes
            )
        else:
            config.max_workspace_size = workspace_bytes

        # Enable FP16 if possible
        if builder.platform_has_fast_fp16:
            config.set_flag(trt.BuilderFlag.FP16)

        # Set optimization profile: the ONNX input named 'input' has a
        # dynamic batch axis, so TensorRT needs min / opt / max shapes.
        side = CONFIG['image_size']
        batch = CONFIG['batch_size']
        profile = builder.create_optimization_profile()
        profile.set_shape(
            'input',
            (1, 3, side, side),  # min
            (batch, 3, side, side),  # opt
            (batch * 2, 3, side, side)  # max
        )
        config.add_optimization_profile(profile)

        # Build the engine. build_serialized_network is the current API;
        # build_engine is kept as a fallback for older TensorRT versions.
        if hasattr(builder, 'build_serialized_network'):
            serialized = builder.build_serialized_network(network, config)
            if serialized is None:
                raise RuntimeError(
                    f"TensorRT failed to build an engine from {onnx_path}"
                )
            # Keep the runtime referenced so it outlives the returned engine.
            self._runtime = trt.Runtime(self.logger)
            engine = self._runtime.deserialize_cuda_engine(serialized)
            if engine is None:
                raise RuntimeError(
                    f"TensorRT failed to deserialize the engine built from {onnx_path}"
                )
        else:
            engine = builder.build_engine(network, config)
            if engine is None:
                raise RuntimeError(
                    f"TensorRT failed to build an engine from {onnx_path}"
                )
            serialized = engine.serialize()

        # Save the serialized engine.
        with open(engine_path, 'wb') as f:
            f.write(serialized)

        logging.info(f"TensorRT engine built and saved: {engine_path}")
        return engine

## 3. Inference Benchmarking

In [None]:
class InferenceBenchmark:
    """Measure forward-pass latency and keep the statistics per backend.

    Results are stored in ``self.results`` keyed by backend name; each entry
    holds the ``'mean'`` and ``'std'`` of the per-iteration time in
    milliseconds.
    """

    def __init__(self):
        """Start with an empty results table."""
        self.results = {}

    def benchmark_pytorch(self, model, input_shape, n_iterations=100):
        """Time ``model`` on random input on the device from ``CONFIG``.

        Args:
            model: The PyTorch module to benchmark; it is moved to
                ``CONFIG['device']``.
            input_shape: Shape of the random input tensor fed to the model.
            n_iterations: Number of timed forward passes.

        Returns:
            Dict with ``'mean'`` and ``'std'`` of the latency in milliseconds
            (also stored under ``self.results['pytorch']``).
        """
        device = torch.device(CONFIG['device'])
        # CUDA events only exist on a GPU; on the CPU fall back to a
        # wall-clock timer so the benchmark still runs without CUDA.
        on_cuda = device.type == 'cuda'

        model = model.to(device)
        dummy_input = torch.randn(input_shape).to(device)

        # Warmup
        with torch.no_grad():
            for _ in range(10):
                _ = model(dummy_input)
        if on_cuda:
            # Make sure queued warm-up kernels are finished before timing.
            torch.cuda.synchronize()

        # Benchmark
        times = []
        with torch.no_grad():
            for _ in range(n_iterations):
                if on_cuda:
                    start = torch.cuda.Event(enable_timing=True)
                    end = torch.cuda.Event(enable_timing=True)

                    start.record()
                    _ = model(dummy_input)
                    end.record()

                    # elapsed_time is only valid once both events completed.
                    torch.cuda.synchronize()
                    times.append(start.elapsed_time(end))
                else:
                    started = time.perf_counter()
                    _ = model(dummy_input)
                    # perf_counter is in seconds; report milliseconds to
                    # match the CUDA event timings.
                    times.append((time.perf_counter() - started) * 1000.0)

        self.results['pytorch'] = {
            'mean': np.mean(times),
            'std': np.std(times)
        }
        return self.results['pytorch']

## 4. Complete Pipeline Execution

In [None]:
def main():
    """Run the whole pipeline: ONNX export, TensorRT build, PyTorch benchmark.

    Returns:
        Path of the serialized TensorRT engine inside ``CONFIG['output_dir']``.
    """
    out_dir = CONFIG['output_dir']
    side = CONFIG['image_size']

    # Stage 1: checkpoint -> ONNX file
    logging.info("Exporting model to ONNX...")
    exporter = ONNXExporter(CONFIG['model_path'])
    onnx_file = out_dir / 'model.onnx'
    exporter.export(onnx_file)

    # Stage 2: ONNX file -> TensorRT engine
    logging.info("Converting to TensorRT...")
    engine_file = out_dir / 'model.engine'
    trt_converter = TensorRTConverter()
    built_engine = trt_converter.build_engine(onnx_file, engine_file)

    # Stage 3: latency of the original PyTorch model at the configured batch size
    logging.info("Running inference benchmarks...")
    bench = InferenceBenchmark()
    input_shape = (CONFIG['batch_size'], 3, side, side)
    pytorch_results = bench.benchmark_pytorch(exporter.model, input_shape)

    # Report
    logging.info("\nBenchmark Results:")
    logging.info(f"PyTorch - Mean: {pytorch_results['mean']:.2f}ms ± {pytorch_results['std']:.2f}ms")

    return engine_file


if __name__ == "__main__":
    main()