# Vision Transformer Model Optimization for Deployment
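This notebook implements an optimization pipeline for deploying a DINO ViT-B/16 Vision Transformer using dynamic INT8 quantization of its linear layers. It covers image preprocessing, quantization, saving and reloading the optimized model, and single-image inference, with a focus on practical deployment considerations.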

In [None]:
import torch
import torchvision
from torch.ao.quantization import get_default_qconfig
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader

# Configuration shared by every cell below.
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
BATCH_SIZE = 32
IMAGE_SIZE = 224  # size passed to both Resize and CenterCrop during preprocessing
MODEL_PATH = 'optimized_model.pth'  # default location for saving/loading weights

print(f"Using device: {DEVICE}")

## 1. Model and Data Pipeline

In [None]:
def create_transform_pipeline():
    """Build the preprocessing transform applied to an image before inference.

    Returns:
        A ``transforms.Compose`` that resizes using ``IMAGE_SIZE``,
        center-crops to ``IMAGE_SIZE``, converts the image to a tensor, and
        normalizes its three channels with fixed mean/std values.
    """
    # Per-channel statistics; these match the widely used ImageNet values.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.Resize(IMAGE_SIZE),
        transforms.CenterCrop(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)

def load_vit_model():
    """Fetch the DINO ViT-B/16 model from torch.hub, ready for inference.

    Returns:
        The model moved to ``DEVICE`` and switched to evaluation mode.

    Note:
        ``torch.hub.load`` downloads the repository and weights on first use,
        so this call needs network access unless the hub cache is populated.
    """
    # Hub entry points are Python callables in the repo's hubconf.py, so the
    # name uses underscores ("dino_vitb16"); a hyphenated name cannot be found.
    model = torch.hub.load('facebookresearch/dino:main', 'dino_vitb16')
    model = model.to(DEVICE)
    model.eval()
    return model

## 2. Model Optimization

In [None]:
class ModelOptimizer:
    """Apply dynamic INT8 quantization to a model and report model sizes."""

    def __init__(self, model):
        """Store the float model that ``quantize`` will work from."""
        self.model = model

    def quantize(self):
        """Return a dynamically quantized CPU copy of the wrapped model.

        Every ``torch.nn.Linear`` is replaced by its dynamic ``qint8``
        counterpart. The wrapped model itself is left untouched.

        Returns:
            A new quantized module that lives on the CPU, in eval mode.
        """
        import copy

        # Dynamic-quantized kernels only run on CPU, so quantize a CPU copy
        # instead of whatever device the float model currently lives on.
        cpu_model = copy.deepcopy(self.model).to('cpu').eval()
        # The copy above is already private to this call, so convert it in
        # place rather than paying for a second deep copy.
        return torch.quantization.quantize_dynamic(
            cpu_model,
            {torch.nn.Linear},
            dtype=torch.qint8,
            inplace=True,
        )

    def get_model_size(self, model):
        """Return the serialized size of ``model``'s state dict in MB.

        The size is measured by serializing the state dict to an in-memory
        buffer. Summing ``parameters()`` and ``buffers()`` is not enough for
        quantized models: quantized Linear layers keep their weights in packed
        params that are neither parameters nor buffers, so they would be
        missing from the total.

        Args:
            model: Any ``torch.nn.Module`` (float or quantized).

        Returns:
            Size in mebibytes as a float, including serialization overhead.
        """
        import io

        buffer = io.BytesIO()
        torch.save(model.state_dict(), buffer)
        return buffer.getbuffer().nbytes / 1024**2

## 3. Deployment Pipeline

In [None]:
class DeploymentPipeline:
    """Quantize a ViT model, persist it, reload it, and run inference.

    Attributes are set in ``__init__``:
        transform: preprocessing callable from ``create_transform_pipeline``.
        model: the quantized model, or ``None`` until one is built or loaded.
    """

    def __init__(self):
        self.transform = create_transform_pipeline()
        self.model = None

    def optimize_for_deployment(self):
        """Load the float model, quantize it, and keep the result.

        Prints the model size before and after quantization.

        Returns:
            The quantized model (also stored on ``self.model``).
        """
        # Dynamic-quantized kernels are CPU-only, so work from a CPU model
        # regardless of the device load_vit_model() placed it on.
        model = load_vit_model().cpu()
        optimizer = ModelOptimizer(model)

        print("Original model size: {:.2f} MB".format(
            optimizer.get_model_size(model)
        ))

        optimized_model = optimizer.quantize()
        print("Optimized model size: {:.2f} MB".format(
            optimizer.get_model_size(optimized_model)
        ))

        self.model = optimized_model
        return optimized_model

    def save_model(self, path=MODEL_PATH):
        """Write the quantized model's state dict to ``path``.

        Raises:
            ValueError: if no model has been optimized or loaded yet.
        """
        if self.model is None:
            raise ValueError("Model not optimized yet")
        torch.save(self.model.state_dict(), path)
        print(f"Model saved to {path}")

    def load_model(self, path=MODEL_PATH):
        """Rebuild the quantized model and load saved weights from ``path``.

        The file written by ``save_model`` holds a *quantized* state dict. Its
        keys only match a module whose Linear layers have already been
        converted, so the float architecture is quantized first and the saved
        weights are loaded into that.

        Returns:
            The quantized model in eval mode (also stored on ``self.model``).
        """
        float_model = load_vit_model().cpu()
        quantized_model = ModelOptimizer(float_model).quantize()

        # map_location keeps loading independent of the device the file was
        # saved from.
        state_dict = torch.load(path, map_location='cpu')
        quantized_model.load_state_dict(state_dict)
        quantized_model.eval()

        # Only publish the model once the weights loaded successfully.
        self.model = quantized_model
        return self.model

    def inference(self, image):
        """Run the model on a single image and return its output.

        Args:
            image: input accepted by ``self.transform``
                (presumably a PIL image; confirm against callers).

        Returns:
            The model output for a batch containing just this image.

        Raises:
            ValueError: if no model has been optimized or loaded yet.
        """
        if self.model is None:
            raise ValueError("Model not loaded")

        # Follow the model's own device rather than the global DEVICE: the
        # quantized model lives on CPU even when a GPU is available.
        first_param = next(self.model.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device('cpu')

        with torch.no_grad():
            # unsqueeze(0) adds the leading batch dimension of size 1.
            batch = self.transform(image).unsqueeze(0).to(device)
            output = self.model(batch)
        return output

## 4. Deploy Optimized Model

In [None]:
def main():
    """Quantize the model, save it, then reload it as a sanity check.

    Returns:
        The ``DeploymentPipeline`` instance, holding the reloaded model.
    """
    deploy = DeploymentPipeline()

    # Stage 1: build the quantized model and write it to the default path.
    print("Optimizing model...")
    deploy.optimize_for_deployment()
    deploy.save_model()

    # Stage 2: read the saved weights back to confirm the file is loadable.
    print("\nVerifying deployment...")
    deploy.load_model()
    print("Model loaded successfully")

    return deploy


# Keep the pipeline in a module-level name so later cells can reuse it.
if __name__ == "__main__":
    deployment_pipeline = main()

## 5. Usage Example

In [None]:
from PIL import Image

def process_image(image_path, pipeline):
    """Open an image file and run it through the pipeline.

    Args:
        image_path: path (or file object) accepted by ``Image.open``.
        pipeline: object exposing ``inference(image)``, e.g. a
            ``DeploymentPipeline`` with a model already loaded.

    Returns:
        Whatever ``pipeline.inference`` returns for the image.
    """
    # Close the file handle deterministically; convert() returns a fully
    # loaded copy, so the image stays usable after the file is closed.
    with Image.open(image_path) as source:
        # Force three channels: grayscale, palette or RGBA inputs would
        # otherwise fail the 3-channel normalization step.
        rgb_image = source.convert('RGB')
    return pipeline.inference(rgb_image)

# Example usage:
# output = process_image('example.jpg', deployment_pipeline)