<a href="https://colab.research.google.com/github/Papa-Panda/Paper_reading/blob/main/Quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.quantization
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import FakeData
from torch.utils.data import DataLoader
import time

# Select the compute device: use the GPU when CUDA is usable, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Helper function to measure inference time
def measure_inference_time(model, dataloader, device):
    """Return the wall-clock seconds needed to run ``model`` over ``dataloader``.

    The model is switched to evaluation mode (and left in it), every batch is
    moved to ``device`` and pushed through the model with gradients disabled.

    Args:
        model: Module to benchmark; it must already live on ``device``.
        dataloader: Iterable yielding ``(inputs, labels)`` pairs; the labels
            are ignored.
        device: ``torch.device`` (or device string) the inputs are moved to.

    Returns:
        Elapsed time in seconds as a float.
    """
    target = torch.device(device)
    # CUDA kernels are launched asynchronously, so the clock must only be read
    # once all queued GPU work has actually finished.
    needs_sync = target.type == "cuda"

    model.eval()
    if needs_sync:
        torch.cuda.synchronize(target)
    # perf_counter is monotonic and high resolution; time.time() can jump when
    # the system clock is adjusted.
    start_time = time.perf_counter()
    with torch.no_grad():
        for inputs, _ in dataloader:
            model(inputs.to(target))
    if needs_sync:
        torch.cuda.synchronize(target)
    return time.perf_counter() - start_time

# Build an untrained ResNet18 and swap its classifier head for a 10-class one.
resnet_fp32 = models.resnet18(pretrained=False)
head_in_features = resnet_fp32.fc.in_features  # 512 for ResNet18
resnet_fp32.fc = nn.Linear(head_in_features, 10)
resnet_fp32.to(device)  # nn.Module.to moves the module in place

# Synthetic datasets: images are converted to tensors, then normalised with
# mean 0.5 / std 0.5.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=(0.5,), std=(0.5,))
transform = transforms.Compose([to_tensor, normalize])
train_dataset = FakeData(size=1000, transform=transform)
test_dataset = FakeData(size=200, transform=transform)

# Training batches are shuffled; test batches keep dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Define training function
def train_model(model, dataloader, criterion, optimizer, num_epochs=1, device=None):
    """Train ``model`` in place and print the mean batch loss of every epoch.

    Args:
        model: Module to optimise; it is put into training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of full passes over ``dataloader``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if it has none), so the
            function does not depend on a module-level ``device`` variable.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Batches are counted while iterating so that loaders without len()
        # work and an empty loader reports NaN instead of dividing by zero.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

# Train non-quantized model
print("Training non-quantized model...")
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet_fp32.parameters(), lr=0.01, momentum=0.9)
train_model(resnet_fp32, train_loader, criterion, optimizer)

# Measure inference time for non-quantized model
# NOTE: this runs on `device`; the quantized model below always runs on the
# CPU, so the two timings are only directly comparable on a CPU-only machine.
fp32_inference_time = measure_inference_time(resnet_fp32, test_loader, device)
print(f"Non-quantized model inference time: {fp32_inference_time:.4f} seconds")

# Quantization preparation
# Eager-mode static quantization needs QuantStub/DeQuantStub at the model
# boundaries and FloatFunctional for the residual additions. The plain
# torchvision ResNet has neither, so converting it yields a model that cannot
# consume float inputs. The trained weights are therefore copied into
# torchvision's quantizable ResNet18, which shares the same parameter names.
# This also leaves `resnet_fp32` untouched as a genuine FP32 baseline.
quantization_device = torch.device("cpu")  # fbgemm kernels are CPU-only
quantizable_model = models.quantization.resnet18(quantize=False)
quantizable_model.fc = nn.Linear(512, 10)  # Same 10-class head as resnet_fp32
quantizable_model.load_state_dict(resnet_fp32.state_dict())
quantizable_model = quantizable_model.to(quantization_device).eval()
quantizable_model.fuse_model()  # Fold Conv+BN(+ReLU) before observers are attached
quantizable_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')  # Backend for quantization
torch.quantization.prepare(quantizable_model, inplace=True)

# Calibration step (required for quantization): the observers record
# activation ranges while representative data flows through the model.
print("Calibrating quantized model...")
with torch.no_grad():
    for inputs, _ in train_loader:
        quantizable_model(inputs.to(quantization_device))

# Convert to quantized model
quantized_model = torch.quantization.convert(quantizable_model, inplace=False)

# Measure inference time for quantized model
quantized_inference_time = measure_inference_time(quantized_model, test_loader, quantization_device)
print(f"Quantized model inference time: {quantized_inference_time:.4f} seconds")


# Compare model sizes
def _serialized_size_bytes(model):
    """Return the number of bytes ``torch.save`` writes for the model's state_dict."""
    import io

    buffer = io.BytesIO()
    torch.save(model.state_dict(), buffer)
    return buffer.getbuffer().nbytes


# Quantized layers keep their weights in packed form rather than as
# nn.Parameter objects, so counting parameters() would under-report the
# quantized model. The serialized state_dict size is compared instead.
fp32_model_size = _serialized_size_bytes(resnet_fp32)
quantized_model_size = _serialized_size_bytes(quantized_model)
print(f"Non-quantized model size: {fp32_model_size / 1e6:.2f} MB")
print(f"Quantized model size: {quantized_model_size / 1e6:.2f} MB")

# Evaluate performance
def evaluate_model(model, dataloader, criterion, device=None):
    """Compute the mean batch loss and the accuracy of ``model`` on ``dataloader``.

    The model is switched to evaluation mode (and left in it) and all forward
    passes run with gradients disabled.

    Args:
        model: Module to evaluate.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        device: Device the batches are moved to. When omitted, it is taken
            from the model's first parameter, else its first buffer, else CPU,
            so the function does not depend on a module-level ``device``.

    Returns:
        ``(mean_loss, accuracy)`` where ``mean_loss`` is the average of the
        per-batch losses and ``accuracy`` is a percentage in ``[0, 100]``.
        Both are NaN when ``dataloader`` yields no samples.
    """
    if device is None:
        anchor = next(model.parameters(), None)
        if anchor is None:
            # A model may expose no nn.Parameter at all (e.g. only packed
            # quantized weights); fall back to its buffers.
            anchor = next(model.buffers(), None)
        device = anchor.device if anchor is not None else torch.device("cpu")

    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            total_loss += criterion(outputs, labels).item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    # Guard both divisions so an empty loader reports NaN instead of raising
    # ZeroDivisionError.
    mean_loss = total_loss / num_batches if num_batches else float("nan")
    accuracy = 100 * correct / total if total else float("nan")
    return mean_loss, accuracy

# Score the float and the quantized model on the same test set, then report
# each model's loss and accuracy side by side.
fp32_metrics = evaluate_model(resnet_fp32, test_loader, criterion)
quantized_metrics = evaluate_model(quantized_model, test_loader, criterion)
fp32_loss, fp32_accuracy = fp32_metrics
quantized_loss, quantized_accuracy = quantized_metrics

print(f"Non-quantized model: Loss = {fp32_loss:.4f}, Accuracy = {fp32_accuracy:.2f}%")
print(f"Quantized model: Loss = {quantized_loss:.4f}, Accuracy = {quantized_accuracy:.2f}%")



Training non-quantized model...
Epoch [1/1], Loss: 2.5353
Non-quantized model inference time: 22.0852 seconds
Calibrating quantized model...


