In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.quantization
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.models import resnet18
from torchvision.models.quantization import resnet18 as quantizable_resnet18

In [None]:
# Individual preprocessing stages, named so the pipeline below reads as a sentence.
resize_to_imagenet = transforms.Resize((224, 224))  # ResNet18 expects 224x224 images
normalize_rgb = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Applied to every image in order: upscale -> convert to tensor -> per-channel normalize.
transform = transforms.Compose([resize_to_imagenet, transforms.ToTensor(), normalize_rgb])

# CIFAR10 training split, fetched into ./data if it is not already there,
# served in shuffled mini-batches of 32.
train_dataset = CIFAR10(root='./data', download=True, train=True, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

In [None]:
# Build the *quantizable* ResNet18 with float ImageNet weights (quantize=False).
# The plain torchvision ResNet18 has no QuantStub/DeQuantStub around forward()
# and adds its residual connections with `+=`; after `convert` such a model can
# neither accept float inputs nor execute the skip connections. The quantizable
# variant wraps forward() in quant/dequant stubs and performs the residual add
# through a FloatFunctional, so the converted model is actually runnable.
model = quantizable_resnet18(pretrained=True, quantize=False)
model.fc = nn.Linear(model.fc.in_features, 10)  # Adjust for CIFAR10

# QAT fusion and prepare_qat both expect a model in training mode. Fusing
# conv+bn with the eval-only path on a training-mode model is rejected by
# recent PyTorch releases, so set the mode explicitly before fusing.
model.train()

# Fuse Conv, bn, relu
# Fusing multiple layers or operations into a single module can significantly improve runtime performance and reduce memory footprint.
# fuse_model() fuses the stem (conv1/bn1/relu) *and* the conv/bn(/relu) groups
# inside every residual block, not just the stem.
model.fuse_model()

# Prepare the model for QAT
model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
# Inserts fake-quantize/observer modules in place; `quantized_model` and
# `model` refer to the same object afterwards.
quantized_model = torch.quantization.prepare_qat(model, inplace=True)  # Quantization-Aware Training (QAT)

In [None]:
# Training hyper-parameters kept in one place so the loop bound and the
# progress message cannot drift apart.
NUM_EPOCHS = 10   # Num of epochs
LOG_EVERY = 100   # print the loss every LOG_EVERY mini-batches

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
quantized_model.to(device)
criterion = nn.CrossEntropyLoss()
# Optimizer is created after the model has been moved to `device`.
optimizer = optim.SGD(quantized_model.parameters(), lr=0.001, momentum=0.9)

# Training loop
quantized_model.train()
for epoch in range(NUM_EPOCHS):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = quantized_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if i % LOG_EVERY == 0:
            print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item()}')


In [None]:
# Switch the trained model to inference mode and move it to the CPU before
# conversion (both calls return the module itself, so they can be chained).
trained_cpu_model = quantized_model.eval().cpu()

# After training, convert the model to a fully quantized version to reduce its size and potentially increase inference speed.
quantized_model = torch.quantization.convert(trained_cpu_model, inplace=True)