In [1]:
import torch
# Sanity check: report whether PyTorch can see a CUDA device before any GPU work is attempted.
print(torch.cuda.is_available())

True


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchsummary import summary
import time
import os

In [4]:
# Seed PyTorch's global RNG so that runs of this notebook are repeatable.
torch.manual_seed(42)
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Preprocessing pipeline: ToTensor converts each image to a float tensor, then
# Normalize applies (x - 0.5) / 0.5 independently to each of the three channels.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# CIFAR-10 train and test splits, stored under ./data (downloaded if missing).
train_set = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
test_set = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

# Batches of 64 samples; only the training loader shuffles, so the evaluation
# order stays fixed.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


In [6]:
class SmallCNN(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Three conv -> ReLU -> 2x2 max-pool stages (16, 32 and 64 channels) reduce a
    32x32 input to a 64x4x4 feature map, which is flattened and passed through
    two fully connected layers.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 64 channels * 4 * 4 spatial positions remain after three 2x poolings
        # of a 32x32 input.
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, 3, 32, 32).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                64*4*4 features expected by ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 4 * 4), this can never fold surplus spatial elements
        # into the batch dimension, so a wrongly sized input fails loudly in
        # fc1 instead of silently producing extra rows.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Instantiate the network on the selected device and print a per-layer
# summary for a single 3x32x32 input.
model = SmallCNN().to(device)
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
         MaxPool2d-2           [-1, 16, 16, 16]               0
            Conv2d-3           [-1, 32, 16, 16]           4,640
         MaxPool2d-4             [-1, 32, 8, 8]               0
            Conv2d-5             [-1, 64, 8, 8]          18,496
         MaxPool2d-6             [-1, 64, 4, 4]               0
            Linear-7                  [-1, 128]         131,200
            Linear-8                   [-1, 10]           1,290
Total params: 156,074
Trainable params: 156,074
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.27
Params size (MB): 0.60
Estimated Total Size (MB): 0.88
----------------------------------------------------------------


In [7]:
def train_model(model, epochs=10, loader=None, lr=0.001):
    """Train ``model`` with Adam and cross-entropy loss.

    Args:
        model: Module to optimise in place. Batches are moved to whatever
            device the model's parameters live on.
        epochs: Number of full passes over ``loader``.
        loader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``train_loader``.
        lr: Adam learning rate.

    Returns:
        List with the mean per-sample training loss of each epoch.
    """
    if loader is None:
        loader = train_loader  # notebook-level default

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Follow the model's own device rather than a global one, so training still
    # works after the model has been moved (e.g. by model.cpu()).
    target = next(model.parameters()).device

    model.train()
    history = []
    for epoch in range(epochs):
        loss_sum = 0.0
        seen = 0
        for inputs, labels in loader:
            inputs, labels = inputs.to(target), labels.to(target)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; weight it by the batch size so
            # a smaller final batch does not skew the epoch average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            seen += batch_size
        epoch_loss = loss_sum / seen if seen else float("nan")
        history.append(epoch_loss)
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")
    return history

# Train the network with the function's default settings (10 epochs).
train_model(model)

Epoch 1, Loss: 1.4841
Epoch 2, Loss: 1.1088
Epoch 3, Loss: 0.9362
Epoch 4, Loss: 0.8256
Epoch 5, Loss: 0.7436
Epoch 6, Loss: 0.6748
Epoch 7, Loss: 0.6173
Epoch 8, Loss: 0.5653
Epoch 9, Loss: 0.5167
Epoch 10, Loss: 0.4669


In [8]:
def evaluate_model(model, loader=None):
    """Compute and print top-1 accuracy of ``model``.

    Args:
        model: Module to evaluate; it is switched to eval mode. Batches are
            moved to the device of the model's parameters (CPU if the model
            exposes no parameters).
        loader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``test_loader``.

    Returns:
        Accuracy in percent, or NaN when ``loader`` yields no samples.
    """
    if loader is None:
        loader = test_loader  # notebook-level default

    # Use the model's own device instead of a global one so the function keeps
    # working after the model has been moved between devices.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(target), labels.to(target)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Accuracy over zero samples is undefined; report NaN instead of raising.
    acc = 100 * correct / total if total else float("nan")
    print(f"Test Accuracy: {acc:.2f}%")
    return acc

# Score the trained model on the CIFAR-10 test loader; the returned accuracy
# is also shown as the cell's output value.
evaluate_model(model)

Test Accuracy: 72.97%


72.97

In [9]:
# Move the trained model to the CPU in place. The later timing and ONNX export
# cells feed `model` CPU tensors and therefore rely on this move.
model.cpu()
# Apply dynamic quantization to the nn.Linear layers only, using int8 weights;
# the convolution layers are not in the set and are left as they are.
quantized_model = torch.quantization.quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8)

In [10]:
def evaluate_cpu_model(model, loader=None):
    """Compute and print top-1 accuracy of a model without any device transfer.

    Batches are fed to ``model`` exactly as the loader yields them, which is
    what a CPU-resident (e.g. dynamically quantized) model needs.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        loader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``test_loader``.

    Returns:
        Accuracy in percent, or NaN when ``loader`` yields no samples.
    """
    if loader is None:
        loader = test_loader  # notebook-level default

    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            predicted = model(inputs).argmax(dim=1)
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    # Accuracy over zero samples is undefined; report NaN instead of raising.
    acc = 100 * hits / seen if seen else float("nan")
    print(f"[Quantized] Test Accuracy: {acc:.2f}%")
    return acc

# Score the dynamically quantized model on the same test loader for comparison
# with the float32 accuracy above.
evaluate_cpu_model(quantized_model)

[Quantized] Test Accuracy: 73.06%


73.06

In [11]:
def model_size(model, name="model.pth"):
    """Save ``model``'s state_dict to ``name`` and report the file size.

    The file is left on disk after the call.

    Args:
        model: Module whose ``state_dict()`` is serialised.
        name: Destination path of the checkpoint file.

    Returns:
        Size of the written file in megabytes (1 MB = 1e6 bytes).
    """
    torch.save(model.state_dict(), name)
    size = os.path.getsize(name) / 1e6
    print(f"Model size: {size:.2f} MB")
    # Hand the measurement back so callers can compare models programmatically
    # instead of parsing the printed text.
    return size

# Compare the serialized state_dict size of the float32 model with that of the
# int8-quantized model.
model_size(model, "model_fp32.pth")
model_size(quantized_model, "model_int8.pth")

# Inference Timer
def measure_inference_time(model, device='cpu', n_runs=100,
                           input_shape=(1, 3, 32, 32), warmup=5):
    """Measure the average forward-pass latency of ``model`` on random input.

    Args:
        model: Module to time; it is switched to eval mode. It must already
            live on ``device``.
        device: Device on which the random input tensor is placed.
        n_runs: Number of timed forward passes (at least 1).
        input_shape: Shape of the random input tensor.
        warmup: Number of untimed forward passes run first, so that one-off
            start-up costs do not inflate the average.

    Returns:
        Average time per forward pass in milliseconds.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    dummy = torch.randn(*input_shape).to(device)
    # CUDA kernels are launched asynchronously; without synchronisation the
    # timer would only measure launch overhead.
    on_cuda = torch.device(device).type == "cuda"

    model.eval()
    with torch.no_grad():
        for _ in range(warmup):
            model(dummy)
        if on_cuda:
            torch.cuda.synchronize()
        # perf_counter is monotonic and high resolution, unlike time.time().
        start = time.perf_counter()
        for _ in range(n_runs):
            model(dummy)
        if on_cuda:
            torch.cuda.synchronize()
        end = time.perf_counter()

    avg_time = (end - start) / n_runs * 1000
    print(f"Avg inference time on {device}: {avg_time:.2f} ms")
    return avg_time

# Time CPU inference for the float32 model and for the quantized model.
measure_inference_time(model, 'cpu')
measure_inference_time(quantized_model, 'cpu')

Model size: 0.63 MB
Model size: 0.23 MB
Avg inference time on cpu: 0.82 ms
Avg inference time on cpu: 0.65 ms


In [12]:
# Compile the quantized model to TorchScript and write it to a single file.
scripted_model = torch.jit.script(quantized_model)
scripted_model.save("cifar10_quantized_scripted.pt")
print("Scripted model saved for edge deployment.")

Scripted model saved for edge deployment.


In [14]:
# Export the float32 model (not the quantized one) to ONNX. The example input
# is a CPU tensor, so `model` must be on the CPU when this cell runs.
onnx_input = torch.randn(1, 3, 32, 32)
torch.onnx.export(
    model,                        # original model (float32)
    onnx_input,                   # example input tensor
    "cifar10_model.onnx",        # output file name
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
    # Mark dimension 0 of both input and output as a variable batch size.
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
)
print("ONNX model exported to 'cifar10_model.onnx'")


ONNX model exported to 'cifar10_model.onnx'
