In [1]:
import copy

import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Load and preprocess the CIFAR-10 dataset.
# Normalize applies (x - 0.5) / 0.5 to each of the three channels.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Build the train split first, then the test split; both share `transform`
# and are downloaded into ./data when missing.
trainset, testset = (
    torchvision.datasets.CIFAR10(
        root="./data", train=is_train_split, download=True, transform=transform
    )
    for is_train_split in (True, False)
)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = DataLoader(dataset=trainset, batch_size=128, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=100, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


In [16]:
# Model definition
import torch
import torch.nn.functional as F


class CIFAR10Classifier(nn.Module):
    """Small CNN that maps 3-channel 32x32 images to 10 class logits.

    Layout: three 3x3 convolutions (3 -> 32 -> 64 -> 128 channels), each followed
    by ReLU and a 2x2 max-pool, then three fully connected layers
    (2048 -> 256 -> 128 -> 10) with dropout (p=0.5) after the first two.

    The forward pass is wrapped in a QuantStub / DeQuantStub pair so the same
    class can be used with the eager-mode quantization utilities.
    """

    def __init__(self):
        super().__init__()

        # Quantization stubs
        self.quant = torch.quantization.QuantStub()  # quantizes the input
        self.dequant = torch.quantization.DeQuantStub()  # dequantizes the output

        # Convolutional layers (padding=1 keeps the spatial size for a 3x3 kernel)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Fully connected layers; 128 * 4 * 4 is the flattened size of the
        # conv3 output after three 2x2 poolings of a 32x32 input.
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

        # Pooling and dropout (both modules are reused at several points in forward)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: input batch of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10) holding the output of the last linear layer.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to 4x4
                after the three pooling stages (i.e. the input is not 32x32).
        """
        x = self.quant(x)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Pin the batch dimension instead of using -1: with view(-1, 2048) an
        # input of the wrong spatial size would be silently reshaped into extra
        # "samples" rather than rejected.
        x = x.contiguous().view(x.size(0), 128 * 4 * 4)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        x = self.dequant(x)
        return x

In [17]:
# Training-loop helper
def train_model(model, trainloader, epochs=5):
    """Train ``model`` in place with SGD and cross-entropy loss.

    Args:
        model: module called as ``model(inputs)`` to produce logits.
        trainloader: sized iterable yielding ``(inputs, labels)`` batches.
        epochs: number of full passes over ``trainloader``.

    Returns:
        The same ``model`` object, left in training mode.

    Prints the mean per-batch loss after every epoch.
    """
    sgd = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    loss_fn = nn.CrossEntropyLoss()

    model.train()

    for epoch_number in range(1, epochs + 1):
        batch_losses = []
        for batch_inputs, batch_labels in trainloader:
            sgd.zero_grad()
            batch_loss = loss_fn(model(batch_inputs), batch_labels)
            batch_loss.backward()
            sgd.step()
            batch_losses.append(batch_loss.item())
        # Average over the number of batches reported by the loader.
        mean_loss = sum(batch_losses, 0.0) / len(trainloader)
        print(f"Epoch [{epoch_number}/{epochs}], Loss: {mean_loss:.4f}")
    return model

In [18]:
# Train the baseline (not yet quantized) model; train_model returns the
# instance it was given, so it can be constructed inline.
model = train_model(CIFAR10Classifier(), trainloader, epochs=5)


Epoch [1/5], Loss: 2.1800
Epoch [2/5], Loss: 1.7158
Epoch [3/5], Loss: 1.4862
Epoch [4/5], Loss: 1.3241
Epoch [5/5], Loss: 1.1814


In [19]:
# Evaluation helper
def evaluate_model(model, testloader):
    """Measure top-1 accuracy of ``model`` over ``testloader``.

    Args:
        model: module called as ``model(images)`` to produce per-class scores.
        testloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage (0-100). The value is also printed.

    The model is switched to eval mode and stays in eval mode afterwards.
    """
    model.eval()
    num_correct, num_seen = 0, 0
    with torch.no_grad():
        for images, labels in testloader:
            # Index of the highest score along the class dimension.
            predicted = model(images).max(dim=1).indices
            num_seen += labels.size(0)
            num_correct += (predicted == labels).sum().item()
    accuracy = 100 * num_correct / num_seen
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy

In [20]:
# Evaluate the trained baseline model on the CIFAR-10 test loader.
accuracy = evaluate_model(model, testloader)

Accuracy: 61.95%


In [34]:
# Apply dynamic quantization.
# Select the QNNPACK backend for quantized operators.
torch.backends.quantized.engine = "qnnpack"

# The library defaults are spelled out: no custom qconfig_spec, int8 weights,
# and a new model is returned instead of modifying `model` in place.
dynamic_quantized_model = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec=None,
    dtype=torch.qint8,
    inplace=False,
)

In [35]:
# Evaluate the dynamically quantized model on the CIFAR-10 test loader.
accuracy = evaluate_model(dynamic_quantized_model, testloader)


Accuracy: 61.91%


In [25]:
# Apply post-training static quantization.
# Work on an eval-mode deep copy: the float `model` is left untouched (no
# `qconfig` attribute is attached to it), and dropout is guaranteed to be
# inactive during calibration regardless of which cell ran before this one.
model_static_quantized = copy.deepcopy(model).eval()
model_static_quantized.qconfig = torch.quantization.get_default_qconfig("qnnpack")
torch.quantization.prepare(model_static_quantized, inplace=True)

# Calibration: pass the first training batch through the prepared model so the
# inserted observers can record activation statistics. No gradients are needed.
with torch.no_grad():
    for images, _ in trainloader:
        model_static_quantized(images)
        break  # calibrate on a single batch only

model_static_quantized = torch.quantization.convert(
    model_static_quantized, inplace=False
)

In [26]:
# Evaluate the statically quantized model on the CIFAR-10 test loader.
accuracy = evaluate_model(model_static_quantized, testloader)

Accuracy: 61.87%


In [27]:
# Quantization-aware training (QAT): start from a freshly initialised model.
qat_model = CIFAR10Classifier()
qat_model.qconfig = torch.quantization.get_default_qat_qconfig("qnnpack")
# prepare_qat works on a model in training mode; a new module already is, but
# stating it keeps this cell correct if the construction above ever changes.
qat_model.train()
torch.quantization.prepare_qat(qat_model, inplace=True)

# QAT training
qat_model = train_model(qat_model, trainloader, epochs=5)

# train_model leaves the model in training mode; switch to eval before
# converting, as the PyTorch QAT workflow does, so the converted model is not
# left flagged as training.
qat_model.eval()
torch.quantization.convert(qat_model, inplace=True)


Epoch [1/5], Loss: 2.2017
Epoch [2/5], Loss: 1.7616
Epoch [3/5], Loss: 1.5237
Epoch [4/5], Loss: 1.3624
Epoch [5/5], Loss: 1.2082


CIFAR10Classifier(
  (quant): Quantize(scale=tensor([0.0078]), zero_point=tensor([127]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (conv1): QuantizedConv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.0338754840195179, zero_point=126, padding=(1, 1))
  (conv2): QuantizedConv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.03431626781821251, zero_point=116, padding=(1, 1))
  (conv3): QuantizedConv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), scale=0.03748861327767372, zero_point=148, padding=(1, 1))
  (fc1): QuantizedLinear(in_features=2048, out_features=256, scale=0.023754138499498367, zero_point=141, qscheme=torch.per_tensor_affine)
  (fc2): QuantizedLinear(in_features=256, out_features=128, scale=0.023721380159258842, zero_point=108, qscheme=torch.per_tensor_affine)
  (fc3): QuantizedLinear(in_features=128, out_features=10, scale=0.08717452734708786, zero_point=115, qscheme=torch.per_tensor_affine)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, c

In [28]:
# Evaluate the converted QAT model on the CIFAR-10 test loader.
accuracy = evaluate_model(qat_model, testloader)


Accuracy: 60.95%


In [36]:
def print_model_size(model):
    """Print the size of ``model``'s serialized ``state_dict`` in MB (10**6 bytes).

    The state dict is saved with ``torch.save`` to a file inside a temporary
    directory, measured, and removed again, so nothing is left on disk and no
    fixed path such as ``/tmp/model.pth`` is required or overwritten.

    Args:
        model: object exposing ``state_dict()``.

    Returns:
        None. The size is written to stdout.
    """
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Same file name as before, only the directory differs.
        checkpoint_path = os.path.join(scratch_dir, "model.pth")
        torch.save(model.state_dict(), checkpoint_path)
        size_bytes = os.path.getsize(checkpoint_path)
    print(f"Model size: {size_bytes / 1e6} MB")


# Report sizes in a fixed order: float baseline, dynamic, static, QAT.
for sized_model in (
    model,
    dynamic_quantized_model,
    model_static_quantized,
    qat_model,
):
    print_model_size(sized_model)


Model size: 2.611474 MB
Model size: 0.938292 MB
Model size: 0.661388 MB
Model size: 0.661388 MB
