In [3]:
import torch
import time
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
mnist_train = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
mnist_test = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

X_train = mnist_train.data
y_train = mnist_train.targets
X_test = mnist_test.data
y_test = mnist_test.targets

X_train = X_train.view(X_train.size(0), -1).float() 
X_test = X_test.view(X_test.size(0), -1).float()   



In [4]:
class LogisticRegression(nn.Module):
    def __init__(self, input_size, num_classes):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        return self.linear(x)

input_size = X_train.shape[1]  
num_classes = 10  

model = LogisticRegression(input_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(100): 
    model.train()
    optimizer.zero_grad()
    
    outputs = model(X_train)  
    loss = criterion(outputs, y_train) 
    
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/100], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 857.6127
Epoch [20/100], Loss: 309.0080
Epoch [30/100], Loss: 98.8214
Epoch [40/100], Loss: 82.9048
Epoch [50/100], Loss: 267.2411
Epoch [60/100], Loss: 112.2827
Epoch [70/100], Loss: 62.2377
Epoch [80/100], Loss: 186.9705
Epoch [90/100], Loss: 138.7474
Epoch [100/100], Loss: 60.6565


In [5]:
def evaluate_model(model, X_test, y_test):
    model.eval()
    start_time = time.time()
    outputs = model(X_test)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_test).sum().item() / len(y_test)
    inference_time = time.time() - start_time
    model_size = sum(p.numel() * (4 if p.dtype == torch.float32 else 1) for p in model.parameters())  
    return accuracy, model_size, inference_time

accuracy, model_size, inference_time = evaluate_model(model, X_test, y_test)
print(f'Accuracy: {accuracy}, Model Size: {model_size / 1e6:.2f} MB, Inference Time: {inference_time:.6f} seconds')


Accuracy: 0.9087, Model Size: 0.03 MB, Inference Time: 0.031446 seconds


In [6]:
from torch.quantization import quantize_dynamic

def quantize_model(model):
    torch.backends.quantized.engine = 'qnnpack'
    quantized_model = quantize_dynamic(
        model,               
        {torch.nn.Linear},   
        dtype=torch.qint8     
    )
    return quantized_model


In [7]:
import time

def evaluate_quantized_model(model_int8, X_test, y_test):

    model_int8.eval()  
    start_time = time.time()
    X_test_tensor = X_test.view(X_test.size(0), -1).float()  
    outputs = model_int8(X_test_tensor)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_test).sum().item() / len(y_test)
    inference_time = time.time() - start_time
    model_size_bytes = sum(p.numel() * (4 if p.dtype == torch.float32 else 1) for p in model_int8.parameters())
    model_size_mb = model_size_bytes / (1024 * 1024)  
    
    return accuracy, model_size_mb, inference_time

quantized_model = quantize_model(model)  

accuracy_quant, model_size_quant, inference_time_quant = evaluate_quantized_model(quantized_model, X_test, y_test)
print(f'Quantized Accuracy: {accuracy_quant}, Model Size: {model_size_quant:.2f} MB, Quantized Inference Time: {inference_time_quant:.6f} seconds')


Quantized Accuracy: 0.9089, Model Size: 0.00 MB, Quantized Inference Time: 0.034173 seconds




In [8]:
print(f'Original Model -> Accuracy: {accuracy}, Size: {model_size / 1e6:.2f} MB, Inference Time: {inference_time:.6f} seconds')
print(f'Quantized Model -> Accuracy: {accuracy_quant}, Size: {model_size_quant / 1e6:.2f} MB, Inference Time: {inference_time_quant:.6f} seconds')


Original Model -> Accuracy: 0.9087, Size: 0.03 MB, Inference Time: 0.031446 seconds
Quantized Model -> Accuracy: 0.9089, Size: 0.00 MB, Inference Time: 0.034173 seconds
