In [2]:
!nvidia-smi

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import glob
import cv2
import torch.nn.functional as F
from torch.autograd import Variable
import os

import torchvision
import torchvision.transforms as transforms

from torch.nn import CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
import matplotlib.pyplot as plt
from torchsummary import summary

Thu Dec 11 01:13:32 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.83                 Driver Version: 576.83         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A2000 Laptop GPU  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   53C    P0             12W /   45W |     720MiB /   4096MiB |      4%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
def load_data(data_dir = "./data"):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])


    trainset = torchvision.datasets.CIFAR10(
        root = data_dir, train = True, download = True, transform = transform)

    testset = torchvision.datasets.CIFAR10(
        root = data_dir, train = False, download = True, transform = transform)
    
    return trainset, testset

In [18]:
class Net(nn.Module):
    def __init__(self, l1 = 120, l2 = 84):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, l1)
        self.fc2 = nn.Linear(l1, l2)
        self.fc3 = nn.Linear(l2, 10)
        self.softmax = nn.Softmax(dim = 1)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = x.view(-1, 16 * 5 * 5)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        output = self.softmax(x)

        return output

In [19]:
model = Net()

if torch.cuda.is_available():
    model.cuda()

summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                  [-1, 120]          48,120
            Linear-6                   [-1, 84]          10,164
            Linear-7                   [-1, 10]             850
           Softmax-8                   [-1, 10]               0
Total params: 62,006
Trainable params: 62,006
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.06
Params size (MB): 0.24
Estimated Total Size (MB): 0.31
----------------------------------------------------------------


In [20]:
def test_accuracy(net, device = "cpu"):
    correct = 0
    total = 0

    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [21]:
def train(net, criterion, optimizer, save_path, device = "cpu"):
    T_cur = 0

    for epoch in range(1, epochs + 1):
        
        running_loss = 0.0
        epoch_steps = 0
        T_cur += 1

        if epoch <= warm_epoch:
            optimizer.param_groups[0]['lr'] = (1.0 * epoch) / warm_epoch * init_lr
        else:
            optimizer.param_groups[0]['lr'] = last_lr + (init_lr - last_lr) * (1 + np.cos(T_cur * np.pi / T_max)) / 2

        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            epoch_steps += 1
            
            if i + 1 == len(trainloader):
                print("[Epoch %d] loss: %.3f" % (epoch, running_loss / epoch_steps))
                running_loss = 0.0
                
    print("Finished Training")
    print("Test accuracy:", test_accuracy(net, device))
    torch.save(net.state_dict(), save_path)

In [22]:
epochs = 10
warm_epoch = 5
init_lr = 1e-2
last_lr = 1e-4
T_max = epochs

configs = [{'l1': 64, 'l2': 32}, {'l1': 128, 'l2': 64}]

trainset, testset = load_data('./data')

trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size = 128,
    shuffle = True,
)

testloader = torch.utils.data.DataLoader(
    testset,
    batch_size = 4,
    shuffle = True,
    num_workers = 2
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|████████████████████████████████████████████████████████████████████████████████| 170M/170M [05:32<00:00, 513kB/s]


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [24]:
os.makedirs('./snapshot', exist_ok = True)

for i, cfg in enumerate(configs):
    print(cfg)

    net = Net(cfg['l1'], cfg['l2'])

    device = "cpu"

    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)

    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr = init_lr, momentum = 0.9)

    save_path = f'./snapshot/model{i}.pth'
    train(net, criterion, optimizer, save_path, device)

{'l1': 64, 'l2': 32}
[Epoch 1] loss: 2.303
[Epoch 2] loss: 2.303
[Epoch 3] loss: 2.302
[Epoch 4] loss: 2.302
[Epoch 5] loss: 2.293
[Epoch 6] loss: 2.239
[Epoch 7] loss: 2.199
[Epoch 8] loss: 2.182
[Epoch 9] loss: 2.176
[Epoch 10] loss: 2.174
Finished Training
Test accuracy: 0.2817
{'l1': 128, 'l2': 64}
[Epoch 1] loss: 2.303
[Epoch 2] loss: 2.303
[Epoch 3] loss: 2.302
[Epoch 4] loss: 2.301
[Epoch 5] loss: 2.274
[Epoch 6] loss: 2.182
[Epoch 7] loss: 2.155
[Epoch 8] loss: 2.145
[Epoch 9] loss: 2.141
[Epoch 10] loss: 2.139
Finished Training
Test accuracy: 0.3225


In [25]:
from tqdm import tqdm

In [28]:
def test_ensemble(device = "cuda:0"):
    correct = 0
    total = 0

    with torch.no_grad():
        for data in tqdm(testloader):
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            final_outputs = torch.zeros((4, 10))
            final_outputs = final_outputs.to(device)

            for i, cfg in enumerate(configs):
                net = Net(cfg['l1'], cfg['l2'])
                net = net.to(device)

                net.load_state_dict(torch.load(f'./snapshot/model{i}.pth'))
                
                outputs = net(images)
                final_outputs = final_outputs.add(outputs)

            final_outputs.div(len(configs))
            _, predicted = torch.max(final_outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [29]:
test_ensemble()

  net.load_state_dict(torch.load(f'./snapshot/model{i}.pth'))
100%|██████████████████████████████████████████████████████████████████████████████| 2500/2500 [00:26<00:00, 94.57it/s]


0.3102