Refer https://www.bilibili.com/video/BV1D1421r7sj?spm_id_from=333.788.videopod.sections&vd_source=b47e02f9c38ec61f19d536cee73b2322

In [2]:
!nvidia-smi

Fri Mar  7 05:19:50 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.94                 Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2060      WDDM  |   00000000:04:00.0  On |                  N/A |
|  0%   51C    P0             32W /  172W |    2497MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Looking in indexes: https://download.pytorch.org/whl/cu126



[notice] A new release of pip is available: 24.1.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import torch
print(torch.cuda.is_available()) 
print(torch.cuda.device_count()) 
print(torch.cuda.get_device_name(0))

True
1
NVIDIA GeForce RTX 2060


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

<!-- ![This is an image](image/cifar-10.jpg)-->
<img src="image/cifar-10.jpg" alt="vgg" width="700"/>


In [6]:
transform = transforms.Compose([
    transforms.ToTensor()
])

train_dataset = datasets.CIFAR10(root='./cifar10', download=True, transform=transform)
loader = DataLoader(train_dataset, batch_size=500, shuffle=False)

mean = torch.zeros(3)
std = torch.zeros(3)

for images, _ in loader:
    for c in range(3):
        mean[c] += images[:, c, :, :].mean()
        std[c] += images[:, c, :, :].std()

mean /= len(loader)
std /= len(loader)

print(f'Mean: {mean}')
print(f'Std: {std}')

Mean: tensor([0.4914, 0.4822, 0.4465])
Std: tensor([0.2469, 0.2434, 0.2615])


In [7]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465], #Converts images from PIL or NumPy arrays to PyTorch tensors and scales pixel values from [0,255] to [0,1]
        std=[0.2469, 0.2434, 0.2615]   # Standardizes data using the provided mean and standard deviation
    )
])

train_dataset = datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./cifar10', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [8]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}]\tLoss: {loss.item():.4f}')

In [9]:
def test(model, device, test_loader):
    model.eval()
    correct = 0
    test_loss = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)')


<!-- ![Image from URL](image/alexnet.jpg) -->
<img src="image/VGG.png" alt="vgg" width="800"/>


output_size = (input_size + 2 × padding - kernel_size) / stride + 1

The size of CIFAR-10's image is 3 channels and 32 x 32  

image input | 32×32×3 | | Conv(3→64) | 32×32×64 | | MaxPool | 16×16×64 | | Conv(64→128) | 16×16×128 | | MaxPool | 8×8×128 | | Conv(128→256) | 8×8×256 | | Conv(256→256) | 8×8×256 | | MaxPool | 4×4×256 | | Conv(256→512) | 4×4×512 | | MaxPool | 2×2×512 | | Flatten | 2048 | | Linear | 512 → 512 → 10 | type

In [10]:
class ModifiedVGG(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(

            nn.Conv2d(3, 64, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 

            nn.Flatten(),  
            nn.Linear(512 * 2 * 2, 512),
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.ReLU(),

            nn.Linear(512, 10) 
        )

    def forward(self, x):
        return self.model(x)

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')
model = ModifiedVGG().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Using device: cuda
Train Epoch: 1 [0/50000]	Loss: 2.3015
Train Epoch: 1 [12800/50000]	Loss: 1.9187
Train Epoch: 1 [25600/50000]	Loss: 1.7141
Train Epoch: 1 [38400/50000]	Loss: 1.5726
Test set: Average loss: 1.3894, Accuracy: 4696/10000 (46.96%)
Train Epoch: 2 [0/50000]	Loss: 1.3954
Train Epoch: 2 [12800/50000]	Loss: 1.2452
Train Epoch: 2 [25600/50000]	Loss: 1.1081
Train Epoch: 2 [38400/50000]	Loss: 1.0416
Test set: Average loss: 1.0778, Accuracy: 6157/10000 (61.57%)
Train Epoch: 3 [0/50000]	Loss: 0.9074
Train Epoch: 3 [12800/50000]	Loss: 0.9532
Train Epoch: 3 [25600/50000]	Loss: 0.8743
Train Epoch: 3 [38400/50000]	Loss: 1.0617
Test set: Average loss: 0.9065, Accuracy: 6874/10000 (68.74%)
Train Epoch: 4 [0/50000]	Loss: 0.8491
Train Epoch: 4 [12800/50000]	Loss: 0.7668
Train Epoch: 4 [25600/50000]	Loss: 0.7016
Train Epoch: 4 [38400/50000]	Loss: 0.8230
Test set: Average loss: 0.8534, Accuracy: 7055/10000 (70.55%)
Train Epoch: 5 [0/50000]	Loss: 0.7371
Train Epoch: 5 [12800/50000]	Loss: 0.67