In [1]:
from google.colab import files
files.upload()
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!pip install -q kaggle
!kaggle --version
!kaggle competitions download -c deep-learning-spring-2025-project-1
!unzip -q deep-learning-spring-2025-project-1.zip -d ./data

Saving kaggle.json to kaggle.json
Kaggle API 1.6.17
Downloading deep-learning-spring-2025-project-1.zip to /content
100% 189M/189M [00:09<00:00, 21.9MB/s]
100% 189M/189M [00:09<00:00, 20.9MB/s]


1. Import dependency

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from data_handler import DataHandler
import numpy as np
import torch.nn.functional as F

 2. Dataset

In [3]:
# 数据集类
class CIFAR10Dataset(Dataset):
    def __init__(self, data_handler, files, transform=None):
        self.data = []
        self.labels = []
        self.transform = transform

        for file in files:
            batch = data_handler.unpickle(file)
            images = batch[b"data"].reshape(-1, 3, 32, 32).astype(np.uint8)
            labels = batch[b"labels"]
            self.data.append(images)
            self.labels.extend(labels)

        self.data = np.vstack(self.data)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.data[idx]
        image = np.transpose(image, (1, 2, 0))
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label


3. Data Process

In [4]:
# 数据处理
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

data_handler = DataHandler("./data/cifar-10-python/cifar-10-batches-py/")
train_files = [f"data_batch_{i}" for i in range(1, 6)]
test_files = ["test_batch"]

train_dataset = CIFAR10Dataset(data_handler, train_files, transform=transform)
test_dataset = CIFAR10Dataset(data_handler, test_files, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


4. Build Model

In [5]:
class ResidualBlock(nn.Module):
  def __init__(self, in_channels, out_channels, stride=1):
    super(ResidualBlock, self).__init__()
    # Using 3x3 square kernel, padding=1 ensure that the output size is the same as the input size.
    self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=True)
    self.bn1 = nn.BatchNorm2d(out_channels)

    self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True)
    self.bn2 = nn.BatchNorm2d(out_channels)

    self.sample = None
    if stride != 1 or in_channels != out_channels:
      self.sample = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=True),
        nn.BatchNorm2d(out_channels)
            )
  def forward(self, x):
    residualx = x
    if self.sample is not None:
      residualx = self.sample(x)

    out = self.conv1(x)
    out = self.bn1(out)
    out = nn.ReLU(inplace=True)(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out += residualx
    out = nn.ReLU(inplace=True)(out)

    return out

In [6]:
class ResNet(nn.Module):
  def __init__(self, block, layers, num_classes = 10):
    super(ResNet, self).__init__()
    self.in_channels = 64
    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.layer1 = self._make_layer(block, 50, layers[0])
    self.layer2 = self._make_layer(block, 100, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 200, layers[2], stride=2)
    # self.layer4 = self._make_layer(block, 336, layers[3], stride=2)
    self.layer4 = self._make_layer(block, 200, layers[3], stride=2)
    self.layer5 = self._make_layer(block, 200, layers[3], stride=2)

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    # self.fc = nn.Linear(336, num_classes)
    self.fc_1 = nn.Linear(200, 100)
    self.fc_2 = nn.Linear(100, 50)
    self.fc_3 = nn.Linear(50, num_classes)


  def _make_layer(self, block, out_channels, num_blocks, stride=1):
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels
    return nn.Sequential(*layers)

  def forward(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    # x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = self.layer5(x)

    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    # x = self.fc(x)
    x = self.relu(self.fc_1(x))
    x = self.relu(self.fc_2(x))
    x = self.relu(self.fc_3(x))
    x = F.log_softmax(x, dim=1)

    return x

In [7]:
def train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, num_epochs=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    for epoch in range(num_epochs):
        model.train()
        correct_train = 0
        total_train = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_accuracy = 100 * correct_train / total_train

        # === 测试阶段 ===
        model.eval()
        correct_test = 0
        total_test = 0

        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                correct_test += (predicted == labels).sum().item()
                total_test += labels.size(0)

        test_accuracy = 100 * correct_test / total_test
        print(f"Epoch{epoch},train_accuracy:{train_accuracy},test_accuracy:{test_accuracy}")
    return train_accuracy, test_accuracy

In [8]:
layers = [2, 2, 2, 2]
model = ResNet(ResidualBlock, layers).cuda()
criterion = torch.nn.CrossEntropyLoss()

from torchsummary import summary
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 50, 32, 32]           3,250
       BatchNorm2d-5           [-1, 50, 32, 32]             100
            Conv2d-6           [-1, 50, 32, 32]          28,850
       BatchNorm2d-7           [-1, 50, 32, 32]             100
            Conv2d-8           [-1, 50, 32, 32]          22,550
       BatchNorm2d-9           [-1, 50, 32, 32]             100
    ResidualBlock-10           [-1, 50, 32, 32]               0
           Conv2d-11           [-1, 50, 32, 32]          22,550
      BatchNorm2d-12           [-1, 50, 32, 32]             100
           Conv2d-13           [-1, 50, 32, 32]          22,550
      BatchNorm2d-14           [-1, 50,

In [None]:
learning_rates = [0.005, 0.001, 0.0005]
results = []

for lr in learning_rates:
    model = ResNet(ResidualBlock, layers).cuda()
    optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha = 0.9)

    print(f"\nTraining with RMSprop (lr={lr})...")
    train_acc, test_acc = train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, 50)

    results.append({
        'optimizer': optimizer,
        'learning_rate': lr,
        'train_acc': train_acc,
        'test_acc': test_acc
    })


Training with RMSprop (lr=0.005)...
Epoch0,train_accuracy:19.816,test_accuracy:18.77
Epoch1,train_accuracy:30.54,test_accuracy:37.33
Epoch2,train_accuracy:42.902,test_accuracy:41.28
Epoch3,train_accuracy:52.032,test_accuracy:47.35
Epoch4,train_accuracy:60.194,test_accuracy:57.65
Epoch5,train_accuracy:64.064,test_accuracy:64.5
Epoch6,train_accuracy:67.226,test_accuracy:61.4
Epoch7,train_accuracy:69.208,test_accuracy:66.09
Epoch8,train_accuracy:71.336,test_accuracy:67.14
Epoch9,train_accuracy:72.78,test_accuracy:65.05
Epoch10,train_accuracy:73.796,test_accuracy:69.32
Epoch11,train_accuracy:74.974,test_accuracy:66.8
Epoch12,train_accuracy:76.036,test_accuracy:70.21
Epoch13,train_accuracy:76.732,test_accuracy:73.59
Epoch14,train_accuracy:77.238,test_accuracy:74.72
Epoch15,train_accuracy:77.738,test_accuracy:73.59
Epoch16,train_accuracy:78.532,test_accuracy:74.41
Epoch17,train_accuracy:78.908,test_accuracy:76.06
Epoch18,train_accuracy:79.332,test_accuracy:75.69
Epoch19,train_accuracy:79.27

In [None]:
print("\n=== All Results ===")
for result in results:
    print(f"Optimizer: {result['optimizer']}, "
          f"Learning Rate: {result['learning_rate']}, "
          f"Train Accuracy: {result['train_acc']:.2f}%, "
          f"Test Accuracy: {result['test_acc']:.2f}%")


=== All Results ===
Optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.005
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
), Learning Rate: 0.005, Train Accuracy: 30.96%, Test Accuracy: 40.97%
Optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
), Learning Rate: 0.001, Train Accuracy: 18.68%, Test Accuracy: 25.34%
Optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.0005
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
), Learning Rate: 0.0005, Train Accuracy: 15.76%, Test Accuracy: 21.33%
