In [1]:
import os
import random
import numpy as np
import pandas as pd
import cv2
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.image as mpimg
%matplotlib inline

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import random_split, TensorDataset
from torch.utils.data.dataloader import DataLoader

from torchvision import datasets, transforms, models 
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid

import warnings
warnings.filterwarnings("ignore")

In [2]:
TRAIN_RATIO = 0.8
EPOCHS = 50
BATCH_SIZE = 32

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## 小題 1：數據讀取與預處理 (15 分)

In [4]:
train_imgs = torch.load("/kaggle/input/moai-2025-training/train_images.pt").to(torch.float32) / 255
train_imgs = torch.unsqueeze(train_imgs, 1)
test_imgs = torch.load("/kaggle/input/moai-2025-training/test_images.pt")
train_labels = pd.read_csv("/kaggle/input/moai-2025-training/train_labels.csv").to_numpy()[:,1]
train_labels = F.one_hot(torch.Tensor(train_labels).to(torch.int64), num_classes=10).to(torch.float32)

In [5]:
train_imgs = train_imgs.to(device)
train_labels = train_labels.to(device)

分割數據

In [6]:
l = int(train_imgs.shape[0] * TRAIN_RATIO)
trainset = TensorDataset(train_imgs[:l], train_labels[:l])
testset = TensorDataset(train_imgs[l:], train_labels[l:])

In [7]:
train_dataloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=True)

## 小題 2：構建 CNN 模型 (15 分)

In [8]:
model = nn.Sequential(
    nn.Conv2d(1,20,3,padding=1),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.Conv2d(20,64,3,padding=1),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.ReLU(),
    nn.Linear(64 * 7 * 7, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
    nn.Softmax(),
)

## 小題 3：訓練模型 (20 分)

In [9]:
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)

In [10]:
for i in train_dataloader:
    print(i[0].device)
    break

cuda:0


In [11]:
for epoch in trange(EPOCHS, position=0):
    training_loss = 0.0
    validing_loss = 0.0
    
    model.train()
    for batch in train_dataloader:
        # get the inputs; data is a list of [inputs, labels]
        images, labels = batch

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        training_loss += loss.item()

    model.eval()
    total = 0
    corr = 0
    for batch in test_dataloader:
        # get the inputs; data is a list of [inputs, labels]
        images, labels = batch

        # forward only
        outputs = model(images)
        loss = criterion(outputs, labels)
        validing_loss += loss.item()
        
        lab = torch.argmax(labels, axis=1)
        out = torch.argmax(outputs, axis=1)
        corr += BATCH_SIZE - len(torch.nonzero(lab - out))
        total += BATCH_SIZE

    tqdm.write(f'[Epoch {epoch + 1}] loss: {training_loss / 2000:.3f} | {validing_loss / 2000:.3f}, acc: {corr / total * 100:.3f}')

print('Finished Training')

  0%|          | 0/50 [00:00<?, ?it/s]

[Epoch 1] loss: 1.432 | 0.308, acc: 82.125
[Epoch 2] loss: 1.198 | 0.287, acc: 93.242
[Epoch 3] loss: 1.142 | 0.282, acc: 95.808
[Epoch 4] loss: 1.129 | 0.280, acc: 96.783
[Epoch 5] loss: 1.122 | 0.280, acc: 97.183
[Epoch 6] loss: 1.117 | 0.279, acc: 97.517
[Epoch 7] loss: 1.115 | 0.279, acc: 97.733
[Epoch 8] loss: 1.112 | 0.278, acc: 97.883
[Epoch 9] loss: 1.111 | 0.278, acc: 97.817
[Epoch 10] loss: 1.110 | 0.278, acc: 97.833
[Epoch 11] loss: 1.108 | 0.278, acc: 98.192
[Epoch 12] loss: 1.108 | 0.278, acc: 98.200
[Epoch 13] loss: 1.107 | 0.277, acc: 98.350
[Epoch 14] loss: 1.106 | 0.278, acc: 98.108
[Epoch 15] loss: 1.106 | 0.277, acc: 98.367
[Epoch 16] loss: 1.105 | 0.277, acc: 98.408
[Epoch 17] loss: 1.104 | 0.277, acc: 98.275
[Epoch 18] loss: 1.104 | 0.277, acc: 98.500
[Epoch 19] loss: 1.104 | 0.277, acc: 98.308
[Epoch 20] loss: 1.103 | 0.277, acc: 98.425
[Epoch 21] loss: 1.102 | 0.277, acc: 98.625
[Epoch 22] loss: 1.102 | 0.277, acc: 98.683
[Epoch 23] loss: 1.102 | 0.277, acc: 98.7

### Validate Model

In [12]:
model.eval()
test_imgs = test_imgs.to(device).to(torch.float32)
test_imgs = torch.unsqueeze(test_imgs, 1)

In [13]:
test_imgs.shape

torch.Size([10000, 1, 28, 28])

In [14]:
test_predict = model(test_imgs)
test_predict = torch.argmax(test_predict, axis=1)
df = pd.DataFrame(test_predict.cpu().numpy())
df.to_csv("test_labels.csv", header=["label"], index_label="id")
df.to_csv("submission.csv", header=["label"], index_label="id")