In [1]:
import torch
from torch import nn
from torch.nn import functional as F

import torch
# Report the PyTorch build, the CUDA toolkit it was compiled against, and
# whether a CUDA device is usable in this kernel.
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())
# Only query the device name when a GPU exists; get_device_name(0) raises
# on CPU-only machines.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))


2.6.0+cu118
11.8
True
NVIDIA GeForce RTX 4060 Laptop GPU


In [2]:
import pandas as pd
import numpy as np
import os
import warnings

warnings.filterwarnings("ignore")

In [3]:
# 圖片讀取
from torch.utils.data import Dataset
from PIL import Image
import os

class TxtImageDataset(Dataset):
    """
    Image-classification dataset backed by a plain-text index file.

    Each non-empty line of the index holds an image path followed by an
    integer class label, separated by whitespace:

        /path/to/image1.jpg 0
        /path/to/image2.jpg 1

    The label is the last whitespace-separated field, so paths that
    contain spaces are accepted. Blank lines are skipped.

    Args:
        txt_file: Path of the index file.
        transform: Optional callable applied to each loaded PIL image.

    Raises:
        ValueError: If a non-empty line has no path/label pair or its
            label is not an integer.
    """
    def __init__(self, txt_file, transform=None):
        self.samples = []
        with open(txt_file, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                entry = line.strip()
                if not entry:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                # Split once from the right so spaces inside the path survive.
                fields = entry.rsplit(None, 1)
                if len(fields) != 2:
                    raise ValueError(
                        f"{txt_file}:{line_no}: expected '<path> <label>', got {entry!r}"
                    )
                path, label = fields
                try:
                    label_id = int(label)
                except ValueError:
                    raise ValueError(
                        f"{txt_file}:{line_no}: label {label!r} is not an integer"
                    ) from None
                self.samples.append((path, label_id))
        self.transform = transform

    def __len__(self):
        """Return the number of (path, label) entries read from the index."""
        return len(self.samples)

    def __getitem__(self, index):
        """Load the image at `index`, force RGB, apply the transform, return (image, label)."""
        img_path, label = self.samples[index]
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made, instead of leaving it to garbage collection.
        with Image.open(img_path) as img:
            image = img.convert('RGB')  # force 3-channel RGB
        if self.transform:
            image = self.transform(image)
        return image, label


In [4]:
# 圖片處理
import torchvision.transforms as T
from torch.utils.data import DataLoader

# Control (baseline) preprocessing: resize every image to 64x64, then convert to a tensor
transform_control = T.Compose([T.Resize((64, 64)), T.ToTensor()])

# One dataset per split, all sharing the control transform
train_control_dataset = TxtImageDataset(txt_file='train.txt', transform=transform_control)
val_control_dataset = TxtImageDataset(txt_file='val.txt', transform=transform_control)
test_control_dataset = TxtImageDataset(txt_file='test.txt', transform=transform_control)

# Data loaders: only the training split is shuffled
batch_size = 8
train_loader = DataLoader(
    dataset=train_control_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)
val_loader = DataLoader(
    dataset=val_control_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)
test_loader = DataLoader(
    dataset=test_control_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

In [5]:
import torch.optim as optim
from torchvision import models

# Select the compute device: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the ResNet-34 baseline
def build_model(num_classes):
    """
    Create a randomly initialised ResNet-34 with a `num_classes`-way head.

    Args:
        num_classes: Number of output logits of the replaced final layer.

    Returns:
        The model, moved to the notebook-level `device`.
    """
    # `weights=None` is the current spelling of the deprecated `pretrained=False`.
    model = models.resnet34(weights=None)
    # Replace the final fully connected layer with one sized for this task.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model.to(device)

# 訓練模型
def train(model, train_loader, val_loader, epochs=10, lr=0.00001):
    """
    Train `model` in place with Adam and cross-entropy loss, validating
    after every epoch.

    Progress is printed every 50 batches and on the last batch of each
    epoch. At the end of each epoch the sample-weighted mean training loss
    and the validation metrics returned by `evaluate` are printed.

    NOTE: a later cell in this notebook rebinds `train` (and `evaluate`)
    to functions with different signatures; this version is only reachable
    until that cell has run.

    Args:
        model: Module to optimise; expected to already live on `device`.
        train_loader: Iterable of (images, labels) training batches.
        val_loader: Iterable of (images, labels) validation batches.
        epochs: Number of passes over `train_loader`.
        lr: Adam learning rate.

    Returns:
        None. The model is updated in place.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    num_batches = len(train_loader)

    for epoch in range(epochs):
        model.train()
        # Sum of per-sample losses and number of samples seen this epoch.
        train_loss, seen = 0.0, 0
        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; weight it by batch size so the
            # epoch average is correct even when the last batch is smaller.
            train_loss += loss.item() * images.size(0)
            seen += images.size(0)
            if (batch_idx + 1) % 50 == 0 or (batch_idx + 1) == num_batches:
                print(f"  Epoch {epoch+1}/{epochs} ┃ Batch {batch_idx+1}/{num_batches} ┃ Loss: {loss.item():.4f}")

        # Report the epoch-level training loss (previously accumulated but discarded).
        if seen:
            print(f"[Train] Loss: {train_loss / seen:.4f}")
        val_acc, val_loss = evaluate(model, val_loader, criterion)
        print(f"[Val] Acc: {val_acc:.2f}%, Loss: {val_loss:.4f}")

# 評估模型（Val/Test 共用）
def evaluate(model, data_loader, criterion):
    """
    Compute accuracy and mean loss of `model` over `data_loader`.

    The model is switched to eval mode (and left there) and gradients are
    disabled. Batches are moved to the device the model's parameters live
    on, so the result does not depend on the notebook-level `device`
    variable still matching the model; that global is used only for a
    model without parameters.

    Args:
        model: Module producing class logits of shape (batch, classes).
        data_loader: Iterable of (inputs, labels) batches.
        criterion: Loss callable returning the batch-mean loss.

    Returns:
        (acc, avg_loss): accuracy in percent (0-100) and the
        sample-weighted mean loss.

    Raises:
        ValueError: If `data_loader` yields no samples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device
    loss_total, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(run_device), y.to(run_device)
            outputs = model(x)
            loss = criterion(outputs, y)
            # Undo the batch mean so unequal batch sizes are weighted correctly.
            loss_total += loss.item() * x.size(0)
            _, predicted = outputs.max(1)
            correct += (predicted == y).sum().item()
            total += y.size(0)
    if total == 0:
        raise ValueError("evaluate() received a data loader with no samples")
    acc = 100. * correct / total
    avg_loss = loss_total / total
    return acc, avg_loss


In [6]:
# Baseline training: ResNet-34 with a 50-class head, 10 epochs, Adam lr=1e-4
# (the explicit lr overrides the 1e-5 default of train()).
model = build_model(num_classes=50)
train(model, train_loader, val_loader, epochs=10, lr=1e-4)

  Epoch 1/10 ┃ Batch 50/3958 ┃ Loss: 3.6568
  Epoch 1/10 ┃ Batch 100/3958 ┃ Loss: 3.8030
  Epoch 1/10 ┃ Batch 150/3958 ┃ Loss: 3.6508
  Epoch 1/10 ┃ Batch 200/3958 ┃ Loss: 3.5641
  Epoch 1/10 ┃ Batch 250/3958 ┃ Loss: 3.6363
  Epoch 1/10 ┃ Batch 300/3958 ┃ Loss: 3.8868
  Epoch 1/10 ┃ Batch 350/3958 ┃ Loss: 3.4212
  Epoch 1/10 ┃ Batch 400/3958 ┃ Loss: 3.3275
  Epoch 1/10 ┃ Batch 450/3958 ┃ Loss: 3.3952
  Epoch 1/10 ┃ Batch 500/3958 ┃ Loss: 3.0953
  Epoch 1/10 ┃ Batch 550/3958 ┃ Loss: 3.2839
  Epoch 1/10 ┃ Batch 600/3958 ┃ Loss: 3.5871
  Epoch 1/10 ┃ Batch 650/3958 ┃ Loss: 3.6208
  Epoch 1/10 ┃ Batch 700/3958 ┃ Loss: 3.3807
  Epoch 1/10 ┃ Batch 750/3958 ┃ Loss: 2.7345
  Epoch 1/10 ┃ Batch 800/3958 ┃ Loss: 3.5209
  Epoch 1/10 ┃ Batch 850/3958 ┃ Loss: 3.3057
  Epoch 1/10 ┃ Batch 900/3958 ┃ Loss: 3.0645
  Epoch 1/10 ┃ Batch 950/3958 ┃ Loss: 3.2297
  Epoch 1/10 ┃ Batch 1000/3958 ┃ Loss: 3.5688
  Epoch 1/10 ┃ Batch 1050/3958 ┃ Loss: 3.7668
  Epoch 1/10 ┃ Batch 1100/3958 ┃ Loss: 2.8275
  Epoch 

In [7]:
# Final test-set result for the baseline model.
# Uses the three-argument evaluate(); the "%" in the format string assumes
# it returns accuracy already scaled to percent.
print("Final Test Evaluation")
test_acc, test_loss = evaluate(model, test_loader, nn.CrossEntropyLoss())
print(f"[Test] Accuracy: {test_acc:.2f}%, Loss: {test_loss:.4f}")

Final Test Evaluation
[Test] Accuracy: 62.89%, Loss: 1.4200


In [5]:
from tqdm import tqdm

# -----------------------------
# Model: fixed structure SelfAttn → Conv → SelfAttn → Conv
# ablation_list holds the 1-based indices of the blocks that are KEPT; any block not listed is skipped (ablated)
# -----------------------------
class LightSelfAttention(nn.Module):
    """
    Single-head self-attention over the spatial positions of a feature map.

    Queries, keys and values are produced by 1x1 convolutions. Each of the
    H*W positions attends to all H*W positions, and a final 1x1 convolution
    projects the attended features. Input and output are both
    (B, dim, H, W).
    """
    def __init__(self, dim):
        super().__init__()
        # Creation order (q, k, v, proj) is kept so parameter initialisation
        # consumes the RNG in the same sequence.
        self.q = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1)
        self.k = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1)
        self.v = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1)
        self.proj = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        batch, channels, height, width = x.shape
        positions = height * width

        # Flatten the spatial grid: each tensor becomes (B, C, HW).
        query = self.q(x).view(batch, channels, positions)
        key = self.k(x).view(batch, channels, positions)
        value = self.v(x).view(batch, channels, positions)

        # scores[b, i, j] = <query at position i, key at position j> / sqrt(C)
        scores = torch.bmm(query.transpose(1, 2), key) / (channels ** 0.5)
        weights = self.softmax(scores)  # (B, HW, HW), normalised over keys

        # Weighted sum of values for every query position, back to (B, C, H, W).
        attended = torch.bmm(value, weights.transpose(1, 2))
        attended = attended.view(batch, channels, height, width)
        return self.proj(attended)

class ConvLayer(nn.Module):
    """3x3 same-padding convolution, then batch norm, then ReLU; channel count is unchanged."""
    def __init__(self, dim):
        super().__init__()
        self.conv = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(num_features=dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        features = self.conv(x)
        normalised = self.bn(features)
        return self.relu(normalised)

class FlexibleNet(nn.Module):
    """
    Small classifier with a fixed four-block body whose blocks can be
    switched off individually for ablation studies.

    Body order: 1 = LightSelfAttention, 2 = ConvLayer,
    3 = LightSelfAttention, 4 = ConvLayer. A 1x1 convolution lifts the
    input to `dim` channels first; global average pooling and a linear
    layer produce the logits.

    Despite its name, `ablation_list` holds the 1-based indices of the
    blocks that are KEPT; blocks not listed are skipped in `forward`.
    All four blocks are always constructed, so a skipped block still
    contributes (unused) parameters to `model.parameters()`.

    Args:
        in_channels: Channels of the input image.
        num_classes: Number of output logits.
        dim: Channel width used throughout the body.
        ablation_list: Iterable of block indices (1-4) to keep. `None`
            (the default) keeps all four blocks.

    Raises:
        ValueError: If `ablation_list` contains an index outside 1-4.
    """
    def __init__(self, in_channels=3, num_classes=50, dim=64, ablation_list=None):
        super().__init__()
        self.proj = nn.Conv2d(in_channels, dim, kernel_size=1)
        self.layers = nn.ModuleList([
            LightSelfAttention(dim),  # Block 1
            ConvLayer(dim),          # Block 2
            LightSelfAttention(dim),  # Block 3
            ConvLayer(dim)           # Block 4
        ])
        # Store a private copy: avoids a shared mutable default argument and
        # decouples the model from later edits to the caller's list.
        if ablation_list is None:
            self.ablation_list = [1, 2, 3, 4]
        else:
            self.ablation_list = list(ablation_list)
        # Out-of-range indices (e.g. 0-based numbering) would otherwise be
        # ignored silently and run a different architecture than intended.
        valid = range(1, len(self.layers) + 1)
        unknown = [idx for idx in self.ablation_list if idx not in valid]
        if unknown:
            raise ValueError(
                f"ablation_list entries must be in 1..{len(self.layers)}, got {unknown}"
            )
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(dim, num_classes)

    def forward(self, x):
        x = self.proj(x)
        for block_no, layer in enumerate(self.layers, start=1):
            if block_no in self.ablation_list:
                x = layer(x)
            # otherwise the block is ablated: pass x through unchanged
        x = self.pool(x).flatten(1)  # (B, dim, 1, 1) -> (B, dim)
        return self.fc(x)  # raw logits for CrossEntropyLoss

# -----------------------------
# 訓練與評估函數（含訓練過程監控）
# -----------------------------
def train(train_loader, val_loader, epochs=10, lr=0.0000005, ablation_list=None):
    """
    Build a FlexibleNet, train it with Adam and cross-entropy, and return it.

    NOTE: this definition shadows the earlier `train(model, ...)` in this
    notebook and has a different signature.

    Args:
        train_loader: Iterable of (images, labels) training batches.
        val_loader: Iterable of (images, labels) validation batches.
        epochs: Number of passes over `train_loader`.
        lr: Adam learning rate.
            NOTE(review): the 5e-7 default is extremely small. The test
            losses recorded in this notebook (about 3.8, close to
            ln(50) ≈ 3.91) suggest the runs barely moved from their
            initial state; confirm this value is intended.
        ablation_list: 1-based indices of the FlexibleNet blocks to KEEP.
            `None` (the default) keeps all four blocks.

    Returns:
        The trained FlexibleNet, on CUDA when available and otherwise CPU.
    """
    # Resolve the default here, not in the signature, to avoid a shared
    # mutable default argument.
    kept_blocks = [1, 2, 3, 4] if ablation_list is None else list(ablation_list)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = FlexibleNet(ablation_list=kept_blocks).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
    num_batches = len(train_loader)

    for epoch in range(epochs):
        model.train()
        running_loss, seen = 0.0, 0
        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size for a correct epoch mean.
            running_loss += loss.item() * images.size(0)
            seen += images.size(0)

            # Print progress every 50 batches and on the final batch
            if (batch_idx + 1) % 50 == 0 or (batch_idx + 1) == num_batches:
                print(f"  Epoch {epoch+1}/{epochs} ┃ Batch {batch_idx+1}/{num_batches} ┃ Loss: {loss.item():.4f}")

        # Divide by the samples actually processed: len(dataset) would
        # understate the loss whenever the loader drops its last batch.
        train_loss = running_loss / max(seen, 1)
        val_acc, val_loss = evaluate(model, val_loader, criterion, device)
        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Acc = {val_acc:.4f}, Val Loss = {val_loss:.4f}")

    return model

def evaluate(model, dataloader, criterion, device=None):
    """
    Compute accuracy and mean loss of `model` over `dataloader`.

    The model is switched to eval mode (and left there) and gradients are
    disabled.

    NOTE: this definition shadows the earlier three-argument `evaluate` in
    this notebook. `device` is optional so that three-argument calls keep
    working, but unlike the earlier version this one returns accuracy as a
    fraction, not a percentage.

    Args:
        model: Module producing class logits of shape (batch, classes).
        dataloader: Iterable of (images, labels) batches.
        criterion: Loss callable returning the batch-mean loss.
        device: Device to move each batch to. When `None`, the device of
            the model's first parameter is used (CPU for a model without
            parameters).

    Returns:
        (acc, avg_loss): accuracy as a fraction in [0, 1] and the
        sample-weighted mean loss.

    Raises:
        ValueError: If `dataloader` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    correct, total, total_loss = 0, 0, 0.0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            # Undo the batch mean so unequal batch sizes are weighted correctly.
            total_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    if total == 0:
        raise ValueError("evaluate() received a dataloader with no samples")
    acc = correct / total
    avg_loss = total_loss / total
    return acc, avg_loss


In [6]:
# Full model: train FlexibleNet with the default ablation_list (all four
# blocks kept), then score it on the test split.
print("\nFull Model Training")
# Rebinds the notebook-level `device`; it is passed to evaluate() below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_full = train(train_loader, val_loader)
# evaluate() returns (accuracy as a fraction, mean loss).
acc_full, loss_full = evaluate(model_full, test_loader, nn.CrossEntropyLoss(), device)


Full Model Training
  Epoch 1/10 ┃ Batch 50/7916 ┃ Loss: 3.8913
  Epoch 1/10 ┃ Batch 100/7916 ┃ Loss: 3.8124
  Epoch 1/10 ┃ Batch 150/7916 ┃ Loss: 3.9507
  Epoch 1/10 ┃ Batch 200/7916 ┃ Loss: 4.1358
  Epoch 1/10 ┃ Batch 250/7916 ┃ Loss: 4.0170
  Epoch 1/10 ┃ Batch 300/7916 ┃ Loss: 3.8197
  Epoch 1/10 ┃ Batch 350/7916 ┃ Loss: 4.0289
  Epoch 1/10 ┃ Batch 400/7916 ┃ Loss: 3.8789
  Epoch 1/10 ┃ Batch 450/7916 ┃ Loss: 3.8110
  Epoch 1/10 ┃ Batch 500/7916 ┃ Loss: 3.9057
  Epoch 1/10 ┃ Batch 550/7916 ┃ Loss: 3.9762
  Epoch 1/10 ┃ Batch 600/7916 ┃ Loss: 3.7851
  Epoch 1/10 ┃ Batch 650/7916 ┃ Loss: 3.8658
  Epoch 1/10 ┃ Batch 700/7916 ┃ Loss: 3.8677
  Epoch 1/10 ┃ Batch 750/7916 ┃ Loss: 3.8871
  Epoch 1/10 ┃ Batch 800/7916 ┃ Loss: 4.2060
  Epoch 1/10 ┃ Batch 850/7916 ┃ Loss: 4.0755
  Epoch 1/10 ┃ Batch 900/7916 ┃ Loss: 4.1453
  Epoch 1/10 ┃ Batch 950/7916 ┃ Loss: 4.0924
  Epoch 1/10 ┃ Batch 1000/7916 ┃ Loss: 4.1045
  Epoch 1/10 ┃ Batch 1050/7916 ┃ Loss: 3.9382
  Epoch 1/10 ┃ Batch 1100/7916 ┃ 

In [7]:
# Ablation: remove block 1 (the first SelfAttention layer); blocks 2, 3, 4 are kept
model_234 = train(train_loader, val_loader, ablation_list=[2,3,4])
acc_234, loss_234 = evaluate(model_234, test_loader, nn.CrossEntropyLoss(), device)

  Epoch 1/10 ┃ Batch 50/7916 ┃ Loss: 4.0996
  Epoch 1/10 ┃ Batch 100/7916 ┃ Loss: 3.9047
  Epoch 1/10 ┃ Batch 150/7916 ┃ Loss: 3.8815
  Epoch 1/10 ┃ Batch 200/7916 ┃ Loss: 3.8683
  Epoch 1/10 ┃ Batch 250/7916 ┃ Loss: 4.0688
  Epoch 1/10 ┃ Batch 300/7916 ┃ Loss: 3.8606
  Epoch 1/10 ┃ Batch 350/7916 ┃ Loss: 4.0166
  Epoch 1/10 ┃ Batch 400/7916 ┃ Loss: 4.1707
  Epoch 1/10 ┃ Batch 450/7916 ┃ Loss: 3.9962
  Epoch 1/10 ┃ Batch 500/7916 ┃ Loss: 4.2279
  Epoch 1/10 ┃ Batch 550/7916 ┃ Loss: 3.9948
  Epoch 1/10 ┃ Batch 600/7916 ┃ Loss: 3.8160
  Epoch 1/10 ┃ Batch 650/7916 ┃ Loss: 3.9004
  Epoch 1/10 ┃ Batch 700/7916 ┃ Loss: 4.0028
  Epoch 1/10 ┃ Batch 750/7916 ┃ Loss: 4.0641
  Epoch 1/10 ┃ Batch 800/7916 ┃ Loss: 3.6453
  Epoch 1/10 ┃ Batch 850/7916 ┃ Loss: 3.9546
  Epoch 1/10 ┃ Batch 900/7916 ┃ Loss: 4.1896
  Epoch 1/10 ┃ Batch 950/7916 ┃ Loss: 4.1738
  Epoch 1/10 ┃ Batch 1000/7916 ┃ Loss: 4.1297
  Epoch 1/10 ┃ Batch 1050/7916 ┃ Loss: 4.1175
  Epoch 1/10 ┃ Batch 1100/7916 ┃ Loss: 3.9090
  Epoch 

In [8]:
# Ablation: remove block 2 (the first Conv layer); blocks 1, 3, 4 are kept
model_134 = train(train_loader, val_loader, ablation_list=[1,3,4])
acc_134, loss_134 = evaluate(model_134, test_loader, nn.CrossEntropyLoss(), device)

  Epoch 1/10 ┃ Batch 50/7916 ┃ Loss: 4.0059
  Epoch 1/10 ┃ Batch 100/7916 ┃ Loss: 3.9990
  Epoch 1/10 ┃ Batch 150/7916 ┃ Loss: 3.9773
  Epoch 1/10 ┃ Batch 200/7916 ┃ Loss: 3.9433
  Epoch 1/10 ┃ Batch 250/7916 ┃ Loss: 3.8185
  Epoch 1/10 ┃ Batch 300/7916 ┃ Loss: 3.9873
  Epoch 1/10 ┃ Batch 350/7916 ┃ Loss: 3.9692
  Epoch 1/10 ┃ Batch 400/7916 ┃ Loss: 3.8612
  Epoch 1/10 ┃ Batch 450/7916 ┃ Loss: 3.9772
  Epoch 1/10 ┃ Batch 500/7916 ┃ Loss: 4.0574
  Epoch 1/10 ┃ Batch 550/7916 ┃ Loss: 3.9177
  Epoch 1/10 ┃ Batch 600/7916 ┃ Loss: 3.7347
  Epoch 1/10 ┃ Batch 650/7916 ┃ Loss: 3.8927
  Epoch 1/10 ┃ Batch 700/7916 ┃ Loss: 4.0106
  Epoch 1/10 ┃ Batch 750/7916 ┃ Loss: 3.8724
  Epoch 1/10 ┃ Batch 800/7916 ┃ Loss: 3.8599
  Epoch 1/10 ┃ Batch 850/7916 ┃ Loss: 3.9350
  Epoch 1/10 ┃ Batch 900/7916 ┃ Loss: 3.8372
  Epoch 1/10 ┃ Batch 950/7916 ┃ Loss: 3.9594
  Epoch 1/10 ┃ Batch 1000/7916 ┃ Loss: 3.8485
  Epoch 1/10 ┃ Batch 1050/7916 ┃ Loss: 4.0217
  Epoch 1/10 ┃ Batch 1100/7916 ┃ Loss: 4.0047
  Epoch 

In [9]:
# Ablation: remove block 3 (the second SelfAttention layer); blocks 1, 2, 4 are kept
model_124 = train(train_loader, val_loader, ablation_list=[1,2,4])
acc_124, loss_124 = evaluate(model_124, test_loader, nn.CrossEntropyLoss(), device)

  Epoch 1/10 ┃ Batch 50/7916 ┃ Loss: 3.9477
  Epoch 1/10 ┃ Batch 100/7916 ┃ Loss: 3.7678
  Epoch 1/10 ┃ Batch 150/7916 ┃ Loss: 3.8618
  Epoch 1/10 ┃ Batch 200/7916 ┃ Loss: 3.8739
  Epoch 1/10 ┃ Batch 250/7916 ┃ Loss: 4.0080
  Epoch 1/10 ┃ Batch 300/7916 ┃ Loss: 3.9612
  Epoch 1/10 ┃ Batch 350/7916 ┃ Loss: 3.8863
  Epoch 1/10 ┃ Batch 400/7916 ┃ Loss: 4.0350
  Epoch 1/10 ┃ Batch 450/7916 ┃ Loss: 3.9998
  Epoch 1/10 ┃ Batch 500/7916 ┃ Loss: 3.8638
  Epoch 1/10 ┃ Batch 550/7916 ┃ Loss: 3.9285
  Epoch 1/10 ┃ Batch 600/7916 ┃ Loss: 3.8726
  Epoch 1/10 ┃ Batch 650/7916 ┃ Loss: 3.9555
  Epoch 1/10 ┃ Batch 700/7916 ┃ Loss: 4.0340
  Epoch 1/10 ┃ Batch 750/7916 ┃ Loss: 4.0162
  Epoch 1/10 ┃ Batch 800/7916 ┃ Loss: 3.9096
  Epoch 1/10 ┃ Batch 850/7916 ┃ Loss: 3.9749
  Epoch 1/10 ┃ Batch 900/7916 ┃ Loss: 3.9106
  Epoch 1/10 ┃ Batch 950/7916 ┃ Loss: 3.9116
  Epoch 1/10 ┃ Batch 1000/7916 ┃ Loss: 3.9052
  Epoch 1/10 ┃ Batch 1050/7916 ┃ Loss: 3.8692
  Epoch 1/10 ┃ Batch 1100/7916 ┃ Loss: 3.8734
  Epoch 

In [10]:
# Ablation: remove block 4 (the second Conv layer); blocks 1, 2, 3 are kept
model_123 = train(train_loader, val_loader, ablation_list=[1,2,3])
acc_123, loss_123 = evaluate(model_123, test_loader, nn.CrossEntropyLoss(), device)

  Epoch 1/10 ┃ Batch 50/7916 ┃ Loss: 3.9199
  Epoch 1/10 ┃ Batch 100/7916 ┃ Loss: 3.8405
  Epoch 1/10 ┃ Batch 150/7916 ┃ Loss: 3.8927
  Epoch 1/10 ┃ Batch 200/7916 ┃ Loss: 3.8709
  Epoch 1/10 ┃ Batch 250/7916 ┃ Loss: 3.9442
  Epoch 1/10 ┃ Batch 300/7916 ┃ Loss: 3.9051
  Epoch 1/10 ┃ Batch 350/7916 ┃ Loss: 3.8996
  Epoch 1/10 ┃ Batch 400/7916 ┃ Loss: 3.9111
  Epoch 1/10 ┃ Batch 450/7916 ┃ Loss: 3.9324
  Epoch 1/10 ┃ Batch 500/7916 ┃ Loss: 3.8959
  Epoch 1/10 ┃ Batch 550/7916 ┃ Loss: 3.9633
  Epoch 1/10 ┃ Batch 600/7916 ┃ Loss: 3.9887
  Epoch 1/10 ┃ Batch 650/7916 ┃ Loss: 3.8772
  Epoch 1/10 ┃ Batch 700/7916 ┃ Loss: 3.9115
  Epoch 1/10 ┃ Batch 750/7916 ┃ Loss: 3.9883
  Epoch 1/10 ┃ Batch 800/7916 ┃ Loss: 3.9457
  Epoch 1/10 ┃ Batch 850/7916 ┃ Loss: 3.8589
  Epoch 1/10 ┃ Batch 900/7916 ┃ Loss: 3.9750
  Epoch 1/10 ┃ Batch 950/7916 ┃ Loss: 3.9804
  Epoch 1/10 ┃ Batch 1000/7916 ┃ Loss: 3.9720
  Epoch 1/10 ┃ Batch 1050/7916 ┃ Loss: 3.9442
  Epoch 1/10 ┃ Batch 1100/7916 ┃ Loss: 3.8610
  Epoch 

In [11]:
# Print the final test-set results for the full model and every ablation run
results = {
    "Full Model": (acc_full, loss_full),
    "Ablation 234": (acc_234, loss_234),
    "Ablation 134": (acc_134, loss_134),
    "Ablation 124": (acc_124, loss_124),
    "Ablation 123": (acc_123, loss_123),
}

# Build one row per run in a single chain: transpose so runs become rows,
# name the index "Model", then turn it into an ordinary column.
results_df = (
    pd.DataFrame(results, index=["Accuracy", "Loss"])
    .T
    .rename_axis("Model")
    .reset_index()
)
print(results_df)

          Model  Accuracy      Loss
0    Full Model  0.042222  3.849201
1  Ablation 234  0.048889  3.778282
2  Ablation 134  0.046667  3.836714
3  Ablation 124  0.053333  3.825828
4  Ablation 123  0.035556  3.851097
