In [1]:
# 請從這裡下載作業用資料
import urllib.request

url = "https://cchsu.info/files/images.zip"
output_path = "images.zip"

urllib.request.urlretrieve(url, output_path)
print("下載完成！")


In [1]:
import torch
from torch import nn
from torch.nn import functional as F

import torch
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))  # 如果有 GPU


2.6.0+cu118
11.8
True
NVIDIA GeForce RTX 4060 Laptop GPU


In [2]:
import pandas as pd
import numpy as np
import os
import warnings

warnings.filterwarnings("ignore")

In [3]:
# 圖片讀取
from torch.utils.data import Dataset
from PIL import Image
import os

class TxtImageDataset(Dataset):
    """
    從 txt 檔讀取圖片路徑與類別，格式為：
    /path/to/image1.jpg 0
    /path/to/image2.jpg 1
    """
    def __init__(self, txt_file, transform=None):
        self.samples = []
        with open(txt_file, 'r') as f:
            for line in f:
                path, label = line.strip().split()
                self.samples.append((path, int(label)))
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        img_path, label = self.samples[index]
        image = Image.open(img_path).convert('RGB')  # 強制為 RGB 格式
        if self.transform:
            image = self.transform(image)
        return image, label


In [4]:
# Dynamic Convolution

class SimpleInception(nn.Module):
    """
    簡化版的 Inception 模組，用於每個通道獨立處理。
    """
    def __init__(self, in_channels):
        super(SimpleInception, self).__init__()
        self.branch1 = nn.Conv2d(in_channels, 8, kernel_size=1)
        self.branch3 = nn.Conv2d(in_channels, 8, kernel_size=3, padding=1)
        self.branch5 = nn.Conv2d(in_channels, 8, kernel_size=5, padding=2)
        self.pool_proj = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 8, kernel_size=1)
        )

    def forward(self, x):
        b1 = self.branch1(x)
        b3 = self.branch3(x)
        b5 = self.branch5(x)
        bp = self.pool_proj(x)
        return torch.cat([b1, b3, b5, bp], dim=1)  # (B, 32, H, W)

class ChannelWiseInceptionAggregator(nn.Module):
    """
    主結構：每通道跑簡版 Inception -> concat -> Adaptive Pool -> Conv1x1
    """
    def __init__(self, output_size=(224, 224)):
        super(ChannelWiseInceptionAggregator, self).__init__()
        self.inception_modules = nn.ModuleList([SimpleInception(1) for _ in range(3)])  # 最多支援 RGB
        self.pool = nn.AdaptiveAvgPool2d(output_size)
        self.fusion = nn.Conv2d(32 * 3, 3, kernel_size=1)  # 三通道各有 32 個通道特徵

    def forward(self, x):
        # x shape: (B, C, H, W)
        B, C, H, W = x.size()
        features = []
        for c in range(C):
            xi = x[:, c:c+1, :, :]  # 取單通道
            fi = self.inception_modules[c](xi)  # (B, 32, H, W)
            features.append(fi)

        x = torch.cat(features, dim=1)  # (B, 32*C, H, W)
        x = self.pool(x)               # (B, 32*C, 224, 224)
        x = self.fusion(x)            # (B, 3, 224, 224)
        return x  # 將處理完的 feature map 回傳給外部 (送進 AlexNet 等分類器)


In [1]:
# 圖片處理
import torchvision.transforms as T
from torch.utils.data import DataLoader

# 對照組
transform_control = T.Compose([
    T.Resize((224, 224)),  # Resize 圖片到 224x224，AlexNet 的輸入大小
    T.ToTensor()
])

train_control_dataset = TxtImageDataset('train.txt', transform=transform_control)
val_control_dataset = TxtImageDataset('val.txt', transform=transform_control)
test_control_dataset = TxtImageDataset('test.txt', transform=transform_control)

# 讀取資料集
batch_size = 32
train_control_loader = DataLoader(train_control_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
val_control_loader = DataLoader(val_control_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
test_control_loader = DataLoader(test_control_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

NameError: name 'TxtImageDataset' is not defined

In [6]:
# 實驗組專用DataLoader
from torch.nn.functional import pad

class BatchListDataset(torch.utils.data.Dataset):
    def __init__(self, batches, transform=None):
        self.batches = batches
        self.transform = transform or T.ToTensor()

    def __len__(self):
        return len(self.batches)

    def __getitem__(self, idx):
        batch = self.batches[idx]
        images, labels = [], []
        max_h, max_w = 0, 0

        for path, label in batch:
            img = Image.open(path).convert('RGB')
            tensor = self.transform(img)
            images.append(tensor)
            labels.append(label)
            max_h = max(max_h, tensor.shape[1])
            max_w = max(max_w, tensor.shape[2])

        padded_imgs = [
            pad(img, (0, max_w - img.shape[2], 0, max_h - img.shape[1]), mode='constant', value=0)
            for img in images
        ]
        return torch.stack(padded_imgs), torch.tensor(labels)


In [7]:
def make_grouped_batches(txt_path, batch_size):
    with open(txt_path, 'r') as f:
        lines = [line.strip().split() for line in f if line.strip()]

    entries = []
    for path, label in lines:
        with Image.open(path) as img:
            w, h = img.size
        entries.append((path, int(label), max(w, h)))

    entries.sort(key=lambda x: x[2])
    return [ [(p, l) for p, l, _ in entries[i:i+batch_size]]
             for i in range(0, len(entries), batch_size)
             if len(entries[i:i+batch_size]) == batch_size ]


In [8]:
# 實驗組專用DataLoader
bs = 8
train_batches = make_grouped_batches("train.txt", batch_size=bs)
train_dataset = BatchListDataset(train_batches)
train_exp_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)

val_batches = make_grouped_batches("val.txt", batch_size=bs)
val_dataset = BatchListDataset(val_batches)
val_exp_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

test_batches = make_grouped_batches("test.txt", batch_size=bs)
test_dataset = BatchListDataset(test_batches)
test_exp_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [10]:
# 範例模型: AlexNet
from torchvision import models

def evaluate(model, dataloader, criterion, device, feature_module=False):
    model.eval()
    correct, total, total_loss = 0, 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            if feature_module:
                images = images.squeeze(0)
                labels = labels.squeeze(0)
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    acc = correct / total
    avg_loss = total_loss / total
    return acc, avg_loss

def train_model(train_loader, val_loader, feature_module=True, num_classes=50, epochs=10, lr=0.000005):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 使用 AlexNet 並替換第一層與最後一層
    alexnet = models.alexnet(pretrained=False)
    alexnet.features[0] = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
    alexnet.classifier[6] = nn.Linear(4096, num_classes)

    if feature_module:
        module = ChannelWiseInceptionAggregator()
        model = nn.Sequential(module, alexnet)
        print('Experiment group: Dynamic Convolution')
    else:
        model = alexnet
        print('Control group: Static Convolution')
        
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for batch_idx, (images, labels) in enumerate(train_loader):
            if feature_module:
                images = images.squeeze(0)
                labels = labels.squeeze(0)
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)

            # 🔄 每 10 個 batch 印一次進度（可調整）
            if (batch_idx + 1) % 50 == 0 or (batch_idx + 1) == len(train_loader):
                print(f"  Epoch {epoch+1}/{epochs} ┃ Batch {batch_idx+1}/{len(train_loader)} ┃ Loss: {loss.item():.4f}")

        train_loss = running_loss / len(train_loader.dataset)
        val_acc, val_loss = evaluate(model, val_loader, criterion, device, feature_module)
        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Acc = {val_acc:.4f}, Val Loss = {val_loss:.4f}")

    return model

In [14]:
# Control組訓練總時長: 0:43:15
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("\n[Control Group] Training with resized input")
model_ctrl = train_model(train_control_loader, val_control_loader, False)
acc_ctrl, loss_ctrl = evaluate(model_ctrl, test_control_loader, nn.CrossEntropyLoss(), device)


[Control Group] Training with resized input
Control group: Static Convolution
  Epoch 1/10 ┃ Batch 50/1979 ┃ Loss: 3.9125
  Epoch 1/10 ┃ Batch 100/1979 ┃ Loss: 3.9119
  Epoch 1/10 ┃ Batch 150/1979 ┃ Loss: 3.9162
  Epoch 1/10 ┃ Batch 200/1979 ┃ Loss: 3.9142
  Epoch 1/10 ┃ Batch 250/1979 ┃ Loss: 3.9144
  Epoch 1/10 ┃ Batch 300/1979 ┃ Loss: 3.9128
  Epoch 1/10 ┃ Batch 350/1979 ┃ Loss: 3.9117
  Epoch 1/10 ┃ Batch 400/1979 ┃ Loss: 3.9154
  Epoch 1/10 ┃ Batch 450/1979 ┃ Loss: 3.9131
  Epoch 1/10 ┃ Batch 500/1979 ┃ Loss: 3.9131
  Epoch 1/10 ┃ Batch 550/1979 ┃ Loss: 3.9034
  Epoch 1/10 ┃ Batch 600/1979 ┃ Loss: 3.9076
  Epoch 1/10 ┃ Batch 650/1979 ┃ Loss: 3.9005
  Epoch 1/10 ┃ Batch 700/1979 ┃ Loss: 3.9281
  Epoch 1/10 ┃ Batch 750/1979 ┃ Loss: 3.7819
  Epoch 1/10 ┃ Batch 800/1979 ┃ Loss: 3.7388
  Epoch 1/10 ┃ Batch 850/1979 ┃ Loss: 3.8383
  Epoch 1/10 ┃ Batch 900/1979 ┃ Loss: 3.7430
  Epoch 1/10 ┃ Batch 950/1979 ┃ Loss: 3.6555
  Epoch 1/10 ┃ Batch 1000/1979 ┃ Loss: 3.6819
  Epoch 1/10 ┃ Batch 

In [None]:
# 實驗組總共耗時: 6:48:14
print("\n[Experimental Group] Training with inception-based module")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_exp = train_model(train_exp_loader, val_exp_loader, True, lr=0.000001)
acc_exp, loss_exp = evaluate(model_exp, test_exp_loader, nn.CrossEntropyLoss(), device)


[Experimental Group] Training with inception-based module
Experiment group: Dynamic Convolution
  Epoch 1/10 ┃ Batch 50/7915 ┃ Loss: 3.9154
  Epoch 1/10 ┃ Batch 100/7915 ┃ Loss: 3.9096
  Epoch 1/10 ┃ Batch 150/7915 ┃ Loss: 3.8984
  Epoch 1/10 ┃ Batch 200/7915 ┃ Loss: 3.8969
  Epoch 1/10 ┃ Batch 250/7915 ┃ Loss: 3.9094
  Epoch 1/10 ┃ Batch 300/7915 ┃ Loss: 3.9175
  Epoch 1/10 ┃ Batch 350/7915 ┃ Loss: 3.9063
  Epoch 1/10 ┃ Batch 400/7915 ┃ Loss: 3.8995
  Epoch 1/10 ┃ Batch 450/7915 ┃ Loss: 3.9083
  Epoch 1/10 ┃ Batch 500/7915 ┃ Loss: 3.9043
  Epoch 1/10 ┃ Batch 550/7915 ┃ Loss: 3.9022
  Epoch 1/10 ┃ Batch 600/7915 ┃ Loss: 3.9112
  Epoch 1/10 ┃ Batch 650/7915 ┃ Loss: 3.9155
  Epoch 1/10 ┃ Batch 700/7915 ┃ Loss: 3.9030
  Epoch 1/10 ┃ Batch 750/7915 ┃ Loss: 3.9174
  Epoch 1/10 ┃ Batch 800/7915 ┃ Loss: 3.8970
  Epoch 1/10 ┃ Batch 850/7915 ┃ Loss: 3.9173
  Epoch 1/10 ┃ Batch 900/7915 ┃ Loss: 3.9207
  Epoch 1/10 ┃ Batch 950/7915 ┃ Loss: 3.9039
  Epoch 1/10 ┃ Batch 1000/7915 ┃ Loss: 3.9092
  E

NameError: name 'device' is not defined

In [None]:
# 前面忘了設置device，所以評估另外寫
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
acc_exp, loss_exp = evaluate(model_exp, test_exp_loader, nn.CrossEntropyLoss(), device, True)

In [15]:
print("\n=== Final Test Results ===")
print(f"Control Group     → Accuracy: {acc_ctrl:.4f}, Loss: {loss_ctrl:.4f}")
print(f"Experimental Group→ Accuracy: {acc_exp:.4f}, Loss: {loss_exp:.4f}")


=== Final Test Results ===
Control Group     → Accuracy: 0.2444, Loss: 2.8422
Experimental Group→ Accuracy: 0.0915, Loss: 3.5688
