使用資料增強(data augmentation)技術，觀察可否提升模型預測績效？

載入資料集、預處理

In [97]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# 原始資料（無增強）
raw_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 資料增強
# 資料增強 pipeline 修改
aug_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.ToTensor(),   
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],std =[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.3, scale=(0.02, 0.15), ratio=(0.3, 3.3), value=0        # 或用 'random' 產生隨機值
    ),
])


# 載入原始與增強的資料集（訓練 + 測試）
raw_train_dataset_full = datasets.ImageFolder(root="./data/train", transform=raw_transform)
raw_test_dataset = datasets.ImageFolder(root="./data/test", transform=raw_transform)
aug_train_dataset_full = datasets.ImageFolder(root="./data/train", transform=aug_transform)
aug_test_dataset = datasets.ImageFolder(root="./data/test", transform=raw_transform)

# 顯示資料資訊
print(" 類別對應：", raw_train_dataset_full.classes)
print(" 訓練筆數：", len(raw_train_dataset_full))
print(" 測試筆數：", len(raw_test_dataset))

# 切分訓練 / 驗證集（7:3）
train_ratio = 0.7
raw_train_size = int(len(raw_train_dataset_full) * train_ratio)
raw_val_size = len(raw_train_dataset_full) - raw_train_size
raw_train_dataset, raw_val_dataset = random_split(raw_train_dataset_full, [raw_train_size, raw_val_size])

aug_train_size = int(len(aug_train_dataset_full) * train_ratio)
aug_val_size = len(aug_train_dataset_full) - aug_train_size
aug_train_dataset, aug_val_dataset = random_split(aug_train_dataset_full, [aug_train_size, aug_val_size])

# DataLoader
raw_train_loader = DataLoader(raw_train_dataset, batch_size=64, shuffle=True)
raw_val_loader = DataLoader(raw_val_dataset, batch_size=64, shuffle=False)
raw_test_loader = DataLoader(raw_test_dataset, batch_size=64, shuffle=False)

aug_train_loader = DataLoader(aug_train_dataset, batch_size=64, shuffle=True)
aug_val_loader = DataLoader(aug_val_dataset, batch_size=64, shuffle=False)
aug_test_loader = DataLoader(aug_test_dataset, batch_size=64, shuffle=False)


 類別對應： ['men', 'women']
 訓練筆數： 220
 測試筆數： 80


建立CNN model

In [98]:
import torchvision.models as models
import torch.nn as nn

class GenderCNN(nn.Module):
    def __init__(self):
        super().__init__()
        # 使用預訓練的 EfficientNet
        self.backbone = models.efficientnet_b0(pretrained=True)
        
        
        for param in self.backbone.parameters():
            param.requires_grad = False
        for param in self.backbone.features[-5:].parameters():
            param.requires_grad = True

        # 替換分類頭
        self.backbone.classifier[1] = nn.Sequential(
            nn.Linear(1280, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.Linear(256, 2)
        )
    def forward(self, x):
        return self.backbone(x)

training

In [99]:
import torch
import torch.nn as nn
import copy

def train_model(model, train_loader, val_loader, num_epochs, device, label="",):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=1e-2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  
    
    best_acc = 0.0
    best_model_state_dict = copy.deepcopy(model.state_dict())

    print(f" 開始訓練：{label}")
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0

        # 訓練階段
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step() 
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
        train_acc = 100 * correct / total

        # 驗證
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        val_acc = 100 * val_correct / val_total

        print(f"[{label}] Epoch {epoch+1}/{num_epochs}, Loss: {running_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_state_dict = copy.deepcopy(model.state_dict())

    # 訓練結束後加載最佳模型
    model.load_state_dict(best_model_state_dict)
    print(f"[{label}] 最佳驗證準確率：{best_acc:.2f}%")
    return model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 訓練參數
num_epochs = 20 
model_raw = GenderCNN().to(device)
print("\n" + "="*30 + " Training Raw Model " + "="*30)
model_raw = train_model(model_raw, raw_train_loader, raw_val_loader, num_epochs, device, label="raw")

model_aug = GenderCNN().to(device)
print("\n" + "="*30 + " Training Augmented Model " + "="*30)
model_aug = train_model(model_aug, aug_train_loader, aug_val_loader, num_epochs, device, label="augmented")



 開始訓練：raw




[raw] Epoch 1/20, Loss: 2.1297, Train Acc: 50.65%, Val Acc: 68.18%
[raw] Epoch 2/20, Loss: 1.1804, Train Acc: 90.26%, Val Acc: 75.76%
[raw] Epoch 3/20, Loss: 0.9409, Train Acc: 94.81%, Val Acc: 80.30%
[raw] Epoch 4/20, Loss: 0.7177, Train Acc: 98.05%, Val Acc: 77.27%
[raw] Epoch 5/20, Loss: 0.6789, Train Acc: 97.40%, Val Acc: 78.79%
[raw] Epoch 6/20, Loss: 0.5734, Train Acc: 98.70%, Val Acc: 75.76%
[raw] Epoch 7/20, Loss: 0.6465, Train Acc: 98.70%, Val Acc: 77.27%
[raw] Epoch 8/20, Loss: 0.5395, Train Acc: 97.40%, Val Acc: 75.76%
[raw] Epoch 9/20, Loss: 0.5433, Train Acc: 99.35%, Val Acc: 77.27%
[raw] Epoch 10/20, Loss: 0.5571, Train Acc: 98.70%, Val Acc: 78.79%
[raw] Epoch 11/20, Loss: 0.4605, Train Acc: 98.70%, Val Acc: 80.30%
[raw] Epoch 12/20, Loss: 0.5110, Train Acc: 98.70%, Val Acc: 81.82%
[raw] Epoch 13/20, Loss: 0.5331, Train Acc: 98.70%, Val Acc: 80.30%
[raw] Epoch 14/20, Loss: 0.4520, Train Acc: 100.00%, Val Acc: 78.79%
[raw] Epoch 15/20, Loss: 0.4709, Train Acc: 99.35%, Val 

In [100]:
def evaluate_model(model, test_loader, device, label=""):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    acc = 100 * correct / total
    print(f"[{label}]  測試集準確率：{acc:.2f}%")
    return acc

evaluate_model(model_raw, raw_test_loader, device, label="raw")
evaluate_model(model_aug, aug_test_loader, device, label="augmented")

[raw]  測試集準確率：78.75%
[augmented]  測試集準確率：82.50%


82.5