In [1]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# --- Step 1: data and transforms ---

# Seed torch's global RNG before anything random is built. The shuffled
# DataLoader and the random augmentations below both draw from it, so without
# a seed a fresh "Restart & Run All" yields a different data order every time.
SEED = 42
torch.manual_seed(SEED)

# Per-channel normalization using the ImageNet statistics (the fixed values
# conventionally used with ImageNet-pretrained models).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: includes data augmentation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # random crop, rescaled to 224x224
    transforms.RandomHorizontalFlip(),  # random left-right flip
    transforms.ToTensor(),
    normalize,
])

# Test/validation pipeline: no augmentation, so evaluation is deterministic.
test_transform = transforms.Compose([
    transforms.Resize(256),      # rescale so the shorter side is 256 px
    transforms.CenterCrop(224),  # then take the central 224x224 patch
    transforms.ToTensor(),
    normalize,
])

# --- Load the data ---
# Assumes the dataset archives were extracted to these relative paths.
data_dir = './seg_train/'  # training set root
test_dir = './seg_test/'   # test set root

# ImageFolder derives the class labels from the sub-directory names.
train_dataset = ImageFolder(root=data_dir, transform=train_transform)
test_dataset = ImageFolder(root=test_dir, transform=test_transform)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Sanity-check what was loaded.
print(f"訓練集總筆數: {len(train_dataset)}")
print(f"測試集總筆數: {len(test_dataset)}")
print(f"資料集的類別: {train_dataset.classes}")

# Mapping from class name to the integer label used as the training target.
class_to_idx = train_dataset.class_to_idx
print(f"類別與索引的對應: {class_to_idx}")

訓練集總筆數: 14034
測試集總筆數: 3000
資料集的類別: ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
類別與索引的對應: {'buildings': 0, 'forest': 1, 'glacier': 2, 'mountain': 3, 'sea': 4, 'street': 5}


In [2]:
import torchvision.models as models
import torch.nn as nn
# --- Step 2: build the model (transfer learning) ---

# Start from a ResNet-18 carrying the ImageNet-pretrained weights.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# 1. Freeze every pretrained parameter so no gradients are computed for them.
model.requires_grad_(False)

# 2. Swap the classification head.
# Output size comes from our dataset, input size from the existing fc layer.
num_classes = len(train_dataset.classes)
num_ftrs = model.fc.in_features

# A freshly constructed Linear layer has requires_grad=True by default, which
# makes it the only part of the network that will be trained.
model.fc = nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Run on the first GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用的裝置: {device}")
model.to(device)  # nn.Module.to moves the parameters in place


# Show the replaced head to confirm the modification took effect.
print("--- 改造後的模型分類頭 ---")
print(model.fc)

使用的裝置: cuda:0
--- 改造後的模型分類頭 ---
Linear(in_features=512, out_features=6, bias=True)


In [3]:
# --- Step 3: train the model ---

# 1. Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
# Give the optimizer only the parameters that actually receive gradients,
# rather than every parameter of the model including frozen ones.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=0.001)  # lr = learning rate

# 2. Training hyper-parameters.
num_epochs = 5

# 3. Training loop.
for epoch in range(num_epochs):
    # Explicitly enter training mode every epoch. The evaluation cell calls
    # model.eval(); without this line, re-running this cell afterwards would
    # silently train with the model still in evaluation mode.
    model.train()

    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

print('訓練完成!')

Epoch [1/5], Step [100/439], Loss: 0.7642
Epoch [1/5], Step [200/439], Loss: 0.5869
Epoch [1/5], Step [300/439], Loss: 0.2333
Epoch [1/5], Step [400/439], Loss: 0.3776
Epoch [2/5], Step [100/439], Loss: 0.3097
Epoch [2/5], Step [200/439], Loss: 0.3009
Epoch [2/5], Step [300/439], Loss: 0.5230
Epoch [2/5], Step [400/439], Loss: 0.2727
Epoch [3/5], Step [100/439], Loss: 0.4382
Epoch [3/5], Step [200/439], Loss: 0.3822
Epoch [3/5], Step [300/439], Loss: 0.3995
Epoch [3/5], Step [400/439], Loss: 0.6688
Epoch [4/5], Step [100/439], Loss: 0.4375
Epoch [4/5], Step [200/439], Loss: 0.3661
Epoch [4/5], Step [300/439], Loss: 0.4486
Epoch [4/5], Step [400/439], Loss: 0.6414
Epoch [5/5], Step [100/439], Loss: 0.3720
Epoch [5/5], Step [200/439], Loss: 0.2042
Epoch [5/5], Step [300/439], Loss: 0.3921
Epoch [5/5], Step [400/439], Loss: 0.3625
訓練完成!


In [6]:
# --- Step 4: evaluate the model ---

model.eval()  # switch the model to evaluation mode

n_correct = 0
n_samples = 0

# Gradients are not needed for inference, so disable tracking for the forward
# passes only; the bookkeeping below works on plain Python numbers.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        # argmax replaces `torch.max(outputs.data, 1)`: the legacy `.data`
        # accessor is redundant under no_grad, and binding the throw-away `_`
        # would shadow IPython's last-output variable in the notebook.
        predicted = outputs.argmax(dim=1)

        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

# Accuracy as a percentage of the samples that were actually evaluated.
acc = 100.0 * n_correct / n_samples
print(f'模型在 {len(test_dataset)} 張測試圖片上的準確率為: {acc:.2f} %')

模型在 3000 張測試圖片上的準確率為: 90.60 %
