In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
import os

# 檢查GPU可用性
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'使用設備: {device}')

# 定義神經網路模型（只使用Dense、ReLU、Softmax層）
class FashionMNISTNet(nn.Module):
    def __init__(self):
        super(FashionMNISTNet, self).__init__()
        # 只使用全連接層，符合專案要求
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 256)  # 28*28 = 784
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)    # 10個類別
        
    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)  # 不在這裡套用softmax，訓練時用CrossEntropyLoss
        return x

# 資料預處理
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.2860,), (0.3530,))  # Fashion-MNIST的標準化參數
])

# 載入資料集
train_dataset = datasets.FashionMNIST(
    root='./data', 
    train=True, 
    download=True, 
    transform=transform
)

test_dataset = datasets.FashionMNIST(
    root='./data', 
    train=False, 
    download=True, 
    transform=transform
)

# 建立資料載入器
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# 初始化模型
model = FashionMNISTNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 訓練函數
def train_model(model, train_loader, criterion, optimizer, epochs=15):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            
            if batch_idx % 100 == 0:
                print(f'Epoch {epoch+1}/{epochs}, Batch {batch_idx}, '
                      f'Loss: {loss.item():.4f}')
        
        epoch_acc = 100 * correct / total
        print(f'Epoch {epoch+1} 完成 - 訓練準確率: {epoch_acc:.2f}%')

# 測試函數
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    
    accuracy = 100 * correct / total
    print(f'測試準確率: {accuracy:.2f}%')
    return accuracy

# 執行訓練
print("開始訓練...")
train_model(model, train_loader, criterion, optimizer, epochs=15)

# 測試模型
accuracy = test_model(model, test_loader)

# 儲存模型
torch.save(model.state_dict(), 'fashion_mnist_pytorch.pth')
torch.save(model, 'fashion_mnist_complete.pth')
print(f"PyTorch模型已儲存，測試準確率: {accuracy:.2f}%")


使用設備: cuda
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:34<00:00, 772kB/s] 


Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 140kB/s]


Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:05<00:00, 811kB/s] 


Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<?, ?B/s]


Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

開始訓練...
Epoch 1/15, Batch 0, Loss: 2.3072
Epoch 1/15, Batch 100, Loss: 0.5921
Epoch 1/15, Batch 200, Loss: 0.4152
Epoch 1/15, Batch 300, Loss: 0.4072
Epoch 1/15, Batch 400, Loss: 0.3342
Epoch 1 完成 - 訓練準確率: 81.39%
Epoch 2/15, Batch 0, Loss: 0.3779
Epoch 2/15, Batch 100, Loss: 0.3367
Epoch 2/15, Batch 200, Loss: 0.4231
Epoch 2/15, Batch 300, Loss: 0.3607
Epoch 2/15, Batch 400, Loss: 0.2709
Epoch 2 完成 - 訓練準確率: 86.74%
Epoch 3/15, Batch 0, Loss: 0.3141
Epoch 3/15, Batch 100, Loss: 0.3698
Epoch 3/15, Batch 200, Loss: 0.3601
Epoch 3/15, Batch 300, Loss: 0.3006
Epoch 3/15, Batch 400, Loss: 0.2130
Epoch 3 完成 - 訓練準確率: 88.25%
Epoch 4/15, Batch 0, Loss: 0.3137
Epoch 4/15, Batch 100, Loss: 0.2961
Epoch 4/15, Batch 200, Loss: 0.2286
Epoch 4/15, Batch 300, Loss: 0.2736
Epoch 4/15, Batch 400, Loss: 0.2318
Epoch 4 完成 - 訓練準確率: 89.05%
Epoch 5/15, Batch 0, Loss: 0.2652
Epoch 5/15, Batch 100, Loss: 0.2713
Epoch 5/15, B

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
import numpy as np
import time

# 檢查GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'使用設備: {device}')

# 高性能模型架構
class AdvancedFashionMNISTNet(nn.Module):
    def __init__(self, dropout_rate=0.3):
        super(AdvancedFashionMNISTNet, self).__init__()
        self.flatten = nn.Flatten()
        
        # 更深更寬的網路架構
        self.fc1 = nn.Linear(784, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.dropout1 = nn.Dropout(dropout_rate * 0.5)
        
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(dropout_rate * 0.7)
        
        self.fc3 = nn.Linear(512, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.dropout3 = nn.Dropout(dropout_rate)
        
        self.fc4 = nn.Linear(256, 128)
        self.bn4 = nn.BatchNorm1d(128)
        self.dropout4 = nn.Dropout(dropout_rate * 1.2)
        
        self.fc5 = nn.Linear(128, 64)
        self.dropout5 = nn.Dropout(dropout_rate * 0.8)
        
        self.fc6 = nn.Linear(64, 10)
        
        # 權重初始化
        self._initialize_weights()
        
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        
    def forward(self, x):
        x = self.flatten(x)
        
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout3(x)
        
        x = F.relu(self.bn4(self.fc4(x)))
        x = self.dropout4(x)
        
        x = F.relu(self.fc5(x))
        x = self.dropout5(x)
        
        x = self.fc6(x)
        return x

# 進階資料增強
transform_train = transforms.Compose([
    transforms.RandomRotation(15),                    # 隨機旋轉
    transforms.RandomHorizontalFlip(0.5),            # 隨機水平翻轉
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),  # 仿射變換
    transforms.ToTensor(),
    transforms.Normalize((0.2860,), (0.3530,))       # Fashion-MNIST標準化
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.2860,), (0.3530,))
])

# 載入資料集
train_dataset = datasets.FashionMNIST('./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.FashionMNIST('./data', train=False, transform=transform_test)

# 資料載入器
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=2)

# 早停機制
class EarlyStopping:
    def __init__(self, patience=10, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_acc = 0
        
    def __call__(self, val_acc):
        if val_acc > self.best_acc + self.min_delta:
            self.best_acc = val_acc
            self.counter = 0
            return False
        else:
            self.counter += 1
            return self.counter >= self.patience

# 訓練函數
def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        
        # 梯度裁剪
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
    
    return running_loss / len(train_loader), 100 * correct / total

# 測試函數
def test_model(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    
    return test_loss / len(test_loader), 100 * correct / total

# 主訓練流程
def train_advanced_model():
    model = AdvancedFashionMNISTNet(dropout_rate=0.3).to(device)
    
    # 損失函數和優化器
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # 標籤平滑
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    
    # 學習率調度器
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer, 
        max_lr=0.01,
        epochs=60,
        steps_per_epoch=len(train_loader),
        pct_start=0.3,
        anneal_strategy='cos'
    )
    
    early_stopping = EarlyStopping(patience=15, min_delta=0.001)
    best_acc = 0
    best_model_state = None
    
    print("開始訓練進階模型...")
    print("=" * 60)
    
    for epoch in range(60):
        start_time = time.time()
        
        # 訓練
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        
        # 測試
        test_loss, test_acc = test_model(model, test_loader, criterion, device)
        
        # 更新學習率
        scheduler.step()
        
        # 記錄最佳模型
        if test_acc > best_acc:
            best_acc = test_acc
            best_model_state = model.state_dict().copy()
        
        epoch_time = time.time() - start_time
        current_lr = optimizer.param_groups[0]['lr']
        
        print(f'Epoch {epoch+1:2d}/60 | '
              f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | '
              f'Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}% | '
              f'LR: {current_lr:.6f} | Time: {epoch_time:.1f}s')
        
        # 早停檢查
        if early_stopping(test_acc):
            print(f"早停於第 {epoch+1} 輪，最佳準確率: {best_acc:.2f}%")
            break
    
    # 載入最佳模型
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    print("=" * 60)
    print(f"訓練完成！最佳測試準確率: {best_acc:.2f}%")
    
    # 儲存模型
    torch.save(model.state_dict(), 'fashion_mnist_advanced.pth')
    torch.save(model, 'fashion_mnist_complete_advanced.pth')
    
    return model, best_acc

# 執行訓練
if __name__ == "__main__":
    model, accuracy = train_advanced_model()
    print(f"模型已儲存，最終準確率: {accuracy:.2f}%")


使用設備: cuda
開始訓練進階模型...
Epoch  1/60 | Train Loss: 2.2811 | Train Acc: 44.16% | Test Loss: 1.1296 | Test Acc: 70.72% | LR: 0.000400 | Time: 30.8s
Epoch  2/60 | Train Loss: 1.3899 | Train Acc: 62.00% | Test Loss: 1.0573 | Test Acc: 74.40% | LR: 0.000400 | Time: 22.3s
Epoch  3/60 | Train Loss: 1.2774 | Train Acc: 66.87% | Test Loss: 1.0174 | Test Acc: 75.23% | LR: 0.000400 | Time: 22.9s
Epoch  4/60 | Train Loss: 1.2211 | Train Acc: 69.20% | Test Loss: 0.9869 | Test Acc: 77.14% | LR: 0.000400 | Time: 22.9s
Epoch  5/60 | Train Loss: 1.1855 | Train Acc: 70.86% | Test Loss: 0.9715 | Test Acc: 76.74% | LR: 0.000400 | Time: 22.8s
Epoch  6/60 | Train Loss: 1.1541 | Train Acc: 71.95% | Test Loss: 0.9611 | Test Acc: 77.86% | LR: 0.000400 | Time: 25.5s
Epoch  7/60 | Train Loss: 1.1315 | Train Acc: 73.08% | Test Loss: 0.9343 | Test Acc: 79.44% | LR: 0.000400 | Time: 21.8s
Epoch  8/60 | Train Loss: 1.1106 | Train Acc: 73.61% | Test Loss: 0.9316 | Test Acc: 79.58% | LR: 0.000400 | Time: 22.1s
Epoch  9/

RuntimeError: DataLoader worker (pid(s) 25652, 27272) exited unexpectedly