In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Reproducibility: seed PyTorch's global RNG so that weight initialisation and
# DataLoader shuffling repeat across "Restart Kernel -> Run All".
# NOTE: some CUDA/cuDNN kernels may still be non-deterministic on GPU.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tunable data settings, defined once and shared by both loaders.
DATA_ROOT = './data'
BATCH_SIZE = 64

# Load the MNIST dataset; later cells treat each image as sequence data
# (one image row per time step).
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)

# Shuffle only the training split; keep the test order fixed.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [2]:
# Define the LSTM RNN model
class LSTM_RNN(nn.Module):
    """Stacked LSTM followed by a linear classification head.

    The sequence is run through ``nn.LSTM`` (``batch_first=True``) and the
    output at the last time step is mapped to ``output_dim`` scores.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_dim: Size of the output vector (number of classes).
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Recurrent encoder; inputs are laid out as (batch, seq_len, input_dim).
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

        # Fully connected layer mapping the LSTM output to class scores.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Compute class scores for a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim) computed from the LSTM output
            at the final time step.
        """
        # Zero-initialise the hidden and cell states on the same device and
        # with the same dtype as the input, instead of relying on a
        # notebook-level global `device`. This keeps the module usable on any
        # device/dtype and importable outside this notebook.
        batch_size = x.size(0)
        h0 = x.new_zeros(self.num_layers, batch_size, self.hidden_dim)
        c0 = x.new_zeros(self.num_layers, batch_size, self.hidden_dim)

        # Run the full sequence through the LSTM stack.
        out, _ = self.lstm(x, (h0, c0))

        # Classify from the output of the last time step only.
        return self.fc(out[:, -1, :])

In [3]:
# Hyperparameters
input_dim = 28         # features per time step: one image row of 28 pixels
hidden_dim = 128       # LSTM hidden-state size
num_layers = 2         # number of stacked LSTM layers
output_dim = 10        # number of classes (digits 0-9)
learning_rate = 1e-3
num_epochs = 10

# Build the model, then move its parameters to the selected device
model = LSTM_RNN(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=output_dim,
)
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [5]:
# Train the model
# NOTE: this cell updates `model` and `optimizer` in place, so re-running it
# continues training from the current weights. Re-run the model-initialisation
# cell first to train from scratch.
print("Training...")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct = 0
    total = 0
    for images, labels in train_loader:
        # Drop the singleton channel axis: (batch, 1, 28, 28) -> (batch, 28, 28),
        # so each image row becomes one time step of the sequence.
        images = images.squeeze(1).to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `criterion` returns the batch-mean loss; weight it by the batch size
        # so a smaller final batch does not skew the epoch average.
        running_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    # Per-sample averages over the whole epoch.
    epoch_loss = running_loss / total
    epoch_acc = correct / total * 100
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")


Training...
Epoch 1/10, Loss: 0.0500, Accuracy: 98.50%
Epoch 2/10, Loss: 0.0417, Accuracy: 98.70%
Epoch 3/10, Loss: 0.0344, Accuracy: 98.95%
Epoch 4/10, Loss: 0.0328, Accuracy: 98.99%
Epoch 5/10, Loss: 0.0274, Accuracy: 99.11%
Epoch 6/10, Loss: 0.0255, Accuracy: 99.17%
Epoch 7/10, Loss: 0.0227, Accuracy: 99.26%
Epoch 8/10, Loss: 0.0192, Accuracy: 99.39%
Epoch 9/10, Loss: 0.0188, Accuracy: 99.39%
Epoch 10/10, Loss: 0.0165, Accuracy: 99.49%


In [6]:
# Evaluate the model on the test set
print("Evaluating...")
model.eval()

correct = 0
total = 0
# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        # Drop the singleton channel axis, as in training.
        images = images.squeeze(1).to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class = index of the largest score per sample.
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Evaluating...
Test Accuracy: 98.73%
