In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [None]:
from tqdm.auto import tqdm

In [None]:
# NOTE: no extra install is needed in this cell. The `transforms` name used in
# this notebook is the `torchvision.transforms` module imported in the first
# cell; the PyPI distribution called `transforms` is an unrelated package and
# nothing here imports it.

Collecting transforms
  Downloading transforms-0.2.1-py3-none-any.whl.metadata (1.8 kB)
Downloading transforms-0.2.1-py3-none-any.whl (18 kB)
Installing collected packages: transforms
Successfully installed transforms-0.2.1


In [None]:
# ----------------- Device selection -----------------
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'Using device: {device}')

Using device: cuda


In [None]:
# ----------------- Hyperparameters -----------------
seed = 42               # fixed RNG seed (see torch.manual_seed below)
batch_size = 16         # samples per mini-batch
num_epochs = 25         # full passes over the training set
learning_rate = 0.001   # optimizer step size

# Seed PyTorch's global generator before any model or DataLoader is created so
# that weight initialisation and shuffle order are repeatable between runs.
# (Some GPU kernels may still introduce small run-to-run differences.)
torch.manual_seed(seed)

In [None]:
# ----------------- 数据加载 -----------------
# ----------------- Data loading -----------------
# Preprocessing pipeline: convert each image to a Tensor, then normalize it.
# 0.1307 / 0.3081 are presumably the MNIST pixel mean and std — verify if the
# dataset is changed.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [None]:
# Download (when not already present under ./data) and load the MNIST
# training and test splits, applying the preprocessing pipeline to both.
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Training batches are shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 16.4MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 471kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 4.44MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 8.61MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [None]:
# ----------------- Simple fully connected (ANN) model -----------------
class SimpleANN(nn.Module):
    """Two-layer fully connected classifier.

    Each sample is flattened to a vector, passed through one hidden
    ``Linear`` + ``ReLU`` layer and projected to ``num_classes`` logits.

    Args:
        input_size: Number of features per sample after flattening
            (default ``28*28``).
        hidden_size: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=28*28, hidden_size=256, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape ``(B, num_classes)`` for a batch ``x``.

        ``x`` is flattened per sample, e.g. ``(B, 1, 28, 28) -> (B, 28*28)``;
        the flattened width must equal ``input_size``.
        """
        # reshape() returns a view when it can and copies otherwise, so
        # non-contiguous inputs (e.g. transposed images) are accepted too;
        # view() would raise for those.
        x = x.reshape(x.size(0), -1)
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [None]:
# ----------------- Simple CNN model -----------------
class SimpleCNN(nn.Module):
    """Small convolutional classifier: two conv stages, then two linear layers.

    Each stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2).
    The first linear layer is sized for 64 channels of 7x7, i.e. it assumes a
    28x28 input that has been halved twice by the pooling layers.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Stage 1: 1 input channel -> 32 channels; 3x3 kernel with padding=1
        # keeps the spatial size.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        # Stage 2: 32 channels -> 64 channels, same kernel geometry.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        # Activation and pooling modules are shared by both stages.
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(2, 2)  # downsample by a factor of 2

        # Classifier head: 64 * 7 * 7 flattened features -> 128 -> num_classes.
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map an image batch ``x`` to logits of shape ``(B, num_classes)``."""
        # Stage 1: conv -> batch norm -> ReLU -> pool
        x = self.pool(self.relu(self.bn1(self.conv1(x))))
        # Stage 2: conv -> batch norm -> ReLU -> pool
        x = self.pool(self.relu(self.bn2(self.conv2(x))))
        # Flatten everything except the batch axis.
        features = x.view(x.shape[0], -1)
        # Fully connected head.
        hidden = self.relu(self.fc1(features))
        return self.fc2(hidden)

In [None]:
# ----------------- Simple RNN model -----------------
class SimpleRNN(nn.Module):
    """Recurrent classifier that reads an image as a sequence of rows.

    A multi-layer tanh ``nn.RNN`` (``batch_first=True``) consumes the sequence
    and a linear layer maps the output at the final time step to logits.

    Args:
        input_size: Features per time step (default 28).
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked RNN layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=28, hidden_size=256, num_layers=2, num_classes=10):
        super().__init__()
        # Plain nn.RNN with tanh activation, batch dimension first.
        self.rnn = nn.RNN(
            input_size,
            hidden_size,
            num_layers,
            nonlinearity='tanh',
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``.

        ``x`` is expected as ``(batch, 1, 28, 28)``.
        """
        # Drop the channel axis: (batch, 1, 28, 28) -> (batch, 28, 28).
        sequence = x.squeeze(1)
        # Per-step outputs: (batch, 28, hidden_size); the final hidden state
        # returned alongside them is not needed.
        step_outputs = self.rnn(sequence)[0]
        # Use the last time step as the summary of the whole sequence.
        last_step = step_outputs[:, -1]
        return self.fc(last_step)

In [None]:
# Choose the architecture to train. To try another one, replace the
# constructor call below with one of these:
#   SimpleANN()
#   SimpleRNN(num_classes=10)
model = SimpleCNN(num_classes=10)
model = model.to(device)  # move parameters and buffers to the selected device

In [None]:
# ----------------- Loss function and optimizer -----------------
# Adam updates every parameter of `model` using `learning_rate` as step size.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
# Cross-entropy between the model's output logits and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [None]:
# ----------------- Training loop -----------------
# NOTE(review): the epoch log saved with this notebook shows the average loss
# rising again after roughly epoch 6. Worth checking the learning rate and
# adding per-epoch validation before trusting the final weights — confirm on a
# fresh run.
for epoch in range(num_epochs):
    model.train()
    epoch_loss_sum = 0.0   # sum of per-sample losses seen in this epoch
    epoch_samples = 0      # number of samples seen in this epoch

    # Wrap train_loader in tqdm to show a progress bar for the epoch.
    progress_bar = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]", dynamic_ncols=True)

    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once: each .item() call copies the value to the host.
        batch_loss = loss.item()
        batch_count = labels.size(0)
        # `criterion` returns the batch mean, so scale by the batch size to
        # keep the epoch average correct when the last batch is shorter.
        epoch_loss_sum += batch_loss * batch_count
        epoch_samples += batch_count

        # Show the current batch loss next to the progress bar.
        progress_bar.set_postfix(loss=f"{batch_loss:.4f}")

    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    avg_loss = epoch_loss_sum / max(epoch_samples, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}")

Epoch [1/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [1/25], Average Loss: 0.6908


Epoch [2/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [2/25], Average Loss: 0.4109


Epoch [3/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [3/25], Average Loss: 0.3610


Epoch [4/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [4/25], Average Loss: 0.3260


Epoch [5/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [5/25], Average Loss: 0.3416


Epoch [6/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [6/25], Average Loss: 0.3100


Epoch [7/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [7/25], Average Loss: 0.3340


Epoch [8/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [8/25], Average Loss: 0.3355


Epoch [9/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [9/25], Average Loss: 0.3657


Epoch [10/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [10/25], Average Loss: 0.3390


Epoch [11/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [11/25], Average Loss: 0.3764


Epoch [12/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [12/25], Average Loss: 0.3951


Epoch [13/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [13/25], Average Loss: 0.4379


Epoch [14/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [14/25], Average Loss: 0.4265


Epoch [15/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [15/25], Average Loss: 0.4912


Epoch [16/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [16/25], Average Loss: 0.4545


Epoch [17/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [17/25], Average Loss: 0.4764


Epoch [18/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [18/25], Average Loss: 0.4808


Epoch [19/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [19/25], Average Loss: 0.4731


Epoch [20/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [20/25], Average Loss: 0.5021


Epoch [21/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [21/25], Average Loss: 0.5703


Epoch [22/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [22/25], Average Loss: 0.9428


Epoch [23/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [23/25], Average Loss: 0.7470


Epoch [24/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [24/25], Average Loss: 0.6242


Epoch [25/25]:   0%|          | 0/3750 [00:00<?, ?it/s]

Epoch [25/25], Average Loss: 0.6969


In [None]:
# ----------------- Model evaluation -----------------
# Put the model in evaluation mode before measuring accuracy.
model.eval()
correct = 0  # number of test samples whose predicted class equals the label
total = 0    # number of test samples seen
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit. Under no_grad there is
        # no graph to detach from, so the discouraged `.data` is not needed.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 75.10%
