In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

import os
from PIL import Image

# 检查是否有可用 GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

Using device: cuda


In [2]:
# 数据预处理
# transforms.ToTensor() 将 [0,255] 范围的像素值归一化到 [0,1] 
# transforms.Normalize((0.5,), (0.5,)) 再进一步将其线性变换到 [-1,1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# 下载并加载训练集
train_dataset = torchvision.datasets.MNIST(
    root='./data',       # 数据集下载或存放的路径
    train=True,
    transform=transform,
    download=True
)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,       # 一次处理的样本数
    shuffle=True         # 训练时打乱数据
)

# 下载并加载测试集
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1000,
    shuffle=False
)

print("训练集大小:", len(train_dataset))
print("测试集大小:", len(test_dataset))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw

训练集大小: 60000
测试集大小: 10000





In [3]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        # 输入通道为1 (灰度图)，输出通道为6，卷积核 5x5
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        # 平均池化
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
        # 卷积层，输入通道为6，输出通道为16，卷积核 5x5
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # 全连接层，16*4*4 的原因： 
        # 28x28 -> (conv1后) 24x24 -> (pool后) 12x12 -> (conv2后) 8x8 -> (pool后) 4x4
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # 输出为10类

    def forward(self, x):
        # 第一次卷积 + Tanh 激活
        x = torch.tanh(self.conv1(x))
        # 第一次池化
        x = self.pool(x)
        # 第二次卷积 + Tanh 激活
        x = torch.tanh(self.conv2(x))
        # 第二次池化
        x = self.pool(x)
        # 展平
        x = x.view(x.size(0), -1)  # 等同于 reshape 为 (batch_size, 16*4*4)
        # 全连接层 + Tanh 激活
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        # 最后一层，输出没有激活函数，后面会用 CrossEntropyLoss
        x = self.fc3(x)
        return x

# 实例化模型
model = LeNet().to(device)
print(model)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
criterion = nn.CrossEntropyLoss()              # 交叉熵损失
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # 随机梯度下降

num_epochs = 5  # 训练轮数

for epoch in range(num_epochs):
    model.train()  # 训练模式（启用 dropout、batchnorm 等）
    running_loss = 0.0
    
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        # 梯度清零
        optimizer.zero_grad()
        
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # 反向传播
        loss.backward()
        
        # 参数更新
        optimizer.step()
        
        running_loss += loss.item()
        
        if (batch_idx + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], '
                  f'Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

print("训练完成！")

Epoch [1/5], Step [100/938], Loss: 1.8784
Epoch [1/5], Step [200/938], Loss: 0.6093
Epoch [1/5], Step [300/938], Loss: 0.3961
Epoch [1/5], Step [400/938], Loss: 0.3267
Epoch [1/5], Step [500/938], Loss: 0.2897
Epoch [1/5], Step [600/938], Loss: 0.2470
Epoch [1/5], Step [700/938], Loss: 0.2073
Epoch [1/5], Step [800/938], Loss: 0.1865
Epoch [1/5], Step [900/938], Loss: 0.1591
Epoch [2/5], Step [100/938], Loss: 0.1309
Epoch [2/5], Step [200/938], Loss: 0.1342
Epoch [2/5], Step [300/938], Loss: 0.1278
Epoch [2/5], Step [400/938], Loss: 0.1091
Epoch [2/5], Step [500/938], Loss: 0.1175
Epoch [2/5], Step [600/938], Loss: 0.1043
Epoch [2/5], Step [700/938], Loss: 0.1016
Epoch [2/5], Step [800/938], Loss: 0.0818
Epoch [2/5], Step [900/938], Loss: 0.0856
Epoch [3/5], Step [100/938], Loss: 0.0777
Epoch [3/5], Step [200/938], Loss: 0.0730
Epoch [3/5], Step [300/938], Loss: 0.0825
Epoch [3/5], Step [400/938], Loss: 0.0730
Epoch [3/5], Step [500/938], Loss: 0.0713
Epoch [3/5], Step [600/938], Loss:

In [5]:
model.eval()  # 测试模式
correct = 0
total = 0

with torch.no_grad():  # 测试阶段不需要计算梯度
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f"在测试集上的准确率: {100 * correct / total:.2f}%")

在测试集上的准确率: 98.48%


In [6]:
save_path = "lenet_mnist.pth"
torch.save(model.state_dict(), save_path)
print(f"模型已保存到 {save_path}")

模型已保存到 lenet_mnist.pth


In [7]:
# 创建一个新的模型实例
loaded_model = LeNet().to(device)
# 加载训练好的参数
loaded_model.load_state_dict(torch.load(save_path))
loaded_model.eval()

print("模型加载完成，进入测试模式")

模型加载完成，进入测试模式


  loaded_model.load_state_dict(torch.load(save_path))


In [45]:
def predict_image(image_path, model):
    # 打开图片并转换为灰度
    image = Image.open(image_path).convert('L')
    # 定义与训练时相同的变换
    transform = transforms.Compose([
        transforms.Resize((28, 28)),            # 缩放到 28x28
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))    # 与训练集同样的归一化
    ])
    # 预处理
    image = transform(image).unsqueeze(0).to(device)  # 增加一个维度 (batch_size=1)
    
    model.eval()
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)
    return predicted.item()

# 假如你有一张本地图片 my_digit.png
test_image_path = "my_digit.png"  # 替换为实际图片路径
if os.path.exists(test_image_path):
    pred_label = predict_image(test_image_path, loaded_model)
    print(f"模型预测该图片的数字是: {pred_label}")
else:
    print(f"未找到文件 {test_image_path}，请确认路径是否正确。")


模型预测该图片的数字是: 7
