In [1]:
"""
MNIST手写数字识别实验
使用多层感知机(MLP)进行训练和测试
"""

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import time
import json
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import os

# Create the output directory that later sections write figures, metrics
# and model checkpoints into (no error if it already exists)
os.makedirs('./results', exist_ok=True)

print("="*60)
print("MNIST手写数字识别实验")
print("="*60)

# Seed the PyTorch and NumPy random generators; intended to make runs repeatable
torch.manual_seed(42)
np.random.seed(42)

# Pick the compute device: CUDA when available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\n[1] 设备信息")
print(f"    使用设备: {device}")
if torch.cuda.is_available():
    # Report the name of CUDA device index 0
    print(f"    GPU型号: {torch.cuda.get_device_name(0)}")

# ============================================================================
# Data preparation
# ============================================================================
print(f"\n[2] 数据加载")
print("    正在下载/加载MNIST数据集...")

# Preprocessing: convert each image to a tensor, then normalise it
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # mean and std of the MNIST dataset
])

# Load the training and test splits (downloaded into ./data when missing)
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Build the data loaders; only the training loader shuffles
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

print(f"    训练集大小: {len(train_dataset)} 张图片")
print(f"    测试集大小: {len(test_dataset)} 张图片")
print(f"    批次大小: {batch_size}")
print(f"    训练批次数: {len(train_loader)}")
print(f"    测试批次数: {len(test_loader)}")

# ============================================================================
# Model definition
# ============================================================================
print(f"\n[3] 模型构建")

class MLP(nn.Module):
    """Multi-layer perceptron classifier for flattened images.

    Every hidden layer is a ``Linear -> ReLU -> Dropout`` triple; a final
    ``Linear`` layer maps to ``num_classes`` outputs with no activation
    applied after it.

    Args:
        input_size: Number of features per sample once the input is flattened.
        hidden_sizes: Widths of the hidden layers, in order. Any iterable of
            ints works; the default is an immutable tuple so it cannot be
            mutated and shared between instances.
        num_classes: Number of output units.
        dropout: Dropout probability applied after each hidden activation.
    """
    def __init__(self, input_size=784, hidden_sizes=(512, 256, 128), num_classes=10, dropout=0.2):
        super().__init__()

        layers = []
        prev_size = input_size

        # Hidden stack: each width gets a Linear, a ReLU and a Dropout
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            prev_size = hidden_size

        # Output layer
        layers.append(nn.Linear(prev_size, num_classes))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten everything after the batch dimension and run the network.

        Args:
            x: Tensor whose first dimension is the batch; the remaining
                dimensions must multiply out to ``input_size``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed or sliced tensors
        x = x.reshape(x.size(0), -1)
        return self.network(x)

# Build the model and move it to the selected device
model = MLP(input_size=784, hidden_sizes=[512, 256, 128], num_classes=10, dropout=0.2).to(device)

# Print the model structure
print("    模型结构:")
print(model)

# Count parameters: all of them, and only those with requires_grad set
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\n    总参数量: {total_params:,}")
print(f"    可训练参数: {trainable_params:,}")

# ============================================================================
# Training configuration
# ============================================================================
print(f"\n[4] 训练配置")

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): the learning rate and epoch count printed below are literal
# strings, not read from `optimizer` or the epoch variable; keep them in sync
# by hand when either value changes.
print(f"    损失函数: CrossEntropyLoss")
print(f"    优化器: Adam")
print(f"    学习率: 0.001")
print(f"    训练轮数: 20 epochs")

# ============================================================================
# Training and evaluation functions
# ============================================================================

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Puts ``model`` in training mode and performs one optimizer step per batch.

    Returns:
        A ``(loss, accuracy)`` tuple: the mean of the per-batch loss values
        and the percentage of samples whose arg-max prediction (taken from
        the pre-step forward pass) equals the label.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        logits = model(images)
        batch_loss = criterion(logits, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Bookkeeping; predictions are read from a detached view of the logits
        loss_sum += batch_loss.item()
        guesses = logits.detach().max(dim=1).indices
        num_seen += labels.size(0)
        num_correct += (guesses == labels).sum().item()

    mean_loss = loss_sum / len(train_loader)
    accuracy_pct = 100 * num_correct / num_seen
    return mean_loss, accuracy_pct

def test_epoch(model, test_loader, criterion, device):
    """Evaluate ``model`` on every batch of ``test_loader`` without gradients.

    Puts ``model`` in evaluation mode; no parameters are updated.

    Returns:
        A ``(loss, accuracy)`` tuple: the mean of the per-batch loss values
        and the percentage of samples whose arg-max prediction equals the
        label.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            # Gradients are already disabled here, so no .data/.detach() is needed
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(test_loader)
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc


# The name matches pytest's default ``test*`` collection pattern, but this is
# an evaluation helper rather than a unit test; opt it out of collection so a
# test run that imports it does not fail on its (non-fixture) parameters.
test_epoch.__test__ = False

# ============================================================================
# Training
# ============================================================================
print(f"\n[5] 开始训练")
print("-" * 80)

num_epochs = 20
# Per-epoch history; one entry is appended to each list per epoch
train_losses = []
train_accs = []
test_losses = []
test_accs = []

start_time = time.time()

for epoch in range(num_epochs):
    # Train for one epoch
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    # Evaluate on the test set after every epoch
    test_loss, test_acc = test_epoch(model, test_loader, criterion, device)

    # Record the metrics
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)

    # Print progress
    print(f'Epoch [{epoch+1:2d}/{num_epochs}] '
          f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | '
          f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

# Wall-clock time of the loop above, which includes the per-epoch evaluation
training_time = time.time() - start_time
print("-" * 80)
print(f"训练完成! 总用时: {training_time:.2f}秒 ({training_time/60:.2f}分钟)")
print(f"最终测试准确率: {test_accs[-1]:.2f}%")

# ============================================================================
# Visualise the training history
# ============================================================================
print(f"\n[6] 生成训练曲线")

# The figure titles and labels in this script are Chinese. Matplotlib's
# default font (DejaVu Sans) has no CJK glyphs, so without this step every
# Chinese character triggers a "Glyph ... missing from font" warning and is
# drawn as an empty box. Put the installed CJK-capable fonts first in the
# sans-serif list; rcParams are global, so later figures benefit as well.
import warnings
from matplotlib import font_manager

_cjk_candidates = [
    'Noto Sans CJK SC', 'Noto Sans CJK JP', 'Noto Sans SC',
    'Source Han Sans SC', 'Source Han Sans CN',
    'WenQuanYi Micro Hei', 'WenQuanYi Zen Hei',
    'SimHei', 'Microsoft YaHei',
    'PingFang SC', 'Heiti TC', 'STHeiti',
    'Arial Unicode MS', 'Droid Sans Fallback',
]
_installed_fonts = {entry.name for entry in font_manager.fontManager.ttflist}
_cjk_fonts = [name for name in _cjk_candidates if name in _installed_fonts]
if _cjk_fonts:
    # Keep the previous families after the CJK ones as a fallback
    plt.rcParams['font.sans-serif'] = _cjk_fonts + list(plt.rcParams['font.sans-serif'])
    # Many CJK fonts lack U+2212; use the ASCII hyphen for negative tick labels
    plt.rcParams['axes.unicode_minus'] = False
else:
    warnings.warn(
        "No CJK-capable font is installed; Chinese text in the saved figures "
        "will be rendered as empty boxes."
    )

epochs = range(1, num_epochs + 1)

plt.figure(figsize=(14, 5))

# Loss curves
plt.subplot(1, 2, 1)
plt.plot(epochs, train_losses, label='Train Loss', marker='o', markersize=4, linewidth=2)
plt.plot(epochs, test_losses, label='Test Loss', marker='s', markersize=4, linewidth=2)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.title('训练与测试损失曲线', fontsize=13, fontweight='bold')
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)

# Accuracy curves
plt.subplot(1, 2, 2)
plt.plot(epochs, train_accs, label='Train Accuracy', marker='o', markersize=4, linewidth=2)
plt.plot(epochs, test_accs, label='Test Accuracy', marker='s', markersize=4, linewidth=2)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Accuracy (%)', fontsize=12)
plt.title('训练与测试准确率曲线', fontsize=13, fontweight='bold')
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
print("    训练曲线已保存至: ./results/training_curves.png")
plt.close()

# ============================================================================
# Visualise a handful of predictions
# ============================================================================
print(f"\n[7] 生成预测样本可视化")

# Classify the first batch served by the test loader
model.eval()
with torch.no_grad():
    images, labels = next(iter(test_loader))
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    predicted = outputs.max(dim=1).indices

# Draw the first 16 samples of that batch on a 4x4 grid
num_samples = 16
fig, axes = plt.subplots(4, 4, figsize=(12, 12))

for i in range(num_samples):
    ax = axes.flat[i]  # row-major order, i.e. axes[i // 4, i % 4]
    img = images[i].cpu().numpy().squeeze()
    ax.imshow(img, cmap='gray')

    true_label = labels[i].item()
    pred_label = predicted[i].item()
    # Title colour flags the outcome: red for a miss, green for a hit
    color = 'red' if true_label != pred_label else 'green'

    ax.set_title(
        f'真实: {true_label}, 预测: {pred_label}',
        color=color,
        fontsize=11,
        fontweight='bold',
    )
    ax.axis('off')

plt.suptitle(
    '预测样本展示 (绿色=正确, 红色=错误)',
    fontsize=14,
    fontweight='bold',
    y=0.995,
)
plt.tight_layout()
plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
print("    预测样本已保存至: ./results/prediction_samples.png")
plt.close()

# ============================================================================
# Confusion matrix and per-class analysis
# ============================================================================
print(f"\n[8] 生成混淆矩阵和分类报告")

# Collect predictions and labels for the whole test set as plain Python ints
all_preds = []
all_labels = []

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

# Confusion matrix: rows are true labels, columns are predicted labels.
# Passing `labels` explicitly keeps it 10x10 even if some digit never occurs,
# which the tick labels and the per-class statistics below rely on.
cm = confusion_matrix(all_labels, all_preds, labels=list(range(10)))

# Draw the confusion matrix
plt.figure(figsize=(11, 9))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=range(10), yticklabels=range(10),
            cbar_kws={'label': '样本数量'})
plt.xlabel('预测标签', fontsize=13, fontweight='bold')
plt.ylabel('真实标签', fontsize=13, fontweight='bold')
plt.title('混淆矩阵', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
print("    混淆矩阵已保存至: ./results/confusion_matrix.png")
plt.close()

# Print the classification report
print("\n" + "="*60)
print("详细分类报告")
print("="*60)
report = classification_report(all_labels, all_preds, 
                               target_names=[f'数字{i}' for i in range(10)],
                               digits=4)
print(report)

# Per-class accuracy comes straight from the confusion matrix: the diagonal
# holds the correctly classified samples and each row sum is that class's
# total number of samples.
class_correct = cm.diagonal().tolist()
class_total = cm.sum(axis=1).tolist()

print("\n各类别识别准确率:")
print("-" * 50)
for digit, (n_correct, n_total) in enumerate(zip(class_correct, class_total)):
    # A class with no samples reports 0% instead of dividing by zero
    accuracy = 100 * n_correct / n_total if n_total else 0.0
    print(f'数字 {digit}: {accuracy:.2f}% ({n_correct}/{n_total})')

# ============================================================================
# Save the experiment results
# ============================================================================
print(f"\n[9] 保存实验数据")

# Read the configuration back from the live objects instead of repeating the
# literals used when they were built, so the saved record cannot drift from
# what was actually trained.
_linear_layers = [m for m in model.network if isinstance(m, nn.Linear)]
_dropout_rates = [m.p for m in model.network if isinstance(m, nn.Dropout)]

results = {
    'model_architecture': {
        'input_size': _linear_layers[0].in_features,
        # Every Linear layer except the last one is a hidden layer
        'hidden_layers': [m.out_features for m in _linear_layers[:-1]],
        'output_size': _linear_layers[-1].out_features,
        'dropout': _dropout_rates[0] if _dropout_rates else 0.0,
        'total_params': total_params,
        'trainable_params': trainable_params
    },
    'training_config': {
        'batch_size': batch_size,
        'learning_rate': optimizer.param_groups[0]['lr'],
        'optimizer': type(optimizer).__name__,
        'loss_function': type(criterion).__name__,
        'num_epochs': num_epochs,
        'device': str(device)
    },
    'training_history': {
        'train_losses': [float(x) for x in train_losses],
        'train_accs': [float(x) for x in train_accs],
        'test_losses': [float(x) for x in test_losses],
        'test_accs': [float(x) for x in test_accs]
    },
    'final_results': {
        'final_train_acc': float(train_accs[-1]),
        'final_test_acc': float(test_accs[-1]),
        'training_time_seconds': float(training_time),
        'training_time_minutes': float(training_time/60)
    },
    'class_performance': {
        # A class with no samples is recorded as 0.0 instead of dividing by zero
        'class_accuracies': [
            float(100 * correct / total) if total else 0.0
            for correct, total in zip(class_correct, class_total)
        ],
        'class_correct': [int(x) for x in class_correct],
        'class_total': [int(x) for x in class_total]
    },
    'confusion_matrix': cm.tolist()
}

# Write the record as UTF-8 JSON
with open('./results/experiment_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4, ensure_ascii=False)

print("    实验数据已保存至: ./results/experiment_results.json")

# Save the weights only; this is the portable checkpoint
torch.save(model.state_dict(), './results/mnist_mlp_model.pth')
print("    模型权重已保存至: ./results/mnist_mlp_model.pth")

# Save the whole module object (structure included).
# CAUTION: this pickles the object, so loading it needs the MLP class to be
# importable under the same module path, and unpickling can execute
# arbitrary code. Only load such files from a trusted source.
torch.save(model, './results/mnist_mlp_model_complete.pth')
print("    完整模型已保存至: ./results/mnist_mlp_model_complete.pth")

# ============================================================================
# Experiment summary
# ============================================================================
print("\n" + "="*60)
print("实验完成总结")
print("="*60)
print(f"模型参数量: {total_params:,}")
print(f"训练时间: {training_time:.2f}秒 ({training_time/60:.2f}分钟)")
# Accuracies are those of the last epoch
print(f"训练集准确率: {train_accs[-1]:.2f}%")
print(f"测试集准确率: {test_accs[-1]:.2f}%")
# Absolute gap between last-epoch train and test accuracy
print(f"准确率差异: {abs(train_accs[-1] - test_accs[-1]):.2f}%")
print(f"\n所有结果已保存至 ./results/ 目录")
print("="*60)

print("\n实验结束！")

MNIST手写数字识别实验

[1] 设备信息
    使用设备: cuda
    GPU型号: NVIDIA GeForce RTX 4090

[2] 数据加载
    正在下载/加载MNIST数据集...


100.0%
100.0%
100.0%
100.0%


    训练集大小: 60000 张图片
    测试集大小: 10000 张图片
    批次大小: 128
    训练批次数: 469
    测试批次数: 79

[3] 模型构建
    模型结构:
MLP(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=512, out_features=256, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=256, out_features=128, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=128, out_features=10, bias=True)
  )
)

    总参数量: 567,434
    可训练参数: 567,434

[4] 训练配置
    损失函数: CrossEntropyLoss
    优化器: Adam
    学习率: 0.001
    训练轮数: 20 epochs

[5] 开始训练
--------------------------------------------------------------------------------
Epoch [ 1/20] Train Loss: 0.3028, Train Acc: 90.77% | Test Loss: 0.1173, Test Acc: 96.24%
Epoch [ 2/20] Train Loss: 0.1267, Train Acc: 96.25% | Test Loss: 0.1062, Test Acc: 96.66%
Epoch [ 3/20] Train Loss: 0.0931, Train Acc: 97.20% 

  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/training_curves.png', dpi=300,

    训练曲线已保存至: ./results/training_curves.png

[7] 生成预测样本可视化


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/prediction_samples.p

    预测样本已保存至: ./results/prediction_samples.png

[8] 生成混淆矩阵和分类报告
    混淆矩阵已保存至: ./results/confusion_matrix.png

详细分类报告
              precision    recall  f1-score   support

         数字0     0.9898    0.9929    0.9913       980
         数字1     0.9938    0.9938    0.9938      1135
         数字2     0.9912    0.9826    0.9869      1032
         数字3     0.9690    0.9911    0.9799      1010
         数字4     0.9816    0.9796    0.9806       982
         数字5     0.9820    0.9765    0.9792       892
         数字6     0.9874    0.9833    0.9854       958
         数字7     0.9807    0.9874    0.9840      1028
         数字8     0.9836    0.9825    0.9831       974
         数字9     0.9840    0.9722    0.9781      1009

    accuracy                         0.9844     10000
   macro avg     0.9843    0.9842    0.9842     10000
weighted avg     0.9844    0.9844    0.9844     10000


各类别识别准确率:
--------------------------------------------------
数字 0: 99.29% (973/980)
数字 1: 99.38% (1128/1135)
数字 2: 98.26% (

  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.savefig('./results/confusion_matrix.png', dpi=300, bbox_inches='tight')
  plt.sa