In [7]:
import sys
import idx2numpy
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, Subset, random_split

# 数据预处理
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# 加载完整的 MNIST 训练数据集
full_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# 随机选择 20,000 个样本
subset_indices = torch.randperm(len(full_dataset))[:20000]
subset_dataset = Subset(full_dataset, subset_indices)

# 打乱其中 2,500 个样本的标签
rand_indices = torch.randperm(20000)[:2500]
for idx in rand_indices:
    # 随机生成一个不同的标签
    original_label = subset_dataset[idx][1]
    new_label = torch.randint(0, 10, (1,)).item()
    while new_label == original_label:
        new_label = torch.randint(0, 10, (1,)).item()
    subset_dataset.dataset.targets[subset_indices[idx]] = new_label

# 将 20,000 个样本分为 10,000 个测试集和 10,000 个训练+验证集
train_val_set, test_set = random_split(subset_dataset, [10000, 10000])

# 将 10,000 个训练+验证集样本分为 5,000 个训练集和 5,000 个验证集
train_set, val_set = random_split(train_val_set, [5000, 5000])

# 创建数据加载器
trainloader = DataLoader(train_set, batch_size=64, shuffle=True)
valloader = DataLoader(val_set, batch_size=64, shuffle=True)
testloader = DataLoader(test_set, batch_size=64, shuffle=True)


In [11]:
# 定义神经网络
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

net = SimpleNet()

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)


# 测试网络
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

# 训练网络
for epoch in range(5):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0

print('Finished Training')

# 测试网络
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')

Accuracy of the network on the 10000 test images: 9.82 %
Finished Training
Accuracy of the network on the 10000 test images: 77.11 %
