<a href="https://colab.research.google.com/github/Cz1544252489/DailyWork/blob/main/jupyter%20notebook/version0.4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import sys
import copy
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, Subset, random_split

def load_dataset2():
    # 数据预处理
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # 加载完整的 MNIST 训练数据集
    full_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

    # 随机选择 20,000 个样本
    subset_indices = torch.randperm(len(full_dataset))[:20000]
    subset_dataset = Subset(full_dataset, subset_indices)

    # 将 20,000 个样本分为 5,000 个训练集、5,000 个验证集和 10,000 个测试集
    train_set, val_set, test_set = random_split(subset_dataset, [5000, 5000, 10000])

    # 打乱训练集中的 2,500 个样本的标签
    rand_indices = torch.randperm(len(train_set))[:2500]
    for idx in rand_indices:
        # 随机生成一个新的标签
        new_label = torch.randint(0, 10, (1,)).item()
        train_set.dataset.dataset.targets[subset_indices[train_set.indices[idx]]] = new_label

    # 创建数据加载器
    trainloader = DataLoader(train_set, batch_size=64, shuffle=True)
    valloader = DataLoader(val_set, batch_size=64, shuffle=True)
    testloader = DataLoader(test_set, batch_size=64, shuffle=True)

    return trainloader, valloader, testloader

def test(net, testloader):
    # 测试网络
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


trainloader, valloader, testloader = load_dataset2()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 146275916.67it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 41814184.96it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 35591135.00it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 21096931.08it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [3]:
# 定义神经网络
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc = nn.Linear(28*28, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.fc(x)
        return x

net_old = SimpleNet()

N = 5000
la = torch.rand([N,1],requires_grad=True)

In [9]:
# 使用相同的参数以比较优化的好坏
net = copy.deepcopy(net_old)

# 定义损失函数和优化器
def lower_function(output, label, la):
    crossentropy = nn.CrossEntropyLoss()
    loss = crossentropy(output, label)*la
    return loss

def upper_function(output, label):
    crossentropy = nn.CrossEntropyLoss()
    loss = crossentropy(output, label)+0.01*(torch.norm(net.fc.weight)+torch.norm(net.fc.bias))
    return loss

# SGD的效果明显要比Adam好不少
# optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=0.01)

# 定义内层循环
def inner_loop(trainloader, net, la):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = lower_function(outputs, labels, la[i])
        #s = torch.cat((net.fc.weight.data, net.fc.bias.data.view(-1,1)), dim=1)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss, net

# 第一次测试网络
test(net, testloader)


T = 100
# 训练网络
for epoch in range(T):
    lower_loss, net  = inner_loop(trainloader, net, la)

    s = torch.cat((net.fc.weight.data, net.fc.bias.data.view(-1,1)), dim=1).view(-1)
    s_grad = torch.cat((net.fc.weight.grad.data, net.fc.bias.grad.data.view(-1,1)), dim=1).view(-1)

    if epoch % 10 ==9:
        print(f'[Epoch {epoch + 1}] lower_loss: {lower_loss / 200:.3f}')

    B = la.grad
    A = torch.cat((net.fc.weight.grad.data, net.fc.bias.grad.data.view(-1,1)), dim=1).view(-1)


    upper_loss = 0.0
    for i, data in enumerate(valloader, 0):
        inputs, labels = data

        outputs = net(inputs)
        loss = upper_function(outputs, labels)

        upper_loss += loss


print(f'upper_loss: {upper_loss / 200:.3f}')


test(net, testloader)

Accuracy of the network on the 10000 test images: 8.31 %
[Epoch 10] lower_loss: 0.654
[Epoch 20] lower_loss: 0.539
[Epoch 30] lower_loss: 0.504
[Epoch 40] lower_loss: 0.574
[Epoch 50] lower_loss: 0.492
[Epoch 60] lower_loss: 0.518
[Epoch 70] lower_loss: 0.505
[Epoch 80] lower_loss: 0.534
[Epoch 90] lower_loss: 0.479
[Epoch 100] lower_loss: 0.475
upper_loss: 0.795
Accuracy of the network on the 10000 test images: 44.91 %


In [10]:
test(net, testloader)

Accuracy of the network on the 10000 test images: 44.91 %


In [11]:
print(torch.norm(la.grad))

tensor(5969.7881)


In [12]:
print(net.fc.weight.grad.shape,net.fc.bias.shape)

torch.Size([10, 784]) torch.Size([10])


In [13]:
B = la.grad
A = net.fc.weight.grad
B = net.fc.bias.grad.view(-1,1)
C = torch.cat((A, B),dim=1)
s = torch.cat((net.fc.weight.data, net.fc.bias.data.view(-1,1)), dim=1).view(-1)
s_grad = torch.cat((net.fc.weight.grad.data, net.fc.bias.grad.data.view(-1,1)), dim=1).view(-1)
print(s.shape)
print(s_grad.shape)

torch.Size([7850])
torch.Size([7850])
