In [None]:
from torchvision import datasets, transforms
from torch.utils.data import Subset

learn_rate = 0.005  # SGD step size read by the layers' backprop methods

# Preprocessing: ToTensor converts the image to a float tensor scaled to [0, 1];
# the Lambda then shifts the values to [-0.5, 0.5].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x - 0.5),
])

# Training split: downloaded on first use, read from disk afterwards.
train_dataset = datasets.MNIST(root='./data',        # directory holding the data
                               train=True,           # True: training split, False: test split
                               transform=transform,  # preprocessing defined above
                               download=True)        # fetch the files if they are missing

# Test split, loaded the same way.
test_dataset = datasets.MNIST(root='./data',
                              train=False,
                              transform=transform,
                              download=True)

print(len(train_dataset))  # 60000
print(len(test_dataset))   # 10000

# Keep only the first 1000 training samples.
train_dataset = Subset(train_dataset, range(1000))

60000
10000


In [None]:
import torch 
#28x28x1→26x26x8

class Conv3x3:
    """Convolution layer with 3x3 filters, stride 1 and no padding.

    Operates on a single 2-D input of shape (h, w) and produces an output of
    shape (h - 2, w - 2, num_filters).
    """

    def __init__(self, num_filters):
        """Create ``num_filters`` random 3x3 filters.

        Args:
            num_filters: number of filters, i.e. depth of the output.
        """
        self.num_filters = num_filters
        # Standard-normal weights scaled down by 9 (the number of weights per filter).
        self.filters = torch.randn(num_filters, 3, 3) / 9

    def iterate_patchs(self, input):
        """Yield ``(patch, i, j)`` for every 3x3 window of a 2-D input.

        ``(i, j)`` is the top-left corner of the window, which is also the
        position of the corresponding output element.
        """
        h, w = input.shape
        for i in range(h - 2):
            for j in range(w - 2):
                yield input[i:i + 3, j:j + 3], i, j

    def forward(self, input):
        """Convolve a 2-D input with every filter.

        The input is remembered in ``self.last_input`` for ``backprop``.

        Args:
            input: tensor of shape (h, w).

        Returns:
            Tensor of shape (h - 2, w - 2, num_filters).
        """
        self.last_input = input
        h, w = input.shape
        output = torch.zeros((h - 2, w - 2, self.num_filters))
        for patch, i, j in self.iterate_patchs(input):
            # (3, 3) patch broadcasts against (num_filters, 3, 3) filters;
            # summing over the two spatial axes gives one value per filter.
            output[i, j] = torch.sum(patch * self.filters, dim=(1, 2))
        return output

    def backprop(self, d_L_d_out, lr=None):
        """Update the filters in place with one gradient-descent step.

        Must be called after ``forward``, whose input it reuses.

        Args:
            d_L_d_out: loss gradient w.r.t. this layer's output, shape
                (h - 2, w - 2, num_filters).
            lr: step size. When omitted, the module-level ``learn_rate``
                is used, as before.

        Returns:
            None. No gradient w.r.t. the input is computed.
        """
        step = learn_rate if lr is None else lr
        d_L_d_filters = torch.zeros(self.filters.shape)
        for patch, i, j in self.iterate_patchs(self.last_input):
            # One broadcasted update for all filters at once:
            # (3, 3) patch * (num_filters, 1, 1) gradients -> (num_filters, 3, 3).
            d_L_d_filters += patch * d_L_d_out[i, j].reshape(-1, 1, 1)

        self.filters -= step * d_L_d_filters


class MaxPool2:
    """2x2 max pooling with stride 2 over a tensor of shape (h, w, d).

    A trailing odd row or column is ignored, so the output has shape
    (h // 2, w // 2, d).
    """

    def iterate_patchs(self, input):
        """Yield ``(patch, i, j)`` for every non-overlapping 2x2 window.

        ``patch`` has shape (2, 2, d) and ``(i, j)`` is the position of the
        corresponding output element.
        """
        h, w, _ = input.shape
        for i in range(h // 2):
            for j in range(w // 2):
                yield input[i * 2:i * 2 + 2, j * 2:j * 2 + 2], i, j

    def forward(self, input):
        """Take the per-channel maximum of every 2x2 window.

        The input is remembered in ``self.last_input`` for ``backprop``.

        Args:
            input: tensor of shape (h, w, d).

        Returns:
            Tensor of shape (h // 2, w // 2, d).
        """
        self.last_input = input
        h, w, d = input.shape
        output = torch.zeros((h // 2, w // 2, d))
        for patch, i, j in self.iterate_patchs(input):
            output[i, j] = torch.amax(patch, dim=(0, 1))
        return output

    def backprop(self, d_L_d_out):
        """Route the output gradient back to the positions that held the maxima.

        Every input element equal to its window's maximum receives that
        window's gradient; all other elements get zero. If several elements
        of a window tie for the maximum, each of them receives the full
        gradient.

        Args:
            d_L_d_out: loss gradient w.r.t. the output, shape (h // 2, w // 2, d).

        Returns:
            Loss gradient w.r.t. the input, same shape as ``self.last_input``.
        """
        d_L_d_input = torch.zeros(self.last_input.shape)
        for patch, i, j in self.iterate_patchs(self.last_input):
            rows = slice(i * 2, i * 2 + 2)
            cols = slice(j * 2, j * 2 + 2)
            # (2, 2, d) mask of the elements equal to the per-channel maximum.
            is_max = patch == torch.amax(patch, dim=(0, 1))
            # Windows never overlap, so each input element is written once;
            # the (d,) gradient broadcasts across the 2x2 window.
            d_L_d_input[rows, cols] = torch.where(
                is_max, d_L_d_out[i, j], d_L_d_input[rows, cols]
            )
        return d_L_d_input

class Softmax:
    """Fully-connected layer followed by a softmax activation."""

    def __init__(self, input_size, nodes):
        """Create the weight matrix and bias vector.

        Args:
            input_size: number of elements of the flattened input.
            nodes: number of output classes.
        """
        # Standard-normal weights scaled down by the input size.
        self.weights = torch.randn(input_size, nodes) / input_size
        self.bias = torch.zeros(nodes)

    def forward(self, input):
        """Flatten the input, apply the linear map and return class probabilities.

        The input shape, the flattened input and the pre-softmax totals are
        remembered for ``backprop``.

        Args:
            input: tensor of any shape with ``input_size`` elements.

        Returns:
            1-D tensor of ``nodes`` probabilities summing to 1.
        """
        self.last_input_shape = input.shape
        input = input.flatten()
        self.last_input = input
        totals = torch.matmul(input, self.weights) + self.bias
        self.last_totals = totals
        # torch.softmax is numerically stable; a plain exp(totals) / sum
        # overflows to inf/nan for large totals.
        return torch.softmax(totals, dim=0)

    def backprop(self, d_L_d_out, lr=None):
        """Update weights and bias in place and return the input gradient.

        Must be called after ``forward``. Handles any gradient vector,
        including several non-zero entries or all zeros.

        Args:
            d_L_d_out: loss gradient w.r.t. the probabilities, shape (nodes,).
            lr: step size. When omitted, the module-level ``learn_rate``
                is used, as before.

        Returns:
            Loss gradient w.r.t. the layer input, reshaped to the shape the
            input had when it was passed to ``forward``.
        """
        step = learn_rate if lr is None else lr

        probs = torch.softmax(self.last_totals, dim=0)
        # Softmax Jacobian-vector product: with J[i, k] = p_i * (delta_ik - p_k),
        # (J^T g)[k] = p_k * (g_k - sum_i g_i * p_i).
        d_L_d_t = probs * (d_L_d_out - torch.sum(d_L_d_out * probs))

        # totals = input @ weights + bias, so:
        d_L_d_w = self.last_input.unsqueeze(1) * d_L_d_t.unsqueeze(0)  # outer product
        # Computed with the weights as they were in the forward pass,
        # i.e. before the update below.
        d_L_d_inputs = torch.matmul(self.weights, d_L_d_t)

        self.weights -= step * d_L_d_w
        self.bias -= step * d_L_d_t

        return d_L_d_inputs.reshape(self.last_input_shape)



# Training setup: build the three layers of the network.
torch.manual_seed(0)  # fixed seed so the random initialisation below is reproducible

NUM_FILTERS = 8
NUM_CLASSES = 10
# 28x28 image -> 26x26 after the 3x3 convolution -> 13x13 after 2x2 pooling
POOLED_SIZE = (28 - 2) // 2

conv = Conv3x3(NUM_FILTERS)
pool = MaxPool2()
softmax = Softmax(POOLED_SIZE * POOLED_SIZE * NUM_FILTERS, NUM_CLASSES)

def forward(input, label):
    """Run one image through conv -> pool -> softmax and score the prediction.

    Args:
        input: 2-D image tensor passed to ``conv.forward``.
        label: index of the correct class.

    Returns:
        Tuple ``(out, loss, acc)``: the class probabilities, the cross-entropy
        loss ``-log(out[label])`` and 1 if the arg-max class equals ``label``,
        else 0.
    """
    probs = softmax.forward(pool.forward(conv.forward(input)))
    cross_entropy = -torch.log(probs[label])
    hit = int(torch.argmax(probs) == label)
    return probs, cross_entropy, hit


def train(im, label):
    """Perform one training step on a single sample.

    Runs the forward pass, builds the gradient of the cross-entropy loss
    w.r.t. the output probabilities and back-propagates it through the
    softmax, pooling and convolution layers in that order.

    Args:
        im: 2-D image tensor.
        label: index of the correct class.

    Returns:
        Tuple ``(loss, acc)`` as produced by ``forward``.
    """
    out, loss, acc = forward(im, label)

    # d(-log(out[label])) / d out is non-zero only at the true class.
    d_L_d_out = torch.zeros(10)
    d_L_d_out[label] = -1 / out[label]

    # Each layer consumes the gradient returned by the layer after it.
    conv.backprop(pool.backprop(softmax.backprop(d_L_d_out)))

    return loss, acc

for epoch in range(3):  # train for 3 epochs
    print(f"Epoch {epoch + 1}")
    # Visit the samples in a fresh random order every epoch. The dataset is
    # indexed through the permutation rather than re-wrapped in a new Subset,
    # so train_dataset is not modified and no wrappers pile up across epochs.
    permutation = torch.randperm(len(train_dataset))
    loss = 0
    num_correct = 0
    for i, sample_index in enumerate(permutation.tolist()):
        image, label = train_dataset[sample_index]
        image = image.squeeze(0)  # drop the channel dimension: (1, 28, 28) -> (28, 28)
        l, acc = train(image, label)
        loss += l.item()     # running loss over the current window
        num_correct += acc   # running count of correct predictions
        if i % 100 == 99:  # report every 100 samples
            # The window holds exactly 100 samples, so the count of correct
            # predictions is already a percentage.
            print("[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%" % (i + 1, loss/100, num_correct))
            loss = 0
            num_correct = 0
print("Training complete.")
# Evaluate the trained network on the test set (forward pass only, no updates).
loss, num_correct = 0, 0
num_tests = len(test_dataset)
for (image, label) in test_dataset:
    flat_image = image.squeeze(0)
    _, l, acc = forward(flat_image, label)
    loss = loss + l.item()            # total loss over the test set
    num_correct = num_correct + acc   # total number of correct predictions

print("Test Loss: %.3f" % (loss / num_tests))
print("Test Accuracy: %.2f%%" % (num_correct / num_tests * 100))

Epoch 1
[Step 100] Past 100 steps: Average Loss 2.269 | Accuracy: 21%
[Step 200] Past 100 steps: Average Loss 2.080 | Accuracy: 36%
[Step 300] Past 100 steps: Average Loss 1.577 | Accuracy: 59%
[Step 400] Past 100 steps: Average Loss 1.056 | Accuracy: 65%
[Step 500] Past 100 steps: Average Loss 0.972 | Accuracy: 68%
[Step 600] Past 100 steps: Average Loss 0.880 | Accuracy: 71%
[Step 700] Past 100 steps: Average Loss 0.592 | Accuracy: 83%
[Step 800] Past 100 steps: Average Loss 0.692 | Accuracy: 83%
[Step 900] Past 100 steps: Average Loss 0.726 | Accuracy: 76%
[Step 1000] Past 100 steps: Average Loss 0.761 | Accuracy: 75%
Epoch 2
[Step 100] Past 100 steps: Average Loss 0.529 | Accuracy: 86%
[Step 200] Past 100 steps: Average Loss 0.474 | Accuracy: 88%
[Step 300] Past 100 steps: Average Loss 0.619 | Accuracy: 76%
[Step 400] Past 100 steps: Average Loss 0.549 | Accuracy: 84%
[Step 500] Past 100 steps: Average Loss 0.522 | Accuracy: 85%
[Step 600] Past 100 steps: Average Loss 0.590 | Accur

In [34]:
# Scratch check: unsqueeze(0) inserts a new leading dimension of size 1.
a = torch.tensor([1, 2, 3])
print(a.shape)
a = torch.unsqueeze(a, 0)  # shape (3,) -> (1, 3)
print(a.shape)

torch.Size([3])
torch.Size([1, 3])


Test

In [None]:
# Shape of the first training image
first_image = train_dataset[0][0]
print(first_image.shape)

torch.Size([1, 28, 28])


In [5]:
def AAA(a):
    """Yield every pair ``(i, j)`` with ``0 <= i, j < a`` in row-major order."""
    yield from ((row, col) for row in range(a) for col in range(a))

# Demo: a generator can stand in for two nested loops.
for i, j in AAA(4):
    print(i, " ", j)

0   0
0   1
0   2
0   3
1   0
1   1
1   2
1   3
2   0
2   1
2   2
2   3
3   0
3   1
3   2
3   3
