In [None]:

import numpy as np

class Relu:
    """Rectified linear unit activation: element-wise ``max(x, 0)``."""

    def forward(self, x):
        """Cache the input and return it with negative entries clamped to zero.

        :param x: input array of any shape.
        :return: array with the same shape as ``x``.
        """
        self.x = x
        return np.maximum(self.x, 0)

    def backward(self, eta):
        """Propagate a gradient through the activation.

        :param eta: gradient w.r.t. the output, same shape as the cached input.
        :return: a new array equal to ``eta`` where the cached input was
            positive and 0 elsewhere.
        """
        # Build a new array instead of zeroing `eta` in place: the layers in
        # this file return the same array they keep in `weight.grad`, so an
        # in-place write here would silently change that stored gradient.
        return np.where(self.x > 0, eta, 0)
    


In [None]:
class parameter:
    """Small holder pairing a trainable value with a gradient slot."""

    def __init__(self, w):
        # `data` keeps the value exactly as passed in (no copy is made);
        # `grad` starts empty and is filled in by whichever layer owns this
        # parameter.
        self.data, self.grad = w, None


In [None]:
class BatchNormalization:
    """Batch normalization over the channel axis of ``[N, C, H, W]`` inputs."""

    def __init__(self, num_channels, moving_decay=0.9, is_training=True):
        """
        Create the layer.

        :param num_channels: number of channels C to normalize.
        :param moving_decay: decay factor of the running mean/variance.
        :param is_training: initial mode flag (overwritten by every ``forward`` call).
        """
        self.gamma = parameter(np.ones((num_channels, 1, 1)))
        self.beta = parameter(np.zeros((num_channels, 1, 1)))
        self.is_training = is_training
        self.epsilon = 1e-5     # keeps the division stable when the variance is tiny

        self.moving_mean = np.zeros((num_channels, 1, 1))
        self.moving_var = np.zeros((num_channels, 1, 1))
        self.moving_decay = moving_decay

    def forward(self, x, is_training=True):
        """
        Normalize ``x`` per channel, then scale by gamma and shift by beta.

        :param x: feature map of shape [N, C, H, W].
        :param is_training: True to use (and accumulate) batch statistics,
            False to use the stored running statistics.
        :return: array of shape [N, C, H, W].
        """
        self.is_training = is_training
        self.x = x

        if self.is_training:
            # Per-channel statistics over the batch and both spatial axes.
            mean = np.mean(x, axis=(0, 2, 3))[:, np.newaxis, np.newaxis]
            var = np.var(x, axis=(0, 2, 3))[:, np.newaxis, np.newaxis]

            # The running statistics start as all zeros; seed them with the
            # first batch instead of blending with those zeros. `any()` is
            # used rather than a sum so values that cancel out are not
            # mistaken for "still uninitialised".
            if not np.any(self.moving_mean) and not np.any(self.moving_var):
                self.moving_mean = mean
                self.moving_var = var
            else:
                self.moving_mean = self.moving_mean * self.moving_decay + (1 - self.moving_decay) * mean
                self.moving_var = self.moving_var * self.moving_decay + (1 - self.moving_decay) * var
        else:
            # Inference: normalize with the running statistics.
            mean = self.moving_mean
            var = self.moving_var

        # Keep the statistics that were actually used; backward() needs them.
        self.mean = mean
        self.var = var
        self.y = (x - mean) / np.sqrt(var + self.epsilon)
        return self.gamma.data * self.y + self.beta.data

    def backward(self, eta, learning_rate):
        """
        Back-propagate through the layer and update gamma and beta.

        Must be called after ``forward``.

        :param eta: gradient w.r.t. the output, shape [N, C, H, W].
        :param learning_rate: step size for the gamma/beta update.
        :return: gradient w.r.t. the input, shape [N, C, H, W].
        """
        N, _, H, W = eta.shape
        reduce_axes = (0, 2, 3)
        gamma_grad = np.sum(eta * self.y, axis=reduce_axes)
        beta_grad = np.sum(eta, axis=reduce_axes)

        inv_std = 1.0 / np.sqrt(self.var + self.epsilon)
        y_grad = eta * self.gamma.data      # gradient w.r.t. the normalized values
        if self.is_training:
            # Mean and variance depend on every element of the channel, so
            # their contributions are reduced over the same axes (N, H, W)
            # that forward() used to compute them.
            count = N * H * W
            grad_sum = np.sum(y_grad, axis=reduce_axes, keepdims=True)
            grad_y_sum = np.sum(y_grad * self.y, axis=reduce_axes, keepdims=True)
            result = inv_std * (y_grad - grad_sum / count - self.y * grad_y_sum / count)
        else:
            # Running statistics are constants with respect to the input.
            result = y_grad * inv_std

        # Gradient-descent step; the sums are averaged over the batch size.
        self.gamma.data -= learning_rate * gamma_grad[:, np.newaxis, np.newaxis] / N
        self.beta.data -= learning_rate * beta_grad[:, np.newaxis, np.newaxis] / N

        return result

### 卷积层

In [None]:
class conv:
    """2-D convolution layer computed as an im2col matrix product."""

    def __init__(self, filter_shape, stride=1, padding='SAME', bias=True, requires_grad=True):
        """
        Create the layer.

        :param filter_shape: tuple (O, C, K, K): output channels, input channels, kernel size.
        :param stride: convolution stride.
        :param padding: padding mode, one of {"SAME", "VALID"}.
        :param bias: whether the layer has a bias term.
        :param requires_grad: stored on the instance; not consulted by this class.
        """
        # Kaiming initialisation: std = sqrt(2 / fan_in), fan_in = C * K * K.
        fan_in = int(np.prod(filter_shape[1:]))
        self.weight = parameter(np.random.randn(*filter_shape) * (2 / fan_in) ** 0.5)
        self.stride = stride
        self.padding = padding
        self.requires_grad = requires_grad
        self.output_channel = filter_shape[0]
        self.input_channel = filter_shape[1]
        self.filter_size = filter_shape[2]

        if bias:
            self.bias = parameter(np.random.randn(self.output_channel))
        else:
            self.bias = None

    def forward(self, input):
        """
        Run the convolution.

        :param input: feature map of shape [N, C, H, W].
        :return: array of shape [N, O, output_H, output_W].
        :raises ValueError: if the padding mode is neither "SAME" nor "VALID".
        """
        # Step 1: padding.
        if self.padding == "VALID":
            self.x = input
        elif self.padding == "SAME":
            p = self.filter_size // 2
            self.x = np.pad(input, ((0, 0), (0, 0), (p, p), (p, p)), "constant")
        else:
            raise ValueError("padding must be 'SAME' or 'VALID', got %r" % (self.padding,))
        # Remembered so backward() can return a gradient of exactly this shape.
        self.input_shape = input.shape

        # Step 2: trim the (padded) input to a multiple of the stride.
        self.adjust_input_dimensions()

        # Step 3: convolution as (patch matrix) x (flattened kernels).
        N, _, H, W = self.x.shape
        O = self.weight.data.shape[0]
        weight_cols = self.weight.data.reshape(O, -1).T
        x_cols = self.img2col(self.x, self.filter_size, self.filter_size, self.stride)
        result = np.dot(x_cols, weight_cols)
        if self.bias is not None:
            result = result + self.bias.data
        output_H = (H - self.filter_size) // self.stride + 1
        output_W = (W - self.filter_size) // self.stride + 1
        result = result.reshape((N, output_H, output_W, O))
        return result.transpose((0, 3, 1, 2))

    def backward(self, eta, lr):
        """
        Back-propagate, update weight/bias, and return the input gradient.

        Must be called after ``forward``.

        :param eta: gradient w.r.t. the output, shape [N, O, output_H, output_W].
        :param lr: learning rate.
        :return: gradient w.r.t. the layer input, same shape as the forward input.
        """
        # Spread eta out with zeros so a strided convolution can be treated
        # as a stride-1 one.
        eta = self.insert_zeros_in_eta(eta)

        N, _, output_H, output_W = eta.shape
        self.calculate_gradients(eta, N, output_H, output_W)

        # The input gradient has to use the weights that produced the forward
        # output, so compute it before the weights are changed.
        result = self.calculate_gradient_to_prev_layer(self.pad_eta(eta))

        self.update_weights(lr, N)
        return result

    def adjust_input_dimensions(self):
        """
        Trim trailing rows/columns of ``self.x`` so H and W are multiples of the stride.
        """
        if self.stride > 1:
            x_fit = self.x.shape[2] % self.stride
            y_fit = self.x.shape[3] % self.stride
            if x_fit != 0:
                self.x = self.x[:, :, 0:self.x.shape[2] - x_fit, :]
            if y_fit != 0:
                self.x = self.x[:, :, :, 0:self.x.shape[3] - y_fit]

    def insert_zeros_in_eta(self, eta):
        """
        Insert ``stride - 1`` zeros between neighbouring rows and columns of eta.

        :param eta: gradient w.r.t. the output, shape [N, O, output_H, output_W].
        :return: the dilated gradient (``eta`` itself when the stride is 1).
        """
        if self.stride <= 1:
            return eta
        N, O, output_H, output_W = eta.shape
        inserted_H = output_H + (output_H - 1) * (self.stride - 1)
        inserted_W = output_W + (output_W - 1) * (self.stride - 1)
        inserted_eta = np.zeros((N, O, inserted_H, inserted_W))
        inserted_eta[:, :, ::self.stride, ::self.stride] = eta
        return inserted_eta

    def calculate_gradients(self, eta, N, output_H, output_W):
        """
        Compute the weight and bias gradients into ``self.W_grad`` / ``self.b_grad``.

        :param eta: (dilated) gradient w.r.t. the output, shape [N, O, output_H, output_W].
        :param N: batch size.
        :param output_H: height of ``eta``.
        :param output_W: width of ``eta``.
        """
        self.b_grad = eta.sum(axis=(0, 2, 3))
        self.W_grad = np.zeros(self.weight.data.shape)
        for i in range(self.filter_size):
            for j in range(self.filter_size):
                # Correlate eta with the input window that kernel tap (i, j) saw.
                window = self.x[:, :, i:i + output_H, j:j + output_W]
                self.W_grad[:, :, i, j] = np.tensordot(eta, window, ([0, 2, 3], [0, 2, 3]))

    def update_weights(self, lr, N):
        """
        Apply one gradient-descent step, averaging the gradients over the batch.

        :param lr: learning rate.
        :param N: batch size.
        """
        self.weight.data -= lr * self.W_grad / N
        if self.bias is not None:
            self.bias.data -= lr * self.b_grad / N

    def pad_eta(self, eta):
        """
        Zero-pad eta so that a stride-1 correlation with the flipped kernels
        yields the gradient aligned with the layer input.

        :param eta: (dilated) gradient w.r.t. the output, shape [N, O, H, W].
        :return: the padded gradient.
        :raises ValueError: if the padding mode is neither "SAME" nor "VALID".
        """
        k = self.filter_size
        if self.padding == "VALID":
            lead = k - 1
        elif self.padding == "SAME":
            # The first k // 2 rows of a full correlation belong to the zero
            # border added in forward(), so they are not computed.
            lead = k - 1 - k // 2
        else:
            raise ValueError("padding must be 'SAME' or 'VALID', got %r" % (self.padding,))

        # A full correlation needs k - 1 trailing zeros. Rows/columns that
        # would fall beyond the end of the input describe only the forward
        # zero border, so the trailing pad is shortened by that amount.
        in_h, in_w = self.input_shape[2], self.input_shape[3]
        trail_h = max(0, k - 1 - max(0, lead + eta.shape[2] - in_h))
        trail_w = max(0, k - 1 - max(0, lead + eta.shape[3] - in_w))
        return np.pad(eta, ((0, 0), (0, 0), (lead, trail_h), (lead, trail_w)),
                      "constant", constant_values=0)

    def calculate_gradient_to_prev_layer(self, eta):
        """
        Compute the gradient handed to the previous layer.

        :param eta: dilated and padded gradient w.r.t. the output.
        :return: gradient w.r.t. the layer input (also stored in ``self.weight.grad``).
        """
        _, C, _, _ = self.weight.data.shape
        # Rotate each kernel by 180 degrees and swap the in/out channel axes.
        weight_flip = np.flip(self.weight.data, (2, 3))
        weight_cols = np.swapaxes(weight_flip, 0, 1).reshape(C, -1).T
        # eta was already dilated for the forward stride, so this pass is stride 1.
        eta_cols = self.img2col(eta, self.filter_size, self.filter_size, 1)
        result = np.dot(eta_cols, weight_cols)
        N, _, H, W = eta.shape
        output_H, output_W = H - self.filter_size + 1, W - self.filter_size + 1
        result = result.reshape((N, output_H, output_W, C)).transpose((0, 3, 1, 2))
        self.weight.grad = self._fit_to_input(result)
        return self.weight.grad

    def _fit_to_input(self, grad):
        """Crop or zero-extend ``grad`` at the bottom/right to the forward input shape."""
        target = tuple(self.input_shape)
        if grad.shape == target:
            return grad
        # Positions not covered by any kernel window (trimmed for the stride)
        # received no contribution, hence zero gradient.
        fitted = np.zeros(target, dtype=grad.dtype)
        h = min(grad.shape[2], target[2])
        w = min(grad.shape[3], target[3])
        fitted[:, :, :h, :w] = grad[:, :, :h, :w]
        return fitted

    def img2col(self, x, filter_size_x, filter_size_y, stride):
        """
        Unfold every kernel-sized window of ``x`` into one row of a 2-D matrix.

        :param x: feature map of shape [N, C, H, W].
        :param filter_size_x: kernel extent along H.
        :param filter_size_y: kernel extent along W.
        :param stride: step between neighbouring windows.
        :return: matrix of shape [N * output_H * output_W, C * filter_size_x * filter_size_y];
            the row of sample n, window (oi, oj) is n * output_H * output_W + oi * output_W + oj.
        """
        N, C, H, W = x.shape
        output_H, output_W = (H - filter_size_x) // stride + 1, (W - filter_size_y) // stride + 1
        out_size = output_H * output_W
        x_cols = np.zeros((out_size * N, filter_size_x * filter_size_y * C))
        # Iterate over output positions; the row index must be derived from
        # them, not from the input offsets, or rows collide when stride > 1.
        for oi in range(output_H):
            i = oi * stride
            for oj in range(output_W):
                j = oj * stride
                patch = x[:, :, i:i + filter_size_x, j:j + filter_size_y].reshape(N, -1)
                x_cols[oi * output_W + oj::out_size, :] = patch
        return x_cols


### 池化层

In [None]:
# Pooling layers: max pooling and average pooling.
class MaxPooling:
    def __init__(self, kernel_size=(2, 2), stride=2):
        """
        :param kernel_size: pooling window size (kx, ky); stored only
        :param stride: step between windows, also used as the window extent
        The implementation relies on kernel_size being equal to stride.
        """
        self.ksize, self.stride, self.mask = kernel_size, stride, None

    def forward(self, input):
        """
        :param input: feature map of shape [N, C, H, W]
        :return: pooled maxima of shape [N, C, H//stride, W//stride]
        """
        step = self.stride
        batch, channels, height, width = input.shape
        # Split H and W into (blocks, step) pairs and reduce the two step axes.
        blocks = input.reshape(batch, channels, height // step, step, width // step, step)
        pooled = blocks.max(axis=(3, 5))
        # Blow the maxima back up to the input size; every position that does
        # not equal its window maximum is marked as "blocked" for backward().
        enlarged = np.repeat(np.repeat(pooled, step, axis=2), step, axis=3)
        self.mask = enlarged != input
        return pooled

    def backward(self, eta):
        """
        :param eta: gradient w.r.t. the output, shape [N, C, H//stride, W//stride]
        :return: gradient w.r.t. the input, shape [N, C, H, W]
        """
        step = self.stride
        spread = np.repeat(np.repeat(eta, step, axis=2), step, axis=3)
        # Only the positions that held a window maximum keep their gradient.
        spread[self.mask] = 0
        return spread


class Averagepooling():
    """Average pooling over non-overlapping windows of an [N, C, H, W] input."""

    def __init__(self, kernel_size=(2,2), stride=2):
        """
        :param kernel_size: pooling window, either a pair (kx, ky) or a single int
        :param stride: stored only; windows never overlap, so the effective
            step always equals the window size
        """
        self.ksize = kernel_size
        self.stride = stride

    def _window(self):
        """Return the window as a (height, width) pair of ints."""
        if np.isscalar(self.ksize):
            return int(self.ksize), int(self.ksize)
        kh, kw = self.ksize
        return int(kh), int(kw)

    def forward(self, input):
        """
        :param input: feature map of shape [N, C, H, W]
        :return: window means, shape [N, C, H//kh, W//kw]
        :raises ValueError: if H or W is not a multiple of the window size
        """
        kh, kw = self._window()
        N, C, H, W = input.shape
        if H % kh != 0 or W % kw != 0:
            raise ValueError(
                "input size (%d, %d) is not a multiple of the pooling window (%d, %d)"
                % (H, W, kh, kw))
        out = input.reshape((N, C, H // kh, kh, W // kw, kw))
        return out.sum(axis=(3, 5)) / (kh * kw)

    def backward(self, eta):
        """
        :param eta: gradient w.r.t. the output, shape [N, C, H//kh, W//kw]
        :return: gradient w.r.t. the input, shape [N, C, H, W]
        """
        kh, kw = self._window()
        # Each input element contributes 1 / (kh * kw) to its window mean,
        # so the upsampled gradient is scaled by the same factor.
        return eta.repeat(kh, axis=2).repeat(kw, axis=3) / (kh * kw)


### 全连接层

In [None]:
class fc:
    """Fully connected layer: ``y = x @ W (+ b)``."""

    def __init__(self, input_num, output_num, bias=True, requires_grad=True):
        """
        :param input_num: number of input neurons
        :param output_num: number of output neurons
        :param bias: whether the layer has a bias term
        :param requires_grad: stored on the instance; not consulted by this class
        """
        self.input_num = input_num
        self.output_num = output_num
        self.requires_grad = requires_grad
        # Kaiming initialisation: std = sqrt(2 / fan_in), the same rule the
        # conv layer in this file uses.
        self.weight = parameter(np.random.randn(self.input_num, self.output_num) * (2 / self.input_num) ** 0.5)
        if bias:
            self.bias = parameter(np.random.randn(self.output_num))
        else:
            self.bias = None


    def forward(self, input):
        """
        :param input: array of shape [N, ...]; everything after the batch axis
            is flattened, e.g. [N, C, H, W] or [N, C*H*W]
        :return: array of shape [N, output_num]
        :raises ValueError: if ``input`` has fewer than two dimensions
        """
        if input.ndim < 2:
            raise ValueError("fc.forward的输入数据维度存在问题")
        self.input_shape = input.shape    # needed to reshape the gradient in backward()
        self.x = input.reshape((input.shape[0], -1))
        result = np.dot(self.x, self.weight.data)
        if self.bias is not None:
            result = result + self.bias.data
        return result


    def backward(self, eta, lr):
        """
        Back-propagate, update weight/bias, and return the input gradient.

        :param eta: gradient w.r.t. the output, shape [N, output_num]
        :param lr: learning rate
        :return: gradient w.r.t. the input, reshaped to the forward input shape
            (also stored in ``self.weight.grad``)
        """
        N = eta.shape[0]
        # Input gradient, computed with the pre-update weights.
        next_eta = np.dot(eta, self.weight.data.T)
        self.weight.grad = np.reshape(next_eta, self.input_shape)

        # Batch-averaged parameter gradients. W_grad keeps the
        # [output_num, input_num] layout, i.e. the transpose of the weight.
        self.W_grad = np.dot(eta.T, self.x) / N
        self.b_grad = np.sum(eta, axis=0) / N

        # Gradient-descent step.
        self.weight.data -= lr * self.W_grad.T
        if self.bias is not None:
            self.bias.data -= lr * self.b_grad

        return self.weight.grad


In [None]:
import numpy as np

class Dropout():
    """Inverted dropout: kept activations are rescaled by ``1 / (1 - drop_rate)``."""

    def __init__(self, drop_rate=0.5, is_train=True):
        """
        :param drop_rate: probability of dropping a neuron, in [0, 1)
        :param is_train: True while training; False turns the layer into an identity
        :raises ValueError: if ``drop_rate`` is outside [0, 1)
        """
        if not 0 <= drop_rate < 1:
            raise ValueError("drop_rate must be in [0, 1), got %r" % (drop_rate,))
        self.drop_rate = drop_rate
        self.is_train = is_train
        self.fix_value = 1 - drop_rate   # rescaling factor that keeps the expected output unchanged


    def forward(self, x):
        """
        :param x: activations of shape [N, m] (N = batch size, m = neurons);
            more leading axes are accepted, the mask applies to the last axis
        :return: ``x`` in evaluation mode, otherwise the masked and rescaled activations
        """
        if not self.is_train:
            return x
        # One keep/drop decision per neuron, shared by every sample of the batch.
        self.save_mask = np.random.uniform(0, 1, x.shape[-1]) > self.drop_rate
        return (x * self.save_mask) / self.fix_value


    def backward(self, eta):
        """
        :param eta: gradient w.r.t. the output
        :return: gradient w.r.t. the input
        """
        if not self.is_train:
            return eta
        # Mirror forward(): same mask and the same 1 / (1 - drop_rate) scaling.
        return (eta * self.save_mask) / self.fix_value



In [None]:
# Softmax with cross-entropy loss.
class softmax:

    def calculate_loss(self, x, label):
        """
        :param x: scores from the previous layer, shape [N, m] (N = batch size, m = classes)
        :param label: true class index of every sample, shape [N]
        :return: cross-entropy loss averaged over the batch
        """
        batch_size, _ = x.shape

        # One-hot encode the labels.
        one_hot = np.zeros_like(x)
        for row in range(one_hot.shape[0]):
            one_hot[row, label[row]] = 1
        self.label = one_hot

        # Subtract each row's maximum before exponentiating to avoid overflow.
        self.x = np.exp(x - x.max(axis=1, keepdims=True))
        self.prediction = self.x / self.x.sum(axis=1, keepdims=True)

        # The small constant guards against log(0).
        log_prob = np.log(self.prediction + 1e-6)
        self.loss = -np.sum(log_prob * self.label)
        return self.loss / batch_size

    def prediction_func(self, x):
        """Return the row-wise softmax of ``x`` (also kept in ``self.out``)."""
        # Same overflow guard as in calculate_loss.
        exps = np.exp(x - x.max(axis=1, keepdims=True))
        self.out = exps / exps.sum(axis=1, keepdims=True)
        return self.out


    def gradient(self):
        """Return prediction minus one-hot label from the last ``calculate_loss`` call."""
        self.eta = self.prediction - self.label
        return self.eta

In [None]:
# LeNet-5
class LeNet5:
    """LeNet-5 style network assembled from the layers defined in this file."""

    def __init__(self):
        # Block 1: 1 -> 6 channels, 5x5 kernel, then pooling, batch norm, ReLU.
        self.conv1 = conv((6, 1, 5, 5), stride=1, padding='SAME', bias=True, requires_grad=True)
        self.pooling1 = MaxPooling(kernel_size=(2, 2), stride=2)
        #self.pooling1 = Averagepooling(kernel_size=(2, 2), stride=2)
        # BatchNormalization names its flag `is_training`.
        self.BN1 = BatchNormalization(6, moving_decay=0.95, is_training=True)
        self.relu1 = Relu()
        # Block 2: 6 -> 16 channels, 5x5 kernel.
        self.conv2 = conv((16, 6, 5, 5), stride=1, padding="VALID", bias=True, requires_grad=True)
        self.pooling2 = MaxPooling(kernel_size=(2, 2), stride=2)
        #self.pooling2 = Averagepooling(kernel_size=(2, 2), stride=2)
        self.BN2 = BatchNormalization(16, moving_decay=0.95, is_training=True)
        self.relu2 = Relu()
        # Block 3: 16 -> 120 channels, 5x5 kernel.
        self.conv3 = conv((120, 16, 5, 5), stride=1, padding="VALID", bias=True, requires_grad=True)
        self.fc4 = fc(120*1*1, 84, bias=True, requires_grad=True)
        self.relu4 = Relu()
        self.dropout = Dropout(drop_rate=0.5, is_train=True)
        self.fc5 = fc(84, 10, bias=True, requires_grad=True)

        self.softmax = softmax()

    def forward(self, imgs, labels, is_train=True):
        """
        Run the network and compute the loss.

        :param imgs: input images, shape [N, C, H, W]
        :param labels: class index of every image, shape [N]
        :param is_train: True for a training step, False for evaluation
            (switches batch norm and dropout to inference behaviour)
        :return: tuple (loss, prediction) as produced by the softmax layer
        """
        # Dropout reads its mode from an attribute rather than an argument.
        self.dropout.is_train = is_train

        x = self.conv1.forward(imgs)
        x = self.pooling1.forward(x)
        x = self.BN1.forward(x, is_train)
        x = self.relu1.forward(x)

        x = self.conv2.forward(x)
        x = self.pooling2.forward(x)
        x = self.BN2.forward(x, is_train)
        x = self.relu2.forward(x)

        x = self.conv3.forward(x)

        x = self.fc4.forward(x)
        x = self.relu4.forward(x)
        x = self.dropout.forward(x)
        x = self.fc5.forward(x)

        loss = self.softmax.calculate_loss(x, labels)
        prediction = self.softmax.prediction_func(x)
        return loss, prediction

    def backward(self, lr):
        """
        Back-propagate the loss of the last ``forward`` call and update every layer.

        :param lr: learning rate
        :return: None
        """
        eta = self.softmax.gradient()

        eta = self.fc5.backward(eta, lr)
        # Must be backward(): forward() would draw a new random mask instead
        # of reusing the one applied in the forward pass.
        eta = self.dropout.backward(eta)
        eta = self.relu4.backward(eta)
        eta = self.fc4.backward(eta, lr)

        eta = self.conv3.backward(eta, lr)

        eta = self.relu2.backward(eta)        # activation: nothing to learn
        eta = self.BN2.backward(eta, lr)
        eta = self.pooling2.backward(eta)     # pooling: nothing to learn
        eta = self.conv2.backward(eta, lr)

        eta = self.relu1.backward(eta)
        eta = self.BN1.backward(eta, lr)
        eta = self.pooling1.backward(eta)
        eta = self.conv1.backward(eta, lr)



In [None]:
import numpy as np
import struct
import matplotlib.pyplot as plt

import LeNet5


def load_mnist(mnistdir , train):
    """Read an MNIST IDX file into a NumPy integer array.

    :param mnistdir: path of the IDX file.
    :param train: True when the file holds images (16-byte header: magic,
        count, rows, cols); False when it holds labels (8-byte header:
        magic, count). Despite its name, the flag selects the file layout,
        not the train/test split.
    :return: array of shape [images, rows, cols] for an image file or
        [images] for a label file.
    """
    # The context manager closes the file even if reading fails.
    with open(mnistdir, 'rb') as ministfile:
        ministdata = ministfile.read()

    rows = 1
    cols = 1
    header = '>iiii' if train else '>ii'
    if train:
        # Image file header.
        magic_num, images, rows, cols = struct.unpack_from(header, ministdata, 0)
    else:
        # Label file header.
        magic_num, images = struct.unpack_from(header, ministdata, 0)
    print('图片数量: %d张, 图片大小: %d*%d' % ( images, rows, cols))

    # Total number of payload bytes (one unsigned byte per pixel or label).
    size = images * rows * cols
    pointer = struct.calcsize(header)
    # View the payload directly as bytes instead of unpacking it into a
    # Python tuple, then widen to NumPy's default integer type so callers
    # get the same kind of array as before.
    pack_data = np.frombuffer(ministdata, dtype=np.uint8, count=size, offset=pointer).astype(int)
    if train:
        pack_data = np.reshape(pack_data, [images, rows, cols])
    else:
        pack_data = np.reshape(pack_data, [images])
    print('本次解析的矩阵格式为[%d,%d,%d]' % (images,rows,cols))
    return pack_data


# Load the MNIST image and label files.
train_images = load_mnist("data/train-images.idx3-ubyte",True)
train_labels = load_mnist("data/train-labels.idx1-ubyte",False)
test_images = load_mnist("data/t10k-images.idx3-ubyte",True)
test_labels = load_mnist("data/t10k-labels.idx1-ubyte",False)

train_batch = 64  # batch size used for training
test_batch = 50  # batch size used for evaluation
epoch = 10
lr = 1e-3
print_size = 10  # number of batches between two log lines
# Series collected for the plots
TrainTimes = []
TrainLoss = []
TrainAcc = []
TestTimes = []
TestAcc = []

plt.ion()

IterNum = 0     # number of log points recorded so far
net = LeNet5.LeNet5()

# Only full batches are processed; a trailing partial batch is skipped.
num_train_batches = train_images.shape[0] // train_batch
num_test_batches = test_images.shape[0] // test_batch

for E in range(epoch):
    batch_loss = 0
    batch_acc = 0

    epoch_loss = 0
    epoch_acc = 0

    # Training
    for i in range(num_train_batches):
        img = train_images[i * train_batch:(i + 1) * train_batch].reshape(train_batch, 1, 28, 28)
        label = train_labels[i * train_batch:(i + 1) * train_batch]
        loss, pred = net.forward(img, label, is_train=True)

        epoch_loss += loss
        batch_loss += loss
        correct = int(np.sum(np.argmax(pred, axis=1) == label))
        epoch_acc += correct
        batch_acc += correct

        net.backward(lr)

        # Logging and plot data
        if (i+1) % print_size == 0:
            # `loss` is already the mean over the samples of a batch, so the
            # running loss is averaged over batches only; accuracy counts
            # samples and is therefore divided by the number of samples.
            mean_acc = batch_acc / (train_batch * print_size)
            mean_loss = batch_loss / print_size
            print(f"Epoch{E+1}:\tbatch:{i+1}\tBatch acc:{mean_acc:{6}.{4}}\tBatch loss:{mean_loss:{6}.{4}}")
            IterNum += 1
            TrainTimes.append(IterNum)
            TrainLoss.append(mean_loss)
            TrainAcc.append(mean_acc)
            batch_loss = 0
            batch_acc = 0

    # Normalise by what was actually processed in this epoch.
    train_seen = num_train_batches * train_batch
    print(f"[Epoch{E+1}]\tTarin accuracy:{epoch_acc/train_seen:.{4}}\tTarin loss:{epoch_loss/num_train_batches:.{4}}")

    # Evaluation on the test set
    test_acc = 0
    for k in range(num_test_batches):
        img = test_images[k*test_batch:(k+1)*test_batch].reshape(test_batch, 1, 28, 28)
        label = test_labels[k*test_batch:(k+1)*test_batch]
        _, pred = net.forward(img, label, is_train=False)
        test_acc += int(np.sum(np.argmax(pred, axis=1) == label))

    test_seen = num_test_batches * test_batch
    print(f"[Epoch{E+1}]\tTest Accuracy:{test_acc / test_seen:.{4}}")
    print("-------------------Epoch end---------------------------")
    TestTimes.append(E+1)
    TestAcc.append(test_acc / test_seen)

# Plots
plt.ioff()
plt.figure(figsize=(12, 6))

# Training loss and accuracy
plt.subplot(1, 2, 1)
plt.title('Training Metrics')
plt.xlabel('Iterations')
plt.ylabel('Value')
plt.plot(TrainTimes, TrainLoss, label='Loss')
plt.plot(TrainTimes, TrainAcc, label='Accuracy')
plt.legend()

# Test accuracy
plt.subplot(1, 2, 2)
plt.title('Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.plot(TestTimes, TestAcc, label='Test Accuracy')
plt.legend()

plt.show()


