# 数据集准备

In [1]:
from PIL import Image
import numpy as np
import torch
import os
import torchvision.transforms as transforms


def read_img(img_path):
    #inp=np.array(Image.open('D:/数据库/palmdata/tju1/1/1.jpeg').resize((28,28)))
    #inp=np.array(Image.open('D:/数据库/palmdata/tju1/1/1.jpeg')
    transform = transforms.Compose([lambda img: Image.open(img_path).convert('L'),  #转成灰度图
                                    lambda img: img.resize((28, 28)),  #统一大小
                                    lambda img: np.reshape(img, (28, 28, 1)),
                                    lambda img: np.transpose(img, [2, 0, 1]),   #通道在前
                                    lambda img: img / 255.  #归一化
                                    ])
    inp = transform(img_path)  #图像处理
    return inp

In [2]:
def get_images(data_path):
    img_items = []
    files = os.listdir(data_path)
    files=list(map(int,files))
    files.sort()
    files=list(map(str,files))
    #print(files)
    total_images=[]
    for file in files:  #1~600
        f_images=[]
        images=os.listdir(os.path.join(data_path,file))
        for i in range(1,len(images)+1): #1~10
            image_path=os.path.join(data_path,file,str(i)+'.jpeg')
            image=read_img(image_path)
            f_images.append(image)
        total_images.append(f_images)
    total_images=torch.from_numpy(np.array(total_images)).unsqueeze(2)
    return total_images

In [3]:
# train_path='D:/数据库/palmdata/tju1'
# test_path='D:/数据库/palmdata/tju2'

train_path='D:/数据库/TJU1/train'
test_path='D:/数据库/TJU1/test'

x_train=get_images(train_path).numpy()
x_test=get_images(test_path).numpy()

datasets={'train':x_train,'test':x_test}

print("DB: train", x_train.shape, "test", x_test.shape)

DB: train (600, 10, 1, 1, 28, 28) test (600, 10, 1, 1, 28, 28)


In [4]:
type(x_train)

numpy.ndarray

# 生成一个batch的数据，一个batch包含8个N way k shot 任务

In [5]:
n_way = 5
k_spt = 1  ## support data 的个数
k_query = 5  ## query data 的个数
imgsz = 28  ##图片边长
resize = imgsz
task_num = 8
batch_size = task_num  #一个batch包含8个任务，每个任务都是（s集+q集）？
indexes = {"train": 0, "test": 0}  #记录当前取到第几batch

In [6]:
#生成一个epoch的模型的训练集或测试集列表，[support_set_x, support_set_y, target_x, target_y]
def load_data_cache(dataset):
    #  take 5 way 1 shot as example: 5 * 1
    setsz = k_spt * n_way   #5way*1shot
    querysz = k_query * n_way  #5way*15shot
    data_cache = []  #（10，8，[x_spts, y_spts, x_qrys, y_qrys]）  #存储一个epoch的数据集，一个epoch包含10个batch，每个batch包含8个N way K shot 任务

    # print('preload next 10 caches of batch_size of batch.')
    for sample in range(10):  # 遍历一个epoch，生成10个batch的任务组

        x_spts, y_spts, x_qrys, y_qrys = [], [], [], []  #1个batch=8个任务的数据
        for i in range(batch_size):  # 遍历每个batch任务组，生成8个任务，1个任务=1支持集（5*1）+1查询集（5*15）

            x_spt, y_spt, x_qry, y_qry = [], [], [], []  #一个任务的数据
            #随机从1200或423类中选出n_way给类别
            selected_cls = np.random.choice(dataset.shape[0], n_way, replace=False)

            # 构造一个任务的support集和query集
            for j, cur_class in enumerate(selected_cls):
                #对于每个选出的类别，从20个样本中随机选出k_spt + k_query个样本
                selected_img = np.random.choice(10, k_spt + k_query, replace=False)

                x_spt.append(dataset[cur_class][selected_img[:k_spt]])
                x_qry.append(dataset[cur_class][selected_img[k_spt:]])
                y_spt.append([j for _ in range(k_spt)])
                y_qry.append([j for _ in range(k_query)])

            # np.random.permutation数据打乱，生成新数组
            perm = np.random.permutation(n_way * k_spt)
            
            x_spt = np.array(x_spt).reshape(n_way * k_spt, 1, resize, resize)[perm]  #(5,1,28,28)
            y_spt = np.array(y_spt).reshape(n_way * k_spt)[perm]  #(5)

            perm = np.random.permutation(n_way * k_query)
            x_qry = np.array(x_qry).reshape(n_way * k_query, 1, resize, resize)[perm]  #(15*5,1,28,28)
            y_qry = np.array(y_qry).reshape(n_way * k_query)[perm]  #(75)

            # append [sptsz, 1, 28, 28] => [batch_size, setsz, 1, 28, 28]
            #一个batch=8个任务的数据集
            x_spts.append(x_spt)  #(8，5,1,28,28)
            y_spts.append(y_spt)
            x_qrys.append(x_qry)  #（8，75，1，28，28）
            y_qrys.append(y_qry)

        # [b, setsz = n_way * k_spt, 1, 28, 28]
        x_spts = np.array(x_spts).astype(np.float32).reshape(batch_size, setsz, 1, resize, resize)
        y_spts = np.array(y_spts).astype(np.int).reshape(batch_size, setsz)
        # [b, qrysz = n_way * k_query, 1, 28, 28]
        x_qrys = np.array(x_qrys).astype(np.float32).reshape(batch_size, querysz, 1, resize, resize)
        y_qrys = np.array(y_qrys).astype(np.int).reshape(batch_size, querysz)

        data_cache.append([x_spts, y_spts, x_qrys, y_qrys])

    return data_cache

#元网络的数据集
datasets_cache = {"train": load_data_cache(x_train),
                  "test": load_data_cache(x_test)}

In [7]:
#迭代地取出一个个batch的数据集
def next(mode='train'):
    """
    Gets next batch from the dataset with name.
    :param mode: The name of the splitting (one of "train", "val", "test")
    :return:
    """
    # update cache if indexes is larger than len(data_cache)
    if indexes[mode] >= len(datasets_cache[mode]):
        indexes[mode] = 0
        datasets_cache[mode] = load_data_cache(datasets[mode])

    next_batch = datasets_cache[mode][indexes[mode]]
    indexes[mode] += 1

    return next_batch

# 模型

In [8]:
#=======================================模型
import torch
from torch import nn
from torch.nn import functional as F
from copy import deepcopy, copy

#构建基学习器,学习一个任务
class BaseNet(nn.Module):
    def __init__(self):
        super(BaseNet, self).__init__()
        self.vars = nn.ParameterList()  ## 包含了所有需要被优化的tensor参数w和b
        self.vars_bn = nn.ParameterList()  ##bn层参数

        # 第1个conv2d=================================
        # in_channels = 1, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2
        weight = nn.Parameter(torch.ones(64, 1, 3, 3))
        nn.init.kaiming_normal_(weight)
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        # 第1个BatchNorm层
        weight = nn.Parameter(torch.ones(64))
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        running_mean = nn.Parameter(torch.zeros(64), requires_grad=False)
        running_var = nn.Parameter(torch.zeros(64), requires_grad=False)
        self.vars_bn.extend([running_mean, running_var])   #标准化？

        # 第2个conv2d====================================
        # in_channels = 64, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2
        weight = nn.Parameter(torch.ones(64, 64, 3, 3))
        nn.init.kaiming_normal_(weight)
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        # 第2个BatchNorm层
        weight = nn.Parameter(torch.ones(64))
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        running_mean = nn.Parameter(torch.zeros(64), requires_grad=False)
        running_var = nn.Parameter(torch.zeros(64), requires_grad=False)
        self.vars_bn.extend([running_mean, running_var])

        # 第3个conv2d====================================
        # in_channels = 64, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2
        weight = nn.Parameter(torch.ones(64, 64, 3, 3))
        nn.init.kaiming_normal_(weight)
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        # 第3个BatchNorm层
        weight = nn.Parameter(torch.ones(64))
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        running_mean = nn.Parameter(torch.zeros(64), requires_grad=False)
        running_var = nn.Parameter(torch.zeros(64), requires_grad=False)
        self.vars_bn.extend([running_mean, running_var])

        # 第4个conv2d======================================
        # in_channels = 64, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2
        weight = nn.Parameter(torch.ones(64, 64, 3, 3))
        nn.init.kaiming_normal_(weight)
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        # 第4个BatchNorm层
        weight = nn.Parameter(torch.ones(64))
        bias = nn.Parameter(torch.zeros(64))
        self.vars.extend([weight, bias])

        running_mean = nn.Parameter(torch.zeros(64), requires_grad=False)
        running_var = nn.Parameter(torch.zeros(64), requires_grad=False)
        self.vars_bn.extend([running_mean, running_var])

        ##linear===========================================
        weight = nn.Parameter(torch.ones([5, 64]))
        bias = nn.Parameter(torch.zeros(5))
        self.vars.extend([weight, bias])

    #         self.conv = nn.Sequential(
    #             nn.Conv2d(in_channels = 1, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2),
    #             nn.BatchNorm2d(64),
    #             nn.ReLU(),
    #             nn.MaxPool2d(2),

    #             nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2),
    #             nn.BatchNorm2d(64),
    #             nn.ReLU(),
    #             nn.MaxPool2d(2),

    #             nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2),
    #             nn.BatchNorm2d(64),
    #             nn.ReLU(),
    #             nn.MaxPool2d(2),

    #             nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = (3,3), padding = 2, stride = 2),
    #             nn.BatchNorm2d(64),
    #             nn.ReLU(),
    #             nn.MaxPool2d(2),

    #             FlattenLayer(),
    #             nn.Linear(64,5)
    #         )

    def forward(self, x, params=None, bn_training=True):
        '''
        :bn_training: set False to not update
        :return:
        '''
        if params is None:
            params = self.vars

        weight, bias = params[0], params[1]  # 第1个CONV层
        x = F.conv2d(x, weight, bias, stride=2, padding=2)
        weight, bias = params[2], params[3]  # 第1个BN层
        running_mean, running_var = self.vars_bn[0], self.vars_bn[1]
        x = F.batch_norm(x, running_mean, running_var, weight=weight, bias=bias, training=bn_training)
        x = F.max_pool2d(x, kernel_size=2)  # 第1个MAX_POOL层
        x = F.relu(x, inplace=[True])  # 第1个relu

        weight, bias = params[4], params[5]  # 第2个CONV层
        x = F.conv2d(x, weight, bias, stride=2, padding=2)
        weight, bias = params[6], params[7]  # 第2个BN层
        running_mean, running_var = self.vars_bn[2], self.vars_bn[3]
        x = F.batch_norm(x, running_mean, running_var, weight=weight, bias=bias, training=bn_training)
        x = F.max_pool2d(x, kernel_size=2)  # 第2个MAX_POOL层
        x = F.relu(x, inplace=[True])  # 第2个relu

        weight, bias = params[8], params[9]  # 第3个CONV层
        x = F.conv2d(x, weight, bias, stride=2, padding=2)
        weight, bias = params[10], params[11]  # 第3个BN层
        running_mean, running_var = self.vars_bn[4], self.vars_bn[5]
        x = F.batch_norm(x, running_mean, running_var, weight=weight, bias=bias, training=bn_training)
        x = F.max_pool2d(x, kernel_size=2)  # 第3个MAX_POOL层
        x = F.relu(x, inplace=[True])  # 第3个relu

        weight, bias = params[12], params[13]  # 第4个CONV层
        x = F.conv2d(x, weight, bias, stride=2, padding=2)
        x = F.relu(x, inplace=[True])  # 第4个relu
        weight, bias = params[14], params[15]  # 第4个BN层
        running_mean, running_var = self.vars_bn[6], self.vars_bn[7]
        x = F.batch_norm(x, running_mean, running_var, weight=weight, bias=bias, training=bn_training)
        x = F.max_pool2d(x, kernel_size=2)  # 第4个MAX_POOL层

        x = x.view(x.size(0), -1)  ## flatten
        weight, bias = params[16], params[17]  # linear
        x = F.linear(x, weight, bias)

        return x

    def parameters(self):
        return self.vars

#构建元学习器，学习一个batch任务（8个任务为一个batch）
class MetaLearner(nn.Module):
    def __init__(self):
        super(MetaLearner, self).__init__()
        self.update_step = 5  ## 基学习器训练更新5次
        self.update_step_test = 5
        self.net = BaseNet().cuda()
        self.meta_lr = 2e-4  #元学习器的学习率
        self.base_lr = 4 * 1e-2  #基学习器的学习率
        self.inner_lr = 0.4
        self.outer_lr = 1e-2
        self.meta_optim = torch.optim.Adam(self.net.parameters(), lr=self.meta_lr)  #元学习器的优化器

    def forward(self, x_spt, y_spt, x_qry, y_qry):  #输入一个batch的数据=8个任务的数据，每执行一次这个函数，元学习器就更新一次
        x_spt, y_spt, x_qry, y_qry=x_spt.float(),y_spt.long(),x_qry.float(),y_qry.long()

        #x_spt, y_spt, x_qry, y_qry=x_spt,torch.LongTensor(y_spt),torch.LongTensor(x_qry),torch.LongTensor(y_qry)
        # 初始化
        # 8         5    1    28  28
        task_num, ways, shots, h, w = x_spt.size()
        #      8，75，1，28，28
        query_size = x_qry.size(1)  # 75 = 15 * 5
        loss_list_qry = [0 for _ in range(self.update_step + 1)]   #[0,0,0,0,0,0]  #保存一个任务内，基学习器的初始损失加后面5次更新的损失
        correct_list = [0 for _ in range(self.update_step + 1)]   #[0,0,0,0,0,0]  #保存一个任务内，基学习器的初始准确数加后面5次更新的准确数
        #遍历一个batch内的每个任务，对每个任务训练更新基学习器5次
        for i in range(task_num):
            ## 一个任务上基学习器的第0步更新
            y_hat = self.net(x_spt[i], params=None, bn_training=True)  # (ways * shots, ways) 用支持集，基学习器学习某一个任务

            loss = F.cross_entropy(y_hat, y_spt[i]) #在支持集上，计算基学习器在某个任务的损失
            grad = torch.autograd.grad(loss, self.net.parameters()) #计算该任务的梯度
            tuples = zip(grad, self.net.parameters())  ## 将梯度和参数\theta一一对应起来
            # fast_weights这一步相当于求了一个\theta - \alpha*\nabla(L)
            fast_weights = list(map(lambda p: p[1] - self.base_lr * p[0], tuples))  #更新一次后的基学习器参数
            # 在query集上测试，计算准确率
            # 这一步使用更新前的数据
            with torch.no_grad():
                y_hat = self.net(x_qry[i], self.net.parameters(), bn_training=True) #使用前一步的基学习器计算查询集上的损失和准确数
                loss_qry = F.cross_entropy(y_hat, y_qry[i])
                loss_list_qry[0] += loss_qry
                pred_qry = F.softmax(y_hat, dim=1).argmax(dim=1)  # size = (75)
                correct = torch.eq(pred_qry, y_qry[i]).sum().item()
                correct_list[0] += correct

            # 使用更新后的数据在query集上测试。
            with torch.no_grad():
                y_hat = self.net(x_qry[i], fast_weights, bn_training=True)  #使用更新后的基学习器计算查询集上的损失和准确数
                loss_qry = F.cross_entropy(y_hat, y_qry[i])
                loss_list_qry[1] += loss_qry
                pred_qry = F.softmax(y_hat, dim=1).argmax(dim=1)  # size = (75)
                correct = torch.eq(pred_qry, y_qry[i]).sum().item()
                correct_list[1] += correct

            for k in range(1, self.update_step):   #再进行4次支持集上训练后更新基学习器的参数，并记录查询集上的验证损失和准确数
                y_hat = self.net(x_spt[i], params=fast_weights, bn_training=True)
                loss = F.cross_entropy(y_hat, y_spt[i])
                grad = torch.autograd.grad(loss, fast_weights)
                tuples = zip(grad, fast_weights)
                fast_weights = list(map(lambda p: p[1] - self.base_lr * p[0], tuples))

                y_hat = self.net(x_qry[i], params=fast_weights, bn_training=True)
                loss_qry = F.cross_entropy(y_hat, y_qry[i])
                loss_list_qry[k + 1] += loss_qry

                with torch.no_grad():
                    pred_qry = F.softmax(y_hat, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_qry, y_qry[i]).sum().item()
                    correct_list[k + 1] += correct
        #         print('hello')

        #元学习器参数更新
        loss_qry = loss_list_qry[-1] / task_num  #计算最后一次更新基学习器（第五次更新）上查询集的平均每个任务的损失（表示这个batch的损失）===》作为外部元学习器的一次损失，更新一次元学习器的参数
        self.meta_optim.zero_grad()  # 梯度清零
        loss_qry.backward() #
        self.meta_optim.step()

        #计算当前这步的元学习器学到的基学习器对于一个任务1+（5次更新）的对应的查询集平均准确率和损失
        accs = np.array(correct_list) / (query_size * task_num)
        loss = np.array(loss_list_qry) / (task_num)
        return accs, loss  #返回每一步（基学习器在每个任务上更新共六步）的平均损失和准确数

    #对MAML训练得到的基学习器测试准确率
    def finetunning(self, x_spt, y_spt, x_qry, y_qry):  #输入一个任务的数据
        assert len(x_spt.shape) == 4
        x_spt, y_spt, x_qry, y_qry = x_spt.float(), y_spt.long(), x_qry.float(), y_qry.long()

        query_size = x_qry.size(0)  #75
        correct_list = [0 for _ in range(self.update_step_test + 1)]   #[0,0,0,0,0,0]

        new_net = deepcopy(self.net)
        y_hat = new_net(x_spt)
        loss = F.cross_entropy(y_hat, y_spt)
        grad = torch.autograd.grad(loss, new_net.parameters())
        fast_weights = list(map(lambda p: p[1] - self.base_lr * p[0], zip(grad, new_net.parameters())))

        # 在query集上测试，计算准确率
        # 这一步使用更新前的基学习器参数
        with torch.no_grad():
            y_hat = new_net(x_qry, params=new_net.parameters(), bn_training=True)
            pred_qry = F.softmax(y_hat, dim=1).argmax(dim=1)  # size = (75)
            correct = torch.eq(pred_qry, y_qry).sum().item()
            correct_list[0] += correct

        # 使用更新后的基学习器参数。
        with torch.no_grad():
            y_hat = new_net(x_qry, params=fast_weights, bn_training=True)
            pred_qry = F.softmax(y_hat, dim=1).argmax(dim=1)  # size = (75)
            correct = torch.eq(pred_qry, y_qry).sum().item()
            correct_list[1] += correct

        for k in range(1, self.update_step_test):  #五步更新基学习器（这里是剩下四步）
            y_hat = new_net(x_spt, params=fast_weights, bn_training=True)
            loss = F.cross_entropy(y_hat, y_spt)
            grad = torch.autograd.grad(loss, fast_weights)
            fast_weights = list(map(lambda p: p[1] - self.base_lr * p[0], zip(grad, fast_weights)))

            y_hat = new_net(x_qry, fast_weights, bn_training=True)

            with torch.no_grad():
                pred_qry = F.softmax(y_hat, dim=1).argmax(dim=1)
                correct = torch.eq(pred_qry, y_qry).sum().item()
                correct_list[k + 1] += correct

        del new_net
        accs = np.array(correct_list) / query_size  #返回一个任务上基学习器的平均准确率
        return accs

#模型训练
import time
device = torch.device('cuda')

meta = MetaLearner().to(device)  #初始化元学习器

epochs =800 # 60000
for step in range(epochs):
    start = time.time()
    x_spt, y_spt, x_qry, y_qry = next('train')  #取出一个batch的数据（包含8个任务）
    x_spt, y_spt, x_qry, y_qry = torch.from_numpy(x_spt).to(device), \
                                 torch.from_numpy(y_spt).to(device), \
                                 torch.from_numpy(x_qry).to(device), \
                                 torch.from_numpy(y_qry).to(device)
    accs, loss = meta(x_spt, y_spt, x_qry, y_qry)  #更新一次元学习器，返回这一次在八个任务上基学习器每一步更新得到的的损失和准确率
    end = time.time()

    if step % 20 == 0:  #每更新100次元学习器，输出一次当前的基学习器准确率和损失情况
        print("epoch:", step)
        print(accs)
        print(loss)

    if step % 30 == 0:  #每更新1000次元学习器，从测试集分别取出一个batch数据，从中依此取出一个任务的数据测试当前学到的基学习器
        accs = []
        for _ in range(1000 // task_num):
            # db_train.next('test')
            x_spt, y_spt, x_qry, y_qry = next('test')
            x_spt, y_spt, x_qry, y_qry = torch.from_numpy(x_spt).to(device), \
                                         torch.from_numpy(y_spt).to(device), \
                                         torch.from_numpy(x_qry).to(device), \
                                         torch.from_numpy(y_qry).to(device)

            for x_spt_one, y_spt_one, x_qry_one, y_qry_one in zip(x_spt, y_spt, x_qry, y_qry):
                test_acc = meta.finetunning(x_spt_one, y_spt_one, x_qry_one, y_qry_one)
                accs.append(test_acc)
        print('在mean process之前：', np.array(accs).shape)
        accs = np.array(accs).mean(axis=0).astype(np.float16)
        print('测试集准确率:', accs)

epoch: 0
[0.2   0.385 0.635 0.685 0.685 0.71 ]
[tensor(1.6094, device='cuda:0') tensor(1.4572, device='cuda:0')
 tensor(1.3445, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(1.2418, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(1.1759, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(1.1320, device='cuda:0', grad_fn=<DivBackward0>)]
在mean process之前： (1000, 6)
测试集准确率: [0.2062 0.4165 0.633  0.642  0.6553 0.667 ]
epoch: 20
[0.2   0.685 0.7   0.755 0.76  0.765]
[tensor(1.6097, device='cuda:0') tensor(1.4005, device='cuda:0')
 tensor(1.2315, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(1.0760, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.9827, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.9167, device='cuda:0', grad_fn=<DivBackward0>)]
在mean process之前： (1000, 6)
测试集准确率: [0.1996 0.6626 0.7534 0.778  0.7915 0.791 ]
epoch: 40
[0.215 0.77  0.825 0.845 0.845 0.83 ]
[tensor(1.6055, device='cuda:0') tensor(1.3127, device='cuda:0')
 tensor(1.0564, device='cuda:0', grad_fn=<DivB

epoch: 420
[0.2   0.85  0.82  0.915 0.92  0.94 ]
[tensor(1.6726, device='cuda:0') tensor(0.8253, device='cuda:0')
 tensor(0.6787, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.4796, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.4255, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.3455, device='cuda:0', grad_fn=<DivBackward0>)]
在mean process之前： (1000, 6)
测试集准确率: [0.2035 0.824  0.899  0.9146 0.924  0.9277]
epoch: 440
[0.2   0.805 0.925 0.945 0.925 0.925]
[tensor(1.7039, device='cuda:0') tensor(0.7979, device='cuda:0')
 tensor(0.5753, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.4113, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.3765, device='cuda:0', grad_fn=<DivBackward0>)
 tensor(0.3307, device='cuda:0', grad_fn=<DivBackward0>)]
在mean process之前： (1000, 6)
测试集准确率: [0.2012 0.8164 0.892  0.9136 0.9243 0.9277]
epoch: 460
[0.235 0.845 0.91  0.93  0.945 0.93 ]
[tensor(1.6951, device='cuda:0') tensor(0.7566, device='cuda:0')
 tensor(0.5103, device='cuda:0', grad_fn=<