In [1]:
import matplotlib.pyplot as plt
import math
from torch.cuda.amp import autocast 
from torch.cuda.amp import GradScaler
import torch
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch import optim
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torch.nn as nn
import torchvision
from torchsummary import summary
import time
from torch.nn import init
from typing import Union, List, Dict, Any, Optional, cast
from PIL import Image
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Enable cuDNN and its auto-tuner (benchmark mode picks convolution algorithms per input shape).
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

In [3]:
# Where the per-epoch training metrics (CSV) and the best checkpoint (pkl) are saved.
data_csv_path = "D:\\OneModel\\qyxx-eca_net-KA-MCML-information.csv"   # change this file name per experiment
model_save_path = "D:\\OneModel\\qyxx-eca_net-KA-MCML-information.pkl"  # change this file name per experiment

In [4]:
# Dataset locations: class-per-folder train/val images, the cropped face-region images
# (master_path) and the CSV with per-image position information (csv_path).
train_path = "D:\\Dataset\\inputdataset\\train"
val_path =  "D:\\Dataset\\inputdataset\\val"
master_path = "D:\\Dataset\\CK_division\\division"
csv_path = "D:\\Dataset\\provincial_characteristics.csv"

# Mini-batch size used by both data loaders
batch_size = 128
# NOTE(review): `resume` is not read anywhere in the visible part of this notebook.
resume = True
# Initial learning rate (decayed by the scheduler) and number of training epochs
lr = 5e-4
epochs = 200
# First epoch index of the training loop
D_epoch = 0 
# Best validation accuracy seen so far; the training loop saves a checkpoint when it is beaten
best_acc  = 0
print("epochs:",epochs,"learning_rate:",lr,"batch_size:",batch_size)

epochs: 200 learning_rate: 0.0005 batch_size: 128


In [5]:
# Device selection: report whether CUDA is available, then pick cuda:0 or fall back to the CPU.
flag = torch.cuda.is_available()
if flag:
    print("GPU")
else:
    print("CPU")
ngpu = 1
# Use the first GPU only when CUDA is available and at least one GPU is requested.
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
# device = torch.device("cpu")
# Show the GPU name
#torch.cuda.get_device_name()
print("divice is ", device)

GPU
divice is  cuda:0


In [6]:
class DataSet_pre(Dataset):
    '''
    Image-classification dataset that yields, for every sample, the full image,
    its integer label, four cropped face-region images and four position tensors.

    Args:
        root: directory containing one sub-directory per class; the sorted
            sub-directory names define the label indices.
        resize: target size of the full image, either an int (square) or an
            (height, width) pair.
        master_path: root directory of the cropped regions, laid out as
            <master_path>/<class>/<region>/<image file name>.
        csv_path: CSV file with an 'image_name' column and the columns
            'left_eye', 'right_eye', 'nose' and 'mouth'.

    Each item is (img, label, division_list, pn_list), where division_list is
    [left_eye, mouth, nose, right_eye] and pn_list is the tuple
    (left_eye, right_eye, nose, mouth) of position tensors.

    NOTE(review): the random horizontal flip is applied to every dataset built
    from this class (including validation) and only to the full image, not to
    the region crops or the position information - confirm this is intended.
    '''

    # Channel statistics used to normalise every image.
    _NORM_MEAN = [0.485, 0.456, 0.406]
    _NORM_STD = [0.229, 0.224, 0.225]
    # Region sub-directories, in the order data_search returns them.
    _REGIONS = ('left_eye', 'mouth', 'nose', 'right_eye')

    def __init__(self, root, resize,master_path,csv_path):
        super(DataSet_pre, self).__init__()
        self.root = root
        self.resize = resize
        self.name2label = {}  # class name -> integer label, e.g. 'anger': 0
        for name in sorted(os.listdir(root)):
            if not os.path.isdir(os.path.join(root, name)):
                continue
            self.name2label[name] = len(self.name2label)

        print(self.name2label)  # show the discovered classes
        self.images, self.labels = [], []
        for name, label in self.name2label.items():
            class_dir = os.path.join(self.root, name)
            for file_name in os.listdir(class_dir):
                self.images.append(os.path.join(class_dir, file_name))
                self.labels.append(label)

        self.master_path = master_path
        self.csv_path = csv_path

    def __len__(self):
        # Number of image files found under root.
        return len(self.images)

    @staticmethod
    def _image_file_name(path):
        # Final path component, accepting both '\\' and '/' separators on any OS.
        return os.path.basename(str(path).replace('\\', '/'))

    @staticmethod
    def _load_rgb(path):
        # The context manager closes the file handle; convert() returns an
        # independent, fully loaded RGB image.
        with Image.open(path) as handle:
            return handle.convert('RGB')

    def data_search(self,path,master_path):
        '''
        Load the four region crops that belong to the image at `path`.

        The class directory is taken from the file-name prefix before the first
        underscore (e.g. 'anger_0.png' -> 'anger'). Returns a list of four
        normalised 3x24x24 tensors: [left_eye, mouth, nose, right_eye].
        '''
        to_tensor = transforms.Compose([
            transforms.Resize((24,24)),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._NORM_MEAN, std=self._NORM_STD)
        ])

        file_name = self._image_file_name(path)
        expression_type = file_name.split('_')[0]
        class_dir = os.path.join(master_path, expression_type)
        return [
            to_tensor(self._load_rgb(os.path.join(class_dir, region, file_name)))
            for region in self._REGIONS
        ]

    def posi_info(self,path,positional_information):
        '''
        Look up the position information of the image at `path`.

        `positional_information` is the CSV path. The CSV is parsed once per
        dataset instance and cached, so later edits to the file are not picked
        up. When an image name occurs more than once, the first row wins.

        Returns:
            (left_eye, right_eye, nose, mouth) as float tensors.

        Raises:
            KeyError: if the image name is not listed in the CSV.
        '''
        tables = self.__dict__.setdefault('_posi_tables', {})
        table = tables.get(positional_information)
        if table is None:
            frame = pd.read_csv(positional_information)
            table = frame.drop_duplicates(subset='image_name', keep='first').set_index('image_name')
            tables[positional_information] = table

        file_name = self._image_file_name(path)
        if file_name not in table.index:
            raise KeyError(
                "no position information for image %r (path %r) in %r"
                % (file_name, path, positional_information)
            )

        # SECURITY NOTE: eval() executes whatever text the CSV cell contains. Only use
        # CSV files you trust; consider ast.literal_eval if the cells are plain literals.
        one = torch.Tensor(eval(table.at[file_name, 'left_eye']))
        two = torch.Tensor(eval(table.at[file_name, 'right_eye']))
        three = torch.Tensor(eval(table.at[file_name, 'nose']))
        four = torch.Tensor(eval(table.at[file_name, 'mouth']))
        return one,two,three,four

    def __getitem__(self, idx):
        '''Return (img, label, division_list, pn_list) for sample `idx`.'''
        path, label = self.images[idx], self.labels[idx]

        if isinstance(self.resize, (tuple, list)):
            size = tuple(self.resize)
        else:
            size = (self.resize, self.resize)
        pipeline = transforms.Compose([
            transforms.Resize(size),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._NORM_MEAN, std=self._NORM_STD)
        ])

        img = pipeline(self._load_rgb(path))
        division_list = self.data_search(path,self.master_path)
        pn_list = self.posi_info(path,self.csv_path)
        label = torch.tensor(label)
        return img,label,division_list,pn_list

In [7]:
# Build the training and validation datasets and their loaders.
train_data = DataSet_pre(root=train_path,resize=224,master_path = master_path,csv_path = csv_path)
data_train = DataLoader(train_data, batch_size=batch_size, shuffle=True,drop_last=True)
# DataSet_pre arguments: root, resize, master_path, csv_path
val_data = DataSet_pre(root=val_path,resize=224,master_path = master_path,csv_path = csv_path)
# NOTE(review): shuffle=True together with drop_last=True means each validation pass
# scores a different subset of the validation set whenever its size is not a multiple
# of batch_size - confirm this is intended.
data_val = DataLoader(val_data, batch_size=batch_size, shuffle=True,drop_last=True)

{'anger': 0, 'contempt': 1, 'disgust': 2, 'fear': 3, 'happy': 4, 'sadness': 5, 'surprise': 6}
{'anger': 0, 'contempt': 1, 'disgust': 2, 'fear': 3, 'happy': 4, 'sadness': 5, 'surprise': 6}


In [8]:
class eca_layer(nn.Module):
    """ECA channel-attention gate.

    Globally average-pools the input, runs a bias-free 1-D convolution across
    the channel axis and rescales the input by the sigmoid of the result.

    Args:
        channel: number of channels of the input feature map (accepted for
            interface compatibility; it is not used by the layer).
        k_size: kernel size of the 1-D convolution across channels.
    """
    def __init__(self, channel, k_size=3):
        super().__init__()
        half_kernel = (k_size - 1) // 2
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=half_kernel, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Global spatial descriptor: one value per channel.
        pooled = self.avg_pool(x)

        # Treat the channels as a length-C sequence with a single feature.
        channel_seq = pooled.squeeze(-1).transpose(-1, -2)
        mixed = self.conv(channel_seq)

        # Restore the pooled layout and squash to (0, 1) gates.
        gate = self.sigmoid(mixed.transpose(-1, -2).unsqueeze(-1))

        return x * gate.expand_as(x)
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class ECABasicBlock(nn.Module):
    """Two-convolution residual block with an ECA gate on the residual branch.

    Args:
        inplanes: input channel count.
        planes: output channel count (``expansion`` is 1).
        stride: stride of the first 3x3 convolution.
        downsample: optional module applied to the input to form the shortcut.
        k_size: kernel size handed to the ECA layer.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, k_size=3):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes, 1)
        self.bn2 = nn.BatchNorm2d(planes)
        self.eca = eca_layer(planes, k_size)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # conv -> bn -> relu, then conv -> bn -> channel attention
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.eca(self.bn2(self.conv2(out)))

        # Shortcut: identity unless a projection was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class ECABottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block with an ECA gate.

    The block outputs ``planes * 4`` channels; the stride is applied by the
    3x3 convolution.

    Args:
        inplanes: input channel count.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to form the shortcut.
        k_size: kernel size handed to the ECA layer.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, k_size=3):
        super().__init__()
        out_planes = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.eca = eca_layer(out_planes, k_size)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Reduce, process spatially, then expand and gate the channels.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.eca(self.bn3(self.conv3(out)))

        # Shortcut: identity unless a projection was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet backbone assembled from ECA residual blocks.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and accept ``(inplanes, planes, stride, downsample, k_size)``.
        layers: number of blocks in each of the four stages.
        num_classes: size of the final linear layer.
        k_size: ECA kernel size for each of the four stages.

    The head uses adaptive average pooling, so the network accepts any input
    size that survives the five stride-2 reductions. For a 7x7 final feature
    map (224x224 input) this is the same average as the former
    ``nn.AvgPool2d(7, stride=1)``.
    """

    def __init__(self, block, layers, num_classes=1000, k_size=(3, 3, 3, 3)):
        super(ResNet, self).__init__()
        # Channel count fed to the next stage; updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], int(k_size[0]))
        self.layer2 = self._make_layer(block, 128, layers[1], int(k_size[1]), stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], int(k_size[2]), stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], int(k_size[3]), stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He-style initialisation for 2-D convolutions (fan-out based) and
        # identity initialisation for batch norm. Conv1d layers (the ECA gates)
        # and the linear head keep PyTorch's default initialisation.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, k_size, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block applies ``stride``; it also receives a 1x1
        projection shortcut whenever the stride or the channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample, k_size)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, k_size=k_size))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of 3-channel images."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def eca_resnet18(k_size=[3, 5, 7, 7], num_classes=1_000, pretrained=False):
    """Build an ECA-ResNet-18 (four stages of two ``ECABasicBlock`` each).

    Args:
        k_size: ECA kernel size for each of the four stages.
        num_classes: number of output classes of the final linear layer.
        pretrained (bool): accepted for interface compatibility; this
            function does not load any weights.

    Returns:
        The model, with its pooling head set to adaptive average pooling.
    """
    stage_depths = [2, 2, 2, 2]
    net = ResNet(ECABasicBlock, stage_depths, num_classes=num_classes, k_size=k_size)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net


In [9]:
# Freshly initialised ECA-ResNet18; SKNet (defined below) reuses its layers as the feature extractor.
eca_net = eca_resnet18()

In [10]:
class ECAAttention(nn.Module):
    """Channel attention built from global average AND max pooling.

    Both pooled descriptors go through the same 1-D convolution across the
    channel axis; their sum is passed through a sigmoid and used to rescale
    the input channels.

    Args:
        kernel_size: odd kernel size of the 1-D convolution. The padding is
            derived from it so the channel count is preserved (the previous
            fixed padding of 3 only worked for ``kernel_size=7``).

    Raises:
        ValueError: if ``kernel_size`` is not a positive odd integer.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError("kernel_size must be a positive odd integer, got %r" % (kernel_size,))
        self.gap=nn.AdaptiveAvgPool2d(1)
        self.maxpool=nn.AdaptiveMaxPool2d(1)
        self.conv=nn.Conv1d(1,1,kernel_size=kernel_size,padding=(kernel_size - 1) // 2)
        self.sigmoid=nn.Sigmoid()
        self.init_weights()

    def init_weights(self):
        # Initialises Conv2d / BatchNorm2d / Linear sub-modules. This module owns
        # only a Conv1d, so nothing matches and the convolution keeps PyTorch's
        # default initialisation.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        avp_result =self.gap(x) #bs,c,1,1
        max_result = self.maxpool(x)

        avp_result=avp_result.squeeze(-1).permute(0,2,1) #bs,1,c
        max_result=max_result.squeeze(-1).permute(0,2,1) #bs,1,c

        # The same convolution (shared weights) processes both descriptors.
        avp_result=self.conv(avp_result) #bs,1,c
        max_result=self.conv(max_result) #bs,1,c

        y=self.sigmoid(max_result + avp_result) #bs,1,c
        y=y.permute(0,2,1).unsqueeze(-1) #bs,c,1,1
        return x*y.expand_as(x)

class SpatialAttention(nn.Module):
    """Spatial attention: gate every location by a map computed from the
    channel-wise max and mean of the input.

    Args:
        kernel_size: odd kernel size of the 2-D convolution. The padding is
            derived from it so the spatial size is preserved (the previous
            fixed padding of 3 only worked for ``kernel_size=7``).

    Raises:
        ValueError: if ``kernel_size`` is not a positive odd integer.
    """
    def __init__(self,kernel_size=7):
        super().__init__()
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError("kernel_size must be a positive odd integer, got %r" % (kernel_size,))
        self.conv=nn.Conv2d(2,1,kernel_size=kernel_size,padding=(kernel_size - 1) // 2)
        self.sigmoid=nn.Sigmoid()
        self.init_weights()

    def init_weights(self):
        # Kaiming-normal weights and zero bias for the Conv2d; the BatchNorm2d and
        # Linear branches are generic and match nothing in this module.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x) :
        # Collapse the channel axis two ways, keeping a singleton channel dim.
        max_result,_=torch.max(x,dim=1,keepdim=True)
        avg_result=torch.mean(x,dim=1,keepdim=True)
        result=torch.cat([max_result,avg_result],1)
        # One-channel attention map, broadcast over all channels of x.
        output=self.conv(result)
        output=self.sigmoid(output)
        return x*output
    
class CA_SA(nn.Module):
    """Serial attention: channel attention followed by spatial attention."""
    def __init__(self):
        super().__init__()
        self.eca = ECAAttention()
        self.sa = SpatialAttention()

    def forward(self,x):
        # Spatial attention is applied to the channel-attended features.
        return self.sa(self.eca(x))
    
class K_Attention(nn.Module):
    """Sum of a serial (channel then spatial) attention branch and two
    parallel branches (channel attention and spatial attention applied
    directly to the input). Every branch has its own weights."""
    def __init__(self):
        super().__init__()
        self.eca = ECAAttention()
        self.sa = SpatialAttention()
        self.eca_sa = CA_SA()

    def forward(self,x):
        serial = self.eca_sa(x)
        parallel = self.eca(x) + self.sa(x)
        return parallel + serial

In [11]:
class SKNet(nn.Module):
    """Classifier made of the ECA-ResNet18 feature extractor, a K_Attention
    module and a linear head.

    The feature extractor is every child of the notebook-global ``eca_net``
    except its last two (pooling and fully connected layers), so the modules
    are shared with ``eca_net`` rather than copied.

    Args:
        num_class: number of output classes.
    """
    def __init__(self, num_class=7):
        super().__init__()
        backbone_layers = list(eca_net.children())[:-2]
        self.features = nn.Sequential(*backbone_layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.KA = K_Attention()
        self.fc = nn.Linear(512*4, num_class)

    def forward(self, x):
        feats = self.features(x)
        # NOTE(review): the same KA module is applied four times to the same
        # features, so the four concatenated branches are computed from
        # identical inputs and weights - confirm this is intended.
        branches = [self.KA(feats) for _ in range(4)]
        pooled = self.avgpool(torch.cat(branches, dim=1))
        return self.fc(torch.flatten(pooled, 1))

In [12]:
# Load the previously trained SKNet weights and freeze them; X_model reuses these layers.
model_path = "D:\\OneModel\\迁移学习\\ECANet\\ISA-MCML\\qyxx-eca_net-KA-MCML.pkl"
sknet = SKNet()
# NOTE(review): torch.load unpickles the file (only load checkpoints you trust) and no
# map_location is given, so tensors are restored to the device they were saved from.
checkpoint = torch.load(model_path)
# The checkpoint is a dict; the weights live under the 'model' key.
sknet.load_state_dict(checkpoint['model'])
del checkpoint
# Freeze every parameter so the optimizer does not update the pretrained network.
# NOTE(review): requires_grad=False does not stop BatchNorm layers from updating their
# running statistics when the enclosing model is in train() mode.
for p in sknet.parameters():
    p.requires_grad=False
# Display the module structure as the cell output.
sknet

SKNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): ECABasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (eca): eca_layer(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
          (conv): Conv1d(1, 1, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
          (sigmoid): Sigmoid()
        )
      )


In [13]:
class X_model(nn.Module):
    """Fusion classifier: features of the whole image from the pretrained
    ``sknet`` plus attention-pooled region crops plus position information.

    ``features`` is every child of the notebook-global ``sknet`` except its
    final linear layer; the modules are shared with ``sknet``, not copied.

    The head expects 556 inputs. NOTE(review): this looks like 512 image
    features + 4 crops x 3 channels + 4 position tensors x 8 values - confirm
    against the position CSV.

    Args:
        num_class: number of output classes.
    """
    def __init__(self, num_class=7):
        super(X_model, self).__init__()
        self.features = nn.Sequential(*list(sknet.children())[:-1])
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.KA = K_Attention()
        self.fc = nn.Linear(556, num_class)

    def forward(self,x,division_list,pn_list):
        """Return class logits.

        Args:
            x: batch of full images, already on the model's device.
            division_list: sequence of four batches of region crops.
            pn_list: sequence of four batches of position tensors.

        The inputs are moved to the device of the model's own parameters. The
        caller's sequences are not modified, so tuples are accepted as well.
        """
        target = self.fc.weight.device
        parts = [division_list[i].to(target) for i in range(4)]
        positions = [pn_list[i].to(target) for i in range(4)]

        # Whole-image features.
        x = torch.flatten(self.avgpool(self.features(x)),1)
        # Region features: the same attention module is applied to every crop,
        # followed by global average pooling.
        region_feats = [torch.flatten(self.avgpool(self.KA(part)),1) for part in parts]

        out = torch.cat([x] + region_feats + positions,dim=1)
        out = self.fc(out)
        return out

In [14]:
#版本3，矩阵优化
class MCML_Loss(nn.Module):
    """KL-divergence loss between a label-derived distribution and a
    distance-derived distribution over the samples of a batch.

    For a batch ``x`` of shape (bs, d) and labels of shape (bs,):

    * ``Q[i, j]`` is ``exp(-||x_i - x_j||)`` with a zero diagonal, normalised
      so that every row sums to one;
    * ``P[i, j]`` is 0 when samples i and j share a label (diagonal included)
      and 1 otherwise;
    * the loss is ``KLDiv(log_softmax(Q, dim=1), softmax(P, dim=1))`` with
      ``batchmean`` reduction.

    NOTE(review): Q is already row-normalised before log_softmax, and P gives
    the larger weight to samples of a *different* class; the numerical
    behaviour of the original implementation is kept as is - confirm this is
    the intended formulation.

    The computation runs on ``x.device`` and is fully vectorised.
    """
    def __init__(self):
        super(MCML_Loss, self).__init__()
        self.KL = nn.KLDivLoss(reduction="batchmean")  # KL divergence

    def forward(self, x, labels):
        labels = labels.to(x.device).reshape(-1)

        # Pairwise Euclidean distances turned into similarities.
        D = torch.cdist(x, x, p=2)
        Q_E = (-D).exp()
        # Zero the i == j entries so a sample is never its own neighbour.
        Q_E = Q_E - torch.diag_embed(torch.diag(Q_E))
        # Row-wise normaliser of the conditional probabilities, shape (bs, 1).
        fm_sum = torch.sum(Q_E,dim=1).reshape(-1, 1)
        Q = torch.div(Q_E, fm_sum)

        # 0 where the two samples share a label, 1 otherwise.
        P = (labels.unsqueeze(1) != labels.unsqueeze(0)).to(torch.get_default_dtype())

        # KLDivLoss expects its input in log space and its target as probabilities.
        q = F.log_softmax(Q, dim=1)
        p = F.softmax(P, dim=1)
        loss = self.KL(q, p)
        return loss

In [15]:
# Move the model to the selected device (GPU when available).
model = X_model().to(device)
# Auxiliary metric-learning loss used next to cross-entropy.
mcml_loss = MCML_Loss().to(device)

# Optimizer over the model parameters plus those of the auxiliary loss.
# MCML_Loss only wraps a KLDivLoss, so it contributes no parameters of its own.
params = list(model.parameters()) + list(mcml_loss.parameters())
optimizer = optim.AdamW(params, lr=lr)

# optimizer = optim.AdamW(model.parameters(), lr=lr)
# Classification loss
criteon = nn.CrossEntropyLoss().to(device)
# Cosine-annealed learning rate.
# NOTE(review): T_max=80 while the loop runs `epochs` (200) epochs; with this scheduler the
# rate reaches eta_min after 80 steps and then climbs back up - confirm this is intended.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=80, eta_min=0)
# torchsummary prints a parameter table similar to TensorFlow's model summary.
# summary(model, input_size=[(3, 224, 224)], batch_size=batch_size, device="cuda")
print(model)

X_model(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): ECABasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (eca): eca_layer(
            (avg_pool): AdaptiveAvgPool2d(output_size=1)
            (conv): Conv1d(1, 1, kernel_size=(3,), stride=(1,), padding=(1,), bias=False

In [16]:
#测试函数
def evalute_(model,val_loader):
    """Evaluate `model` on `val_loader`.

    Uses the notebook-level `device` and `criteon` (cross-entropy). The model
    is switched to eval mode and is NOT switched back; callers that continue
    training must call ``model.train()`` again.

    Accuracy and loss are averaged over the samples actually seen, weighting
    every batch by its real size, so the result is also correct when the last
    batch is smaller than `batch_size`.

    Returns:
        (accuracy, loss): accuracy as a 0-dim float64 tensor on `device`,
        loss as a Python float.

    Raises:
        ValueError: if `val_loader` yields no batches.
    """
    model.eval()
    loss_sum = 0.0
    correct = torch.zeros((), dtype=torch.float64, device=device)
    seen = 0
    # no_grad avoids building the autograd graph, saving compute and memory.
    with torch.no_grad():
        for x, label, division_list, pn_list in val_loader:
            x, label = x.to(device), label.to(device)
            y1 = model(x,division_list,pn_list)
            # Predicted class = most probable class.
            _, preds1 = torch.max(F.softmax(y1,dim=1), 1)
            n_samples = label.size(0)
            # criteon returns the batch mean; re-weight by the batch size.
            loss_sum += criteon(y1, label).item() * n_samples
            correct += torch.sum(preds1 == label)
            seen += n_samples
    if seen == 0:
        raise ValueError("val_loader yielded no batches; cannot compute validation metrics")
    test_acc1 = correct / seen
    test_loss1 = loss_sum / seen
    return test_acc1, test_loss1
print("执行结束")

执行结束


In [17]:
# Mixed-precision (AMP) training loop, following the official PyTorch AMP recipe.
scaler = torch.cuda.amp.GradScaler()
torch.cuda.empty_cache()
for epoch in range(D_epoch, epochs):
    time_one = time.time()                         # training start timestamp
    train_acc1 = 0.0
    train_loss1 = 0.0
    train_acc = 0.0
    train_loss = 0.0

    val_acc = 0.0
    val_loss = 0.0
    number = 0
    # NOTE(review): train() also puts the BatchNorm layers of the frozen backbone in
    # training mode, so their running statistics keep changing - confirm this is intended.
    model.train()
    print("epoch:",epoch)

    for batchidx , (x,label,division_list,pn_list) in enumerate(data_train):
        x = x.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            y1 = model(x,division_list,pn_list)
            loss = criteon(y1,label)  + mcml_loss(y1,label)
        _, preds1 = torch.max(F.softmax(y1,dim=1), 1)
        # Backward pass on the scaled loss.
        scaler.scale(loss).backward()
        # Unscale the gradients first: otherwise max_norm would be compared with
        # gradients that are still multiplied by the loss scale.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm = 5, norm_type=2)  # gradient clipping
        # step() skips the update when the gradients contain inf/NaN.
        scaler.step(optimizer)
        scaler.update()
        # data_train drops the last incomplete batch, so every batch has batch_size samples.
        train_loss1 += loss.item()*batch_size
        train_acc1 += torch.sum(preds1 == label.data).double()
        number = number + 1
    time_two = time.time()             # training end timestamp
    # Report how long the training part of the epoch took.
    print("The Model-Train-Time spent  %d min %.2f s"%((time_two-time_one)//60,(time_two-time_one)%60))
    # Average training loss and accuracy over the samples seen.
    train_loss = train_loss1 / (number*batch_size)
    train_acc = train_acc1 / (number*batch_size)
    # Validation accuracy and loss.
    val_acc, val_loss = evalute_(model, data_val)

    train_acc = train_acc.cpu()
    val_acc = val_acc.cpu()
    print('Accuracy : Train is {} , Valid is {} ;  Loss : Train is  {} ,Valid is {}'.format(train_acc, val_acc, train_loss , val_loss))
    # Append one row per epoch: epoch,train_acc,train_loss,val_acc,val_loss.
    # Written as plain floats; the previous DataFrame-header trick stored truncated
    # tensor reprs and relied on a pandas keyword removed in pandas 2.x.
    log_row = [epoch, float(train_acc), train_loss, float(val_acc), val_loss]
    with open(data_csv_path, "a", newline="\n") as log_file:
        log_file.write(",".join(str(value) for value in log_row) + "\n")
    if val_acc > best_acc:
        print("覆盖最好的模型...")
        best_acc = val_acc 
        checkpoint = {
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_acc':best_acc
        }
        torch.save(checkpoint,model_save_path)
    scheduler.step()  # advance the learning-rate schedule once per epoch
# Author's note: unless strictly necessary, prefer data augmentation over n-fold cross-validation.

epoch: 0
The Model-Train-Time spent  0 min 15.98 s
Accuracy : Train is 0.5377604166666666 , Valid is 0.65625 ;  Loss : Train is  1.6206334233283997 ,Valid is 1.2676005363464355
覆盖最好的模型...
epoch: 1
The Model-Train-Time spent  0 min 10.96 s
Accuracy : Train is 0.7252604166666666 , Valid is 0.6796875 ;  Loss : Train is  1.1117705702781677 ,Valid is 0.9398254156112671
覆盖最好的模型...
epoch: 2
The Model-Train-Time spent  0 min 10.78 s
Accuracy : Train is 0.7526041666666666 , Valid is 0.75 ;  Loss : Train is  0.8566168447335561 ,Valid is 0.7502306699752808
覆盖最好的模型...
epoch: 3
The Model-Train-Time spent  0 min 10.73 s
Accuracy : Train is 0.7643229166666666 , Valid is 0.8046875 ;  Loss : Train is  0.7240327696005503 ,Valid is 0.6163817048072815
覆盖最好的模型...
epoch: 4
The Model-Train-Time spent  0 min 10.69 s
Accuracy : Train is 0.7916666666666666 , Valid is 0.8046875 ;  Loss : Train is  0.6521100699901581 ,Valid is 0.5340917110443115
epoch: 5
The Model-Train-Time spent  0 min 10.62 s
Accuracy : Train 

The Model-Train-Time spent  0 min 11.44 s
Accuracy : Train is 0.8880208333333333 , Valid is 0.890625 ;  Loss : Train is  0.36868780851364136 ,Valid is 0.2866174578666687
epoch: 47
The Model-Train-Time spent  0 min 11.13 s
Accuracy : Train is 0.8802083333333333 , Valid is 0.875 ;  Loss : Train is  0.36482128004233044 ,Valid is 0.3401562571525574
epoch: 48
The Model-Train-Time spent  0 min 10.88 s
Accuracy : Train is 0.8971354166666666 , Valid is 0.8828125 ;  Loss : Train is  0.35869719088077545 ,Valid is 0.3266463875770569
epoch: 49
The Model-Train-Time spent  0 min 10.83 s
Accuracy : Train is 0.8841145833333333 , Valid is 0.8671875 ;  Loss : Train is  0.3650728911161423 ,Valid is 0.398722380399704
epoch: 50
The Model-Train-Time spent  0 min 10.79 s
Accuracy : Train is 0.88671875 , Valid is 0.8984375 ;  Loss : Train is  0.3521818319956462 ,Valid is 0.3257908821105957
epoch: 51
The Model-Train-Time spent  0 min 10.75 s
Accuracy : Train is 0.88671875 , Valid is 0.90625 ;  Loss : Train is 

The Model-Train-Time spent  0 min 10.80 s
Accuracy : Train is 0.8854166666666666 , Valid is 0.8984375 ;  Loss : Train is  0.3488479753335317 ,Valid is 0.33453434705734253
epoch: 94
The Model-Train-Time spent  0 min 10.92 s
Accuracy : Train is 0.890625 , Valid is 0.8671875 ;  Loss : Train is  0.34757956862449646 ,Valid is 0.3846011757850647
epoch: 95
The Model-Train-Time spent  0 min 10.78 s
Accuracy : Train is 0.8854166666666666 , Valid is 0.8984375 ;  Loss : Train is  0.3597580095132192 ,Valid is 0.3486541509628296
epoch: 96
The Model-Train-Time spent  0 min 10.81 s
Accuracy : Train is 0.8932291666666666 , Valid is 0.875 ;  Loss : Train is  0.3413452406724294 ,Valid is 0.35209447145462036
epoch: 97
The Model-Train-Time spent  0 min 10.81 s
Accuracy : Train is 0.8880208333333333 , Valid is 0.890625 ;  Loss : Train is  0.3461432059605916 ,Valid is 0.36091986298561096
epoch: 98
The Model-Train-Time spent  0 min 10.80 s
Accuracy : Train is 0.8815104166666666 , Valid is 0.875 ;  Loss : Tra

The Model-Train-Time spent  0 min 10.81 s
Accuracy : Train is 0.9036458333333333 , Valid is 0.890625 ;  Loss : Train is  0.31103049715360004 ,Valid is 0.32292240858078003
epoch: 140
The Model-Train-Time spent  0 min 10.82 s
Accuracy : Train is 0.9049479166666666 , Valid is 0.875 ;  Loss : Train is  0.30449581642945606 ,Valid is 0.3490264415740967
epoch: 141
The Model-Train-Time spent  0 min 10.81 s
Accuracy : Train is 0.9075520833333333 , Valid is 0.890625 ;  Loss : Train is  0.3107470323642095 ,Valid is 0.3693159222602844
epoch: 142
The Model-Train-Time spent  0 min 10.68 s
Accuracy : Train is 0.9140625 , Valid is 0.8984375 ;  Loss : Train is  0.2975005855162938 ,Valid is 0.35267776250839233
epoch: 143
The Model-Train-Time spent  0 min 10.71 s
Accuracy : Train is 0.8997395833333333 , Valid is 0.8671875 ;  Loss : Train is  0.2973930438359578 ,Valid is 0.3414459228515625
epoch: 144
The Model-Train-Time spent  0 min 10.65 s
Accuracy : Train is 0.91015625 , Valid is 0.875 ;  Loss : Train 

Accuracy : Train is 0.9309895833333333 , Valid is 0.8828125 ;  Loss : Train is  0.24793036778767905 ,Valid is 0.3141951262950897
epoch: 186
The Model-Train-Time spent  0 min 10.86 s
Accuracy : Train is 0.9244791666666666 , Valid is 0.8828125 ;  Loss : Train is  0.2647332052389781 ,Valid is 0.2938065826892853
epoch: 187
The Model-Train-Time spent  0 min 10.80 s
Accuracy : Train is 0.9375 , Valid is 0.875 ;  Loss : Train is  0.24368350704511008 ,Valid is 0.32655853033065796
epoch: 188
The Model-Train-Time spent  0 min 10.81 s
Accuracy : Train is 0.9296875 , Valid is 0.875 ;  Loss : Train is  0.2568406363328298 ,Valid is 0.27299824357032776
epoch: 189
The Model-Train-Time spent  0 min 10.78 s
Accuracy : Train is 0.93359375 , Valid is 0.890625 ;  Loss : Train is  0.23538041363159815 ,Valid is 0.29823586344718933
epoch: 190
The Model-Train-Time spent  0 min 10.81 s
Accuracy : Train is 0.9283854166666666 , Valid is 0.8984375 ;  Loss : Train is  0.25285593916972476 ,Valid is 0.272047579288482