# Import Packages

In [1]:
import os
!nvidia-smi
%config Completer.use_jedi = False # 运行后在敲语句时按tab即可查看补全
#pip install --upgrade d2l   在 Console 中运行导入 李沐老师开发的 d2l 库
from d2l import torch as d2l
#pip install einops  导入能优雅处理张量的 einops 库
#from einops import rearrange, repeat, reduce

Thu Jun  6 08:51:06 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   72C    P8              11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla T4                       Off | 00000000:00:05.0 Off |  

In [2]:
import math
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.nn.init as init # 高斯分布初始化
# import keras.backend as K

import torch.optim as optim
from torch.optim.lr_scheduler import StepLR # 时间衰减学习率

import torchvision
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
# 导入tqdm库的auto模块中的tqdm函数，它是一个快速、可扩展的Python进度条库，可以在长循环中添加一个进度提示信息，用户可以实时了解程序的运行进度。
import random

_exp_name = "WSCNet_sample"

myseed = 114514  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed) #设置随机种子打乱
random.seed(myseed) #设置随机种子打乱
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Transforms

这些增强操作可以帮助你的模型在训练时看到更多样化的数据，从而提高其泛化能力。

In [3]:
# 将输入图像预处理加工，防止训练过拟合
# 在测试时通常不需要这些增强操作，因为你想评估模型在未经修改的数据上的性能。
test_tfm = transforms.Compose([
    transforms.Resize((448, 448)),
    # 图像太大也会产生内存超限报错
    transforms.ToTensor(),
])
train_tfm = transforms.Compose([
    #transforms.RandomVerticalFlip(p=0.2), # 随机垂直翻转图像，概率为0.4。
    transforms.RandomResizedCrop(size=(448, 448), scale=(0.08, 1.0), ratio=(3./4., 4./3.)),
    transforms.RandomHorizontalFlip(p=0.5), # 随机水平翻转图像，概率为0.5。
    #transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), # 随机调整图像的亮度、对比度、饱和度和色相。
    # ToTensor() should be the last one of the transforms. 最后转为张量
    transforms.ToTensor(),
])

# MyDataset

In [4]:
class MyDataset(Dataset):
    def __init__(self, path, tfm = test_tfm, files = None):
        super(MyDataset).__init__()
        
        self.my_dict = {'anger': '0', 'disgust': '1', 'fear': '2', 'joy': '3', 
                        'sadness': '4', 'surprise': '5'}
        self.files = []
        self.transform = tfm

        path = os.path.join('/kaggle/input/emotionroi/training_testing_split', path) 
        
        # 检查文件是否存在
        if os.path.exists(path):
            # 打开文件
            with open(path, 'r') as file:
                # 读取文件的每一行
                for line in file:
                    line = line.strip().replace('_', '/') 
                    # strip() 方法用于移除字符串头尾的空白字符，包括空格、制表符、换行符等
                    # 将下划线 "_" 替换为斜杠 "/"
                    self.files.append(line)
        else:
            print(f"文件不存在：{path}")
        
        if files != None:
            self.files = files

    def __len__(self):
        return len(self.files)

    def __getitem__(self,idx): # 通过下标找文件
                
        label = int(self.my_dict[self.files[idx].split("/")[0]]) # label 就是 int 类型！！！

        fname = os.path.join('/kaggle/input/emotionroi/images', self.files[idx])

        im = Image.open(fname) # 开启图像文件
        im = self.transform(im) # transform转化文件

        return im,label # 返回图像和标签

# Dataloader

In [5]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
path = 'training.txt'
train_set = MyDataset(path, tfm=train_tfm) # 训练集的图像变换
train_loader = DataLoader(train_set, batch_size=train_set.__len__(), shuffle=True, num_workers=0, pin_memory=True)    
# torch.Size([1386, 3, 256, 256])
# torch.Size([1386])

# WSCNet Model

In [6]:
class Self_Attention(nn.Module):
    def __init__(self, dim, dk, dv, dropout):
        super(Self_Attention, self).__init__()
        self.scale = dk ** -0.5
        self.q = nn.Linear(dim, dk)
        self.k = nn.Linear(dim, dk)
        self.v = nn.Linear(dim, dv)
        self.dropout = nn.Dropout(dropout)
        init.normal_(self.q.weight, mean=0, std=math.sqrt(2.0/dim))
        init.normal_(self.k.weight, mean=0, std=math.sqrt(2.0/dim))
        init.normal_(self.v.weight, mean=0, std=math.sqrt(2.0/dim))
    
    def forward(self, x):
        # x = [Batch, x[1], dim] = [Batch, token数量, token内部向量长度]
        q = self.q(x)
        k = self.k(x)
        v = self.v(x)
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        x = self.dropout(attn) @ v # @是矩阵乘法，*是逐元素相乘
        return x # [Batch, x[1], dv] 

class Cross_spatial_pooling(nn.Module):
    def __init__(self):
        super(Cross_spatial_pooling, self).__init__()

    def forward(self, out, Batch, C, K):
        out1 = nn.AdaptiveMaxPool2d(output_size=(1, 1))(out) # 全局最大池化得到 [B, kC, 1, 1]
        out1 = out1.view(Batch, -1) # 转化向量, -1表示自动计算长度，[B, kC, 1, 1] -> [B, kC*1*1]
        
        out1 = out1.view(Batch, C, K)
        # 首先，我们将张量重新形状为 [B, C, K]，这样我们可以沿着最后一个维度取平均值
        
        #out1 = out1.mean(dim=2)
        #然后，我们沿着最后一个维度取平均值，得到形状为 [B, C] 的张量
        return out1

class Sentiment_map_Coupling(nn.Module):
    def __init__(self):
        super(Sentiment_map_Coupling, self).__init__()

    def forward(self, Res_out, out, out1, Batch, C, K, N, H, W):
        Vi = out1.unsqueeze(-1).unsqueeze(-1) # 情感权重 Vi = [B, C, 1, 1]
        # 然后，我们将 out 分割为 C 个部分，每个部分对应一个类别
        # 这可以通过重塑 out 为 [B, C, K, H, W] 来实现，重塑yyds！
        out_reshaped = out.view(Batch, C, K, H, W)  # [B, C, K, H, W]
        # 接下来，我们对每个类别的 K 个通道取平均值，这可以通过在 K 维度上应用 mean 函数来实现
        St_map_subgraph = out_reshaped.mean(dim=2)  # 情感子图 = [B, C, H, W]
        Vi = Vi.repeat(1, 1, St_map_subgraph.size(2), St_map_subgraph.size(3))
        
        # 现在，将 out1 拓展后的 Vi 作为权值乘上 St_map_subgraph
        St_map = St_map_subgraph * Vi  # [B, C, H, W]
        
        St_map = St_map.sum(dim=1)  # [B, H, W]
        # 最后，我们可以通过 sum 在 C 维度上合并所有的类别贡献
        St_map = St_map.unsqueeze(1).repeat(1, N, 1, 1)
        # 层数不足则要用 unsqueeze + repeat，每个参数对应每个维度的倍数，会复制数据

        U_out = Res_out * St_map  # [B, N, H, W]
        # 耦合，两个通道数拼接起来 2N [B, 2N, H, W] 
        out2 = torch.cat((U_out, Res_out), dim = 1)
        out2 = nn.AdaptiveAvgPool2d(output_size=(1, 1))(out2) # [B, 2N, 1, 1]
        out2 = out2.view((Batch, -1)) # [B, 2N]
        # 报错显示Linear未放入device，要手动放入
        return out2
    
class WSCNet(nn.Module):
    def __init__(self, K = 4, C = 6):
        super().__init__()

        self.K = K
        self.C = C
        self.Conv2d = nn.Conv2d(in_channels=2048, out_channels=self.K * self.C, kernel_size=1)
        self.linear = nn.Linear(in_features=4096, out_features=self.C)
        init.normal_(self.Conv2d.weight, mean=0, std=math.sqrt(1.0/2048))
        init.normal_(self.linear.weight, mean=0, std=math.sqrt(2.0/4096))
        self.Attention = Self_Attention(dim=4, dk=2, dv=4, dropout=0.5)
        
        self.Conv1x1 = nn.Sequential(
            self.Conv2d,
            nn.BatchNorm2d(self.K * self.C),
            nn.ReLU(inplace=True),
        )
        self.Cross_spatial_pooling = Cross_spatial_pooling()
        self.Sentiment_map_Coupling = Sentiment_map_Coupling()

    def forward(self, Res_out):

        Batch = Res_out.shape[0]
        N = Res_out.shape[1]
        H = Res_out.shape[2]
        W = Res_out.shape[3]
        
        # 检测分支————————————————————————————————————————————————————————
        out = self.Conv1x1(Res_out) # [B, kC, h, w]   F'矩阵
        out1 = self.Cross_spatial_pooling(out, Batch, self.C, self.K) # [B, C, K]
        
        out1 = self.Attention(out1) # [B, C, K]
        out1 = out1.mean(dim=-1) # [B, C]
        
        D_branch = out1
        
        # 分类分支————————————————————————————————————————————————————————
        # 使用矩阵运算来代替循环，这样才可以在GPU中加速！！！从每epoch 12分钟到 30 秒......
        # out 是 shape [B, K*C, H, W] 的张量, out1 是 shape [B, C] 的张量
        out2 = self.Sentiment_map_Coupling(Res_out, out, out1, Batch, self.C, self.K, N, H, W)
        C_branch = self.linear(out2) #[B, C]

        return D_branch, C_branch # 返回两个 [B, C] 张量

# Net and Loss and Accuracy

In [7]:
get_loss = nn.CrossEntropyLoss() # 交叉熵损失函数

def get_net():
    pretrained_weights = torch.load('/kaggle/input/resnet101-tranlearn/resnet101-5d3b4d8f.pth')
    resnet101 = torchvision.models.resnet101(weights = pretrained_weights)# 预训练
    resnet101 = nn.Sequential(*list(resnet101.children())[:-2])
    net = nn.Sequential(resnet101, WSCNet(K=4, C=6))
    return net
#get_net()

In [8]:
def get_acc(features, labels):
    # 为了在取对数时进一步稳定该值，将小于1的值设置为1
    _, predicted = torch.max(features, 1)
    correct = predicted.eq(labels).cpu().sum().numpy()
    return 100.0 * correct / features.size(0)

# Train function

In [9]:
def train(net, train_features, train_labels, valid_features, valid_labels,
          num_epochs, batch_size):
    
    train_ls, train_acc, valid_ls, valid_acc = [], [], [], [] # 训练和验证
    train_iter = d2l.load_array((train_features, train_labels), batch_size)
    if valid_features is not None:
        valid_iter = d2l.load_array((valid_features, valid_labels), batch_size)
    
    # 这里使用的是Adam优化算法
    optimizer = torch.optim.SGD(
        [
            
            {"params": net[0].parameters(), "lr": 0.00005},
            {"params": net[1].Attention.parameters(), "lr": 0.001},
            {"params": net[1].Conv1x1.parameters(), "lr": 0.001},
            {"params": net[1].linear.parameters(), "lr": 0.01},
        ],
        weight_decay=5e-3, momentum=0.9)#momentum=0.9 Adam_lr = 1e-5
    # 先用 Adam 后用 SGD 可能有更好效果 lr = 0.0003，Adam的效果比SGD好多了，默认 lr = 0.0003 最佳？
    
    scheduler = StepLR(optimizer, step_size=6, gamma=0.1) # step_size 适当缩小
    # 创建一个 StepLR scheduler，每10个epoch将学习率乘以0.1
    
    for epoch in range(num_epochs):
          
        avg_loss = 0
        avg_acc = 0
        step = 0
        
        net.train()
        for imgs, labels in tqdm(train_iter):
            
            imgs = imgs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            dy, cy = net(imgs)
            
            loss1 = get_loss(dy, labels) # label 就是要 int 型！
            loss2 = get_loss(cy, labels)
            Loss = loss1 + loss2
            
            avg_loss += Loss.item() # 数值都转到cpu，不然会存GPU值，有些运算报错
            avg_acc += get_acc(cy, labels)
            step += 1
            
            Loss.backward()
            optimizer.step()
            
        train_ls.append(avg_loss/step)
        train_acc.append(avg_acc/step)      
            
        if valid_features is not None:
            avg_loss = 0
            avg_acc = 0
            step = 0
            
            net.eval()
            for imgs, labels in tqdm(valid_iter):

                imgs = imgs.to(device)
                labels = labels.to(device)
                
                with torch.no_grad():
                    dy, cy = net(imgs)

                loss1 = get_loss(dy, labels)
                loss2 = get_loss(cy, labels)
                Loss = loss1 + loss2
                
                avg_loss += Loss.item()
                avg_acc += get_acc(cy, labels)
                step += 1
                
            valid_ls.append(avg_loss/step)
            valid_acc.append(avg_acc/step)
            
        scheduler.step() # 每8个epoch结束后调整学习率, Adam也可以调整
        current_lr = scheduler.get_last_lr()
        print(f"Epoch {epoch + 1}, Current Learning Rate: \n{current_lr}")
        print(f"train_ls: {train_ls[-1]}", end = " ")
        print(f"train_acc: {train_acc[-1]}")
        
        if valid_features is not None:
            print(f"valid_ls: {valid_ls[-1]}", end = " ")
            print(f"valid_acc: {valid_acc[-1]}")
        
    return train_ls, valid_ls, train_acc, valid_acc

# K折交叉验证

In [10]:
def get_k_fold_data(k, i, X, y):
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size) # 创造切片对象
        X_part, y_part = X[idx, :, :, :], y[idx]
        
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat([X_train, X_part], 0)
            y_train = torch.cat([y_train, y_part], 0)
            
    return X_train, y_train, X_valid, y_valid

In [11]:
def k_fold(net, k, X_train, y_train, num_epochs, batch_size):
    
    train_l_sum, valid_l_sum, train_acc_sum, valid_acc_sum = 0, 0, 0, 0
    
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train) # K折处理数据
        
        train_ls, valid_ls, train_acc, valid_acc = train(net, *data, num_epochs, batch_size) # 训练函数
        
        train_l_sum += train_ls[-1]
        train_acc_sum += train_acc[-1]
        valid_l_sum += valid_ls[-1]
        valid_acc_sum += valid_acc[-1]
        
        if i == 0:
            print("大折")
            d2l.plot(list(range(1, num_epochs + 1)), [train_ls, train_acc, valid_ls, valid_acc], # 合并两组数据到一个列表
                     xlabel='epoch', ylabel='num', xlim=[1, num_epochs],
                     legend=['train_ls', 'train_acc', 'valid_ls', 'valid_acc'], yscale='log')
            
        print(f'折{i + 1}，训练log CEloss: {float(train_ls[-1])}')
        print(f'验证log CEloss: {float(valid_ls[-1])}')
        
        print(f'折{i + 1}，训练log accuracy: {float(train_acc[-1])}')
        print(f'验证log accuracy: {float(valid_acc[-1])}')
        
    return train_l_sum / k, valid_l_sum / k, train_acc_sum / k, valid_acc_sum / k

# Start Training

In [12]:
train_imgs, train_labels = None, None
for batch in tqdm(train_loader):
    train_imgs, train_labels = batch

  0%|          | 0/1 [00:00<?, ?it/s]

In [13]:
# The number of batch size. 第一维
batch_size = 22

# The number of training epochs.
num_epochs = 16

k = 1 # K折

net = get_net().to(device)# 得到训练网络！

data = [train_imgs, train_labels, None, None]
train_ls, valid_ls, train_acc, valid_acc = train(net, *data, num_epochs, batch_size)



  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1, Current Learning Rate: 
[5e-05, 0.001, 0.001, 0.01]
train_ls: 4.232204520513141 train_acc: 34.34343434343434


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2, Current Learning Rate: 
[5e-05, 0.001, 0.001, 0.01]
train_ls: 3.546270586195446 train_acc: 48.05194805194807


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3, Current Learning Rate: 
[5e-05, 0.001, 0.001, 0.01]
train_ls: 3.441809616391621 train_acc: 50.793650793650784


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 4, Current Learning Rate: 
[5e-05, 0.001, 0.001, 0.01]
train_ls: 3.3826951904902383 train_acc: 52.38095238095236


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 5, Current Learning Rate: 
[5e-05, 0.001, 0.001, 0.01]
train_ls: 3.1073183559236073 train_acc: 59.595959595959556


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 6, Current Learning Rate: 
[5e-06, 0.0001, 0.0001, 0.001]
train_ls: 2.7984546441880482 train_acc: 65.007215007215


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 7, Current Learning Rate: 
[5e-06, 0.0001, 0.0001, 0.001]
train_ls: 2.4316055623311845 train_acc: 76.11832611832614


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 8, Current Learning Rate: 
[5e-06, 0.0001, 0.0001, 0.001]
train_ls: 2.3277310076214017 train_acc: 79.43722943722948


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 9, Current Learning Rate: 
[5e-06, 0.0001, 0.0001, 0.001]
train_ls: 2.328959480164543 train_acc: 80.37518037518045


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 10, Current Learning Rate: 
[5e-06, 0.0001, 0.0001, 0.001]
train_ls: 2.316531593837435 train_acc: 81.16883116883116


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 11, Current Learning Rate: 
[5e-06, 0.0001, 0.0001, 0.001]
train_ls: 2.260740030379522 train_acc: 83.98268398268398


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 12, Current Learning Rate: 
[5.000000000000001e-07, 1e-05, 1e-05, 0.0001]
train_ls: 2.25217238305107 train_acc: 82.97258297258298


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 13, Current Learning Rate: 
[5.000000000000001e-07, 1e-05, 1e-05, 0.0001]
train_ls: 2.2819685406155057 train_acc: 83.62193362193364


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 14, Current Learning Rate: 
[5.000000000000001e-07, 1e-05, 1e-05, 0.0001]
train_ls: 2.240186479356554 train_acc: 84.63203463203462


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 15, Current Learning Rate: 
[5.000000000000001e-07, 1e-05, 1e-05, 0.0001]
train_ls: 2.2459323973882768 train_acc: 84.48773448773451


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch 16, Current Learning Rate: 
[5.000000000000001e-07, 1e-05, 1e-05, 0.0001]
train_ls: 2.2470588305639843 train_acc: 84.12698412698411


# Dataloader for Test

In [14]:
path = 'testing.txt'
#path = 'training.txt'
test_set = MyDataset(path, tfm=test_tfm) # 测试
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True, 
                         num_workers=0, pin_memory=True)

# Testing

In [15]:
avg_loss = 0
avg_acc = 0
step = 0

test_ls, test_acc = [], []

net.eval()
for batch in tqdm(test_loader):

    imgs, labels = batch
    imgs = imgs.to(device)
    labels = labels.to(device)

    with torch.no_grad():
        dy, cy = net(imgs)

    loss1 = get_loss(dy, labels)
    loss2 = get_loss(cy, labels)
    Loss = loss1 + loss2

    avg_loss += Loss.item()
    avg_acc += get_acc(cy, labels)
    step += 1

test_ls.append(avg_loss/step)
test_acc.append(avg_acc/step)
print(f"test_ls: {test_ls[-1]}", end = " ")
print(f"test_acc: {test_acc[-1]}")

  0%|          | 0/27 [00:00<?, ?it/s]

test_ls: 3.3000311763198287 test_acc: 52.86195286195287
