In [1]:
import os
from glob import glob
import time
import imageio
import math
import numpy
from operator import itemgetter
import pickle
import argparse
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss
import gc
import shutil

import numpy
from transformers import AutoTokenizer
import numpy
import torch
from torch import nn
from transformers import BertModel
import os
import pickle
import warnings

import torch
from torch import nn
import torch.nn.functional
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
import torch.utils.data as data
import torchvision
from torchvision import transforms


read_images_dir = "/kaggle/input/dataset_images/"  # 原始图片
save_array_dir = "/kaggle/input/imagevector2/"  # 生成后的向量
save_images_dir = "/kaggle/input/prevectorimages/"  # 统一尺寸后的图片

wordPrefix = "/kaggle/input/extract/"  # 每个图片的物品类别 ["id", "class_name" * 5]
dataPrefix = "/kaggle/input/text---/"  # 图片对应的文本 ["id", "text", "is_sarcasm"]
imagePrefix = "/kaggle/input/imageVector/"  # 图片的对应区域向量 id.npy
wordsPrefix = "/kaggle/input/words-/"  # 词表
imageClassDir = "/kaggle/input/extractwords/"  # 类名对应的编号和GLove向量
classEmbeddingDir = "/kaggle/input/extractwords/vector"  # 训练完成的嵌入式向量
textEmbeddingDir = "/kaggle/input//words-/vector"
imageVectorDir = "/kaggle/input/imagevector2/imageVector2/"  # 图片向量的存储目录
modelWightsDir = "/kaggle/input/modelwights/"  # 模型权重
saveModelWightsDir = "/kaggle/working/modelwights/"

if not os.path.exists(saveModelWightsDir):
    os.mkdir(saveModelWightsDir)




### Residual

In [2]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/10 21:48
# @Author  : CaoQixuan
# @File    : ResNet.py
# @Description : 这个文件是用来获得预训练模型的 downsample变量不能改为其他名字，服了

class Residual(nn.Module):
    """ 残差块 -50"""
    expansion = 4  # 残差块第3个卷积层的通道膨胀倍率

    def __init__(self, in_channel, out_channel, stride=1, down_sample=None, use_1x1conv=False):
        """
        :param in_channel:残差块输入通道数
        :param out_channel:残差块输出通道数
        :param stride:卷积步长
        :param down_sample:在_make_layer函数中赋值，用于控制shortcut图片下采样 H/2 W/2
        这里的意思是 在整个卷积层的开始时，会发生 H/2 W/2
        :param use_1x1conv:
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=1, stride=1,
                               bias=False)  # H,W不变: in_channel -> out_channel
        self.bn1 = nn.BatchNorm2d(num_features=out_channel)

        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3, stride=stride,
                               padding=1, bias=False)  # H/2，W/2 C不变
        self.bn2 = nn.BatchNorm2d(num_features=out_channel)

        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion, kernel_size=1,
                               stride=1, bias=False)  # H,W不变 C: out_channel -> 4*out_channel
        self.bn3 = nn.BatchNorm2d(num_features=out_channel * self.expansion)

        self.relu = nn.ReLU(inplace=True)

        self.downsample = down_sample

    def forward(self, X):
        X_res = X
        if self.downsample is not None:
            X_res = self.downsample(X_res)
        output = self.relu(self.bn1(self.conv1(X)))
        output = self.relu(self.bn2(self.conv2(output)))
        output = self.bn3(self.conv3(output))
        output += X_res  # 残差连接
        return self.relu(output)


class ResNet(nn.Module):
    def __init__(self, block, block_num, num_classes=1000):
        """
        :param block:堆叠的基本模块
        :param block_num:基本模块堆叠个数,是一个list,对于resnet50=[3,4,6,3]
        :param num_classes:num_classes: 全连接之后的分类特征维度
        """
        super(ResNet, self).__init__()
        self.in_channel = 64  # conv1的输出通道数
        # 网络开始 224 * 224-> 112 * 112
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=self.in_channel, kernel_size=7, stride=2, padding=3,
                               bias=False)  # H/2,W/2。C:3->64
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        # 网络开始 112 * 112-> 56 * 56
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self.resnet_block(block=block, channel=64, block_num=block_num[0],
                                        stride=1)  # H W 不变 不需要下采样
        self.layer2 = self.resnet_block(block=block, channel=128, block_num=block_num[1],
                                        stride=2)  # H W 减半 50 101 150 需要下采样
        self.layer3 = self.resnet_block(block=block, channel=256, block_num=block_num[2],
                                        stride=2)  # H W 减半 50 101 150 需要下采样
        self.layer4 = self.resnet_block(block=block, channel=512, block_num=block_num[3],
                                        stride=2)  # H W 减半 50 101 150 需要下采样

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))  # 将每张特征图大小->(1,1)，则经过池化后的输出维度=通道数
        self.fc = nn.Linear(in_features=512 * block.expansion, out_features=num_classes)

        for m in self.modules():  # 权重初始化
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def resnet_block(self, block, channel, block_num, stride=1):
        """
        :param block: 堆叠的基本模块
        :param channel:基本模块堆叠个数,是一个list,对于resnet50=[3,4,6,3]
        :param block_num:当期stage堆叠block个数
        :param stride: 默认卷积步长
        :return: 生成的blocks
        """
        downsample = None  # 用于控制下采样的 即减半的
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=self.in_channel, out_channels=channel * block.expansion, kernel_size=1,
                          stride=stride, bias=False),  # out_channels决定输出通道数x4，stride决定特征图尺寸H,W/2
                nn.BatchNorm2d(num_features=channel * block.expansion))

        blocks = []
        blocks.append(block(in_channel=self.in_channel, out_channel=channel, down_sample=downsample,
                            stride=stride))  # 定义convi_x中的第一个残差块，只有第一个需要设置down_sample和stride
        self.in_channel = channel * block.expansion  # 在下一次调用_make_layer函数的时候，self.in_channel已经x4
        for _ in range(1, block_num):  # 通过循环堆叠其余残差块(堆叠了剩余的block_num-1个)
            blocks.append(block(in_channel=self.in_channel, out_channel=channel))
        return nn.Sequential(*blocks)

    def forward(self, X):
        output = self.max_pool(self.bn1(self.bn1(self.conv1(X))))

        output = self.layer1(output)
        output = self.layer2(output)
        output = self.layer3(output)
        output = self.layer4(output)

        output = self.avg_pool(output)
        output = torch.flatten(output, 1)
        # output = self.fc(output)

        return output

In [3]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/11 15:34
# @Author  : CaoQixuan
# @File    : ImageVector.py
# @Description : 获得每个图片的原始特征向量
# 该类是通过输入的标准型图片，进行多个分region后通过resnet网络得到向量后平均，生成图片模态向量


class ImageFeature(nn.Module):

    def __init__(self, net, block_num=196, kernel_size=64, stride=32, output_size=2048, in_channel=3):
        super().__init__()
        self.output_size = output_size  # 输出特征向量长度
        self.net = net  # 网络
        self.block_num = block_num  # 生成块数
        self.kernel_size = kernel_size  # 块的大小
        self.stride = stride  # 步长
        self.in_channel = in_channel  # 输入通道数

    def forward(self, input):
        batch_size, in_channel = input.shape[0], input.shape[1]
        # print("input_size: ", input.shape)
        output = nn.Unfold(kernel_size=(self.kernel_size, self.kernel_size), stride=self.stride)(input)
        # print("unfold_size: ", output.shape)
        output = output.transpose(1, 2).reshape(-1, in_channel, self.kernel_size,
                                                self.kernel_size)  # 一个图片划分为多个Region (batch_size * block_num, channel, kernel_size, kernel_size)

        output = self.net.forward(output).reshape(batch_size,
                                                  self.block_num,
                                                  self.output_size)  # 输入resNet网络后得到 (batch_size, block_num, h, w)

        # # 这里的向量平均化在tensorflow的网络里做了  # # # #
        # h = self.output_size // 64  # 这里是加速运算效果，输出默认是2048
        # w = 64
        # output = output.reshape(batch_size, -1, self.kernel_size, self.kernel_size)
        # filters = torch.ones(1, output.shape[1], 1, 1) * 1.0 / output.shape[1]  # 生成过滤器
        # output = F.conv2d(input=output, weight=filters, groups=1)  # 卷积
        # # # # #
        return output  # 返回向量


In [4]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/5/5 10:50
# @Author  : CaoQixuan
# @File    : Function.py
# @Description :常用的函数

def try_gpu(i=0):  # @save
    """如果存在，则返回gpu(i)，否则返回cpu()"""
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')


def getScore(y_true, y_pred, threshold=0.5):
    """
    :param y_true:
    :param y_pred:
    :param threshold: 阈值
    :return:
    """
    # y_true = y_true.flatten()
    # y_pred = (y_pred.flatten() > threshold).type(torch.float32)
    auc = roc_auc_score(y_true, y_pred)  # 预测值是概率
    loss = log_loss(y_true, y_pred)
    y_pred = (y_pred > threshold).type(torch.float32)
    acc = accuracy_score(y_true, y_pred)
    pre = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    
    return acc, pre, rec, f1, auc, loss


def getResNet50(num_classes=1000):
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    return ResNet.ResNet(block=ResNet.Residual, block_num=[3, 4, 6, 3], num_classes=num_classes)


def Load_ResNet50(num_classes=1000):
    device = try_gpu()
    model_weight_path = modelWightsDir + "resnet50.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net = getResNet50(num_classes)
    net.load_state_dict(torch.load(model_weight_path, map_location=device))
    return net


def getResNet101(num_classes=1000):
    # https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    return ResNet.ResNet(ResNet.Residual, [3, 4, 23, 3], num_classes=num_classes)


def Load_ResNet101(num_classes=1000):
    device = try_gpu()
    model_weight_path = modelWightsDir + "resnet101.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net = getResNet101(num_classes)
    net.load_state_dict(torch.load(model_weight_path, map_location=device))
    return net


class DataSet(data.Dataset):
    """
    自定义的数据集参数,用于提取图片的特征向量
    """

    def __init__(self, img_dir, resize):
        super(DataSet, self).__init__()
        self.img_paths = glob('{:s}/*'.format(img_dir))
        self.transform = transforms.Compose([transforms.Resize(size=(resize, resize))])

    def __getitem__(self, item):
        img = Image.open(self.img_paths[item]).convert('RGB')
        img = self.transform(img)

        return img, self.img_paths[item]

    def __len__(self):
        return len(self.img_paths)


def ProcessPreImages(img_dir, resize, save_dir):
    """
    :param img_dir:
    :param resize: 改为需要的大小
    :param save_dir:
    :return:
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_dir', type=str, default=img_dir)
    parser.add_argument('--resize', type=int, default=resize)
    parser.add_argument('--save_dir', type=str, default=save_dir)
    args = parser.parse_args()
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    else:
        if len(os.listdir(img_dir)) >= 1:  # 说明已经有文件了 - 默认已经处理完了图片
            return None

    dataset = DataSet(args.img_dir, args.resize)
    print('dataset:', len(dataset))
    count = 0
    start = time.time()
    for i in range(len(dataset)):
        img, path = dataset[i]
        path = os.path.basename(path)
        if count % 1000 == 0:
            print('Processing: ', count, " files")
        count += 1
        if not os.path.exists(args.save_dir + "/{:s}".format(path[0:-4])):  # 生成transformer要求的数据集格式
            os.mkdir(args.save_dir + "/{:s}".format(path[0:-4]))
        imageio.imwrite(args.save_dir + '/{:s}/{:s}'.format(path[0:-4], path), img)
    end = time.time()
    print("finished total cost: {:.2f} min".format((end - start) / 60))


def getDatasetIter(img_dir, batch_size, shuffle=True, num_workers=4):
    """
    :param img_dir:
    :param batch_size: 批量大小
    :param shuffle: 是否随机
    :param num_workers: 使用的线程数
    :return: 数据集, 类别名称
    """
    transform = transforms.ToTensor()
    train_data = torchvision.datasets.ImageFolder(img_dir, transform=transform)
    print(train_data)
    train_iter = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=shuffle,
                                             num_workers=num_workers)
    return train_iter, train_data.classes


def generateImageVecFiles(imageSize=480, inChannel=3, batchSize=4, blockNum=196, kernelSize=64, stride=32,
                          outputSize=2048):
    """
    :param imageSize: 图像统一调整为多少
    :param inChannel: 输入通道
    :param batchSize:
    :param blockNum: 一个图片分为多少个区域
    :param kernelSize: 每个区域多大
    :param stride: 步长
    :param outputSize: 输出的向量多少
    :return:
    """
    net = getResNet50().to(device=try_gpu())

    if not os.path.exists(save_array_dir):
        os.mkdir(save_array_dir)
    else:
        if len(os.listdir(save_array_dir)) >= 1:  # 说明已经有文件了 - 默认已经得到了向量
            return None
    if not os.path.exists(save_images_dir):
        os.mkdir(save_images_dir)

    ProcessPreImages(read_images_dir, imageSize, save_images_dir)
    preVectorIter, classes = getDatasetIter(save_images_dir, batch_size=batchSize, shuffle=False)
    extractImageFeature = ImageFeature(net=net, block_num=blockNum,
                                       kernel_size=kernelSize, stride=stride,
                                       output_size=outputSize, in_channel=inChannel)

    def saveArray(array, index):
        array = array.unsqueeze(0).detach().numpy()
        numpy.save(save_array_dir + classes[int(index)], array)

    count = 0
    net.eval()
    start = time.time()
    with torch.no_grad():
        for X, y in preVectorIter:
            end = time.time()
            if count % (batchSize * 50) == 0:
                print("have got {} image vectors, total cost:{:.2f} min".format(count, (end - start) / 60))
            count += batchSize
            if os.path.exists(save_array_dir + classes[int(y[0])] + ".npy"):
                continue
            batch_tensor = extractImageFeature.forward(X.type(torch.float32).cuda()).to(torch.device('cpu'))
            torch.cuda.empty_cache()
            [saveArray(data, index) for data, index in zip(batch_tensor, y)]  # 加速


def modelScoresVision(writer, scoresValues, scoresNames, lrValues=None):
    """
    :param lrValues:
    :param scoresNames:
    :param writer: Tensoboard
    :param scoresValues:  epochs * 评估参数个数 * 数据集字典
    :return:
    """
    if lrValues is not None:
        for batch in range(len(lrValues)):
            writer.add_scalar(tag="train_lr", scalar_value=lrValues[batch], global_step=batch)

    if scoresValues is None:
        return None
    for epoch in range(len(scoresValues) - 1):
        for i in range(len(scoresNames)):
            mapDict = {
                "train": scoresValues[epoch][i][0],
                "test": scoresValues[epoch][i][1],
                "valid": scoresValues[epoch][i][2],
            }
            writer.add_scalars(main_tag=scoresNames[i], tag_scalar_dict=mapDict, global_step=epoch)


In [5]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/5/4 19:24
# @Author  : CaoQixuan
# @File    : DATASET.py
# @Description :
from enum import Enum


class DATASET(Enum):
    TRAIN = "train_text"
    TEST = "test_text"
    VALID = "valid_text"


In [6]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/24 15:02
# @Author  : CaoQixuan
# @File    : LoadData.py
# @Description : 读取各种数据


class MyDataSet(Dataset):
    def __init__(self, seqLen, imageClassDir, imageVectorDir, textDir, wordVocabDir, dataType=DATASET.TRAIN):
        """
        :param seqLen:
        :param imageClassDir:图片对应类的字典序列化文件
        :param imageVectorDir:ResNet生成的文本向量的文件目录
        :param textDir:推特数据的文本数据文件
        :param wordVocabDir:词表的字典序列化文件
        """
        self.sqLen = seqLen
        mapDataSet = {
            DATASET.TRAIN: "train_text",
            DATASET.TEST: "test_text",
            DATASET.VALID: "valid_text"
        }
        self.seqLen = seqLen
        self.imageVectorDir = imageVectorDir
        self.id2text = []
        self.tokenizer = AutoTokenizer.from_pretrained(modelWightsDir + "bert-base-cased")
        with open(wordVocabDir + "vocab.py3", 'rb') as f:
            self.word2id = pickle.load(f)  # 词表
        with open(imageClassDir + "class2id.py3", 'rb') as f:
            self.attribute2id = pickle.load(f)  # 类表
        with open(imageClassDir + "image2class.py3", 'rb') as f:
            self.dictExtractWords = pickle.load(f)  # 类表
        with open(textDir + mapDataSet[dataType], 'r', encoding="utf-8") as f:
            for line in f:
                self.id2text.append(eval(line))
        self.id2text = numpy.array(self.id2text)

    def processText(self, sqLen, source):
        """
        :param sqLen:文本长度
        :param source:字符串
        :return:对应词表的对应SqLen长度
        """
        strs = source.split(" ")
        if len(strs) > sqLen:
            strs = strs[:sqLen]
        strs = numpy.array(strs)
        func = numpy.vectorize(lambda x: self.word2id[x] if x in self.word2id else self.word2id['<unk>'])
        return numpy.pad(func(strs), (0, sqLen - len(strs)))

    def __getitem__(self, index):
        id = self.id2text[index, 0]
        text = ' '.join(self.dictExtractWords[int(id)]) + self.id2text[index, 1]
        reText = torch.tensor(self.processText(self.seqLen, text))
        retY = torch.tensor(self.id2text[index, 2].astype(numpy.float32))
        reWords = torch.tensor(itemgetter(*self.dictExtractWords[int(id)])(self.attribute2id))
        image = torch.tensor(numpy.load(self.imageVectorDir + id + ".npy").squeeze())
        encodedInput = self.tokenizer(text, return_tensors='pt', padding="max_length", max_length=self.sqLen,
                                      truncation=True)
        input_ids, token_type_ids, attention_mask = encodedInput["input_ids"], encodedInput[
            "token_type_ids"], encodedInput["attention_mask"]
        return (reText, image, reWords, (input_ids, token_type_ids, attention_mask)), retY

    def __len__(self):
        return self.id2text.shape[0]

In [7]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/25 16:13
# @Author  : CaoQixuan
# @File    : ExtractFeature.py
# @Description :图像向量部


class ExtractFeature(nn.Module):
    def __init__(self, embeddingDir, device="cpu"):
        super().__init__()
        self.embeddingArray = torch.Tensor(
            numpy.loadtxt(embeddingDir, delimiter=" ", dtype="float32"))  # 1001 * 300 1001为1000个类和1个unk
        if device == "gpu":
            self.embeddingArray = self.embeddingArray.to(try_gpu())
        self.embSize = self.embeddingArray.shape[1]  # 向量后的大小
        self.vocabSize = self.embeddingArray.shape[0]  # 类表大小
        self.embedding = nn.Embedding(self.vocabSize, self.embSize).from_pretrained(self.embeddingArray)
        self.linear1 = nn.Linear(self.embSize, self.embSize // 2)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(self.embSize // 2, 1)
        self.softmax = nn.Softmax()
        self.attention = AdditiveAttention(key_size=self.embSize, query_size=self.embSize,
                                           num_hiddens=self.embSize // 2)
        nn.init.kaiming_normal_(self.linear1.weight)
        nn.init.xavier_normal_(self.linear2.weight)

    def forward(self, X):
        batch_size, classes = X.shape[0], X.shape[1]
        output1 = self.embedding(X)  # batch * 5 * 200
        # # 这里也可以用之前写的注意力机制 两个版本
        return output1, torch.mean(self.attention.forward(queries=output1, keys=output1, values=output1),
                                   dim=1).squeeze()
        # output2 = self.relu(self.linear1(output1))  # batch * 5 * 100
        # output3 = self.softmax(self.linear2(output2)).reshape(batch_size, 1, classes)  # batch * 1 * 5
        # return output1, torch.squeeze(output3 @ output1)  # batch * 5 * 100, batch * 200

In [8]:
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/25 19:20
# @Author  : CaoQixuan
# @File    : ImageFeature.py
# @Description :对于图片提取出来的向量加入网络

class ImageFeature(nn.Module):
    def __init__(self, defaultFeatureSize=1024, device="cpu"):
        super().__init__()
        self.defaultFeatureSize = defaultFeatureSize
        self.linear = torch.nn.Linear(2048, self.defaultFeatureSize)
        self.relu = torch.nn.ReLU()

    @staticmethod
    def weight_init(m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            nn.init.kaiming_normal_(m.weight)

    def forward(self, X):
        # batch_size = X.shape[0]
        output = self.relu(self.linear(X))
        return output, torch.mean(output, dim=1)  # batch * 196 *1024,  batch  * 1024

In [9]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/25 19:33
# @Author  : CaoQixuan
# @File    : TextFeature.py
# @Description :文本特征提取


class TextFeature_LSTM(nn.Module):
    def __init__(self, nHidden, seqLen, guideLen, textEmbeddingDir, numLayers=1, dropout=0, device="cpu"):
        """
        :param nHidden: 隐藏层
        :param seqLen:  步长
        :param guideLen: 引导向量的维度 - 物品类别嵌入后的维度
        :param textEmbeddingDir: 文本glove后的向量
        :param numLayers: 网络层数 - 构建深层网络结构
        :param dropout:
        """
        super(TextFeature_LSTM, self).__init__()
        self.nHidden = nHidden
        self.seqLen = seqLen
        self.numLayers = numLayers
        self.dropout = dropout
        self.guideLen = guideLen
        self.embeddingArray = torch.Tensor(
            numpy.loadtxt(textEmbeddingDir, delimiter=" ", dtype="float32"))
        if device == "gpu":
            self.embeddingArray = self.embeddingArray.to(try_gpu())
        self.embSize = self.embeddingArray.shape[1]  # 向量后的大小
        self.vocabSize = self.embeddingArray.shape[0]  # 类表大小
        self.embedding = nn.Embedding(self.vocabSize, self.embSize).from_pretrained(self.embeddingArray)
        self.layerNorm = nn.LayerNorm(self.embSize)
        self.fwLinearH = torch.nn.Linear(guideLen, self.nHidden)
        self.fwLinearC = torch.nn.Linear(guideLen, self.nHidden)
        self.bwLinearH = torch.nn.Linear(guideLen, self.nHidden)
        self.bwLinearC = torch.nn.Linear(guideLen, self.nHidden)
        self.relu = torch.nn.ReLU()
        self.biLSTM = nn.LSTM(input_size=self.embSize,
                              hidden_size=self.nHidden,
                              batch_first=True,
                              num_layers=self.numLayers,
                              dropout=self.dropout,
                              # 没有加dropout 因为不知道如何在测试时取消 20230426 - 预测时model.eval() / model.train() 来控制 20230427
                              bidirectional=True)

    @staticmethod
    def weight_init(m):
        # 默认方法
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight)
        if isinstance(m, nn.LSTM):
            for name, param in m.named_parameters():
                if 'bias' in name:
                    nn.init.constant_(param, 0.0)
                elif 'weight' in name:
                    nn.init.xavier_normal_(param)

    def forward(self, X, guideVector):
        """
        :param X:
        :param guideVector:  引导向量的 - 物品类别嵌入后
        :return: (batch_size, 2 * nHidden)
        """
        if guideVector is None:
            guideVector = torch.zeros((len(X), self.guideLen)).cuda()
        X = self.embedding(X)
        X = self.layerNorm(X)
        fw_h0 = self.relu(self.fwLinearH(guideVector))
        fw_c0 = self.relu(self.fwLinearC(guideVector))
        bw_h0 = self.relu(self.bwLinearH(guideVector))
        # bw_h0 = self.relu(self.fwLinearH(guideVector)) # 这里是否使用同一感知机层初始化正向和反向的H，C，需要进一步实验 20230427
        bw_c0 = self.relu(self.bwLinearC(guideVector))
        # bw_c0 = self.relu(self.fwLinearC(guideVector))
        init_h0 = torch.stack((fw_h0,) * self.numLayers + (bw_h0,) * self.numLayers,
                              dim=0)  # 深层LSTM是初始化为(D * layer , nHidden) -> (D, layers, nHidden) 观察API得出 存疑20230427
        init_c0 = torch.stack((fw_c0,) * self.numLayers + (bw_c0,) * self.numLayers,
                              dim=0)  # 加入stack 后网络的感知层是否会更新？ 20230427
        output, (_, _) = self.biLSTM(X, (init_h0, init_c0))  # output = batch_size * seqLen * (2 * hidden)
        return output, torch.mean(output, dim=1)  # batch_size * seqLen * (2 * hidden), batch_size * (2 * hidden)


class TextFeature_Bert(nn.Module):
    def __init__(self, nHidden, sqLen, dropout, device="cpu"):
        super(TextFeature_Bert, self).__init__()
        self.device = device
        self.nHidden = nHidden
        self.sqLen = sqLen
        self.bert = BertModel.from_pretrained(modelWightsDir + "bert-base-cased")
        self.layerNorm = nn.LayerNorm(768)  # 模型一般是768 如果是别的自己改一下
        self.linear = nn.Linear(768, self.nHidden * 2)
        self.dropout = nn.Dropout(dropout)
        self.tanh = nn.Tanh()

    @staticmethod
    def weight_init(m):
        # 默认方法
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)

    def forward(self, text):
        input_ids, token_type_ids, attention_mask = text
        with torch.no_grad():
            output = self.bert(input_ids=input_ids.squeeze(), token_type_ids=token_type_ids.squeeze(),
                               attention_mask=attention_mask.squeeze())[0].detach()
        output = self.layerNorm(output)
        output = self.tanh(self.linear(output))
        output = self.dropout(output)
        return output, torch.mean(output, dim=1)


class TextFeature(nn.Module):
    def __init__(self, nHidden, seqLen, guideLen, textEmbeddingDir, numLayers=1, dropout=0, device="cpu"):
        super(TextFeature, self).__init__()
        self.nHidden = nHidden
        self.lstm = TextFeature_LSTM(nHidden, seqLen, textEmbeddingDir=textEmbeddingDir,
                                     numLayers=numLayers,
                                     guideLen=guideLen, dropout=dropout, device=device)
        self.bert = TextFeature_Bert(nHidden=nHidden, sqLen=seqLen, dropout=dropout)
        self.attentionLSTM = AdditiveAttention(query_size=nHidden * 2, key_size=nHidden * 2, dropout=dropout,
                                               num_hiddens=nHidden)
        self.elu = nn.ELU()
        self.lstm.apply(self.lstm.weight_init)
        self.bert.apply(self.bert.weight_init)
        self.attentionLSTM.apply(self.attentionLSTM.weight_init)

    def forward(self, reText, text, guideVector):
        lstm_o, lstm_vec = self.lstm.forward(reText, guideVector)
        bert_o, bert_vec = self.bert(text)
        lstm_o = self.attentionLSTM.forward(lstm_o, lstm_o, lstm_o)
        o = (lstm_o + bert_o) / 2
        output = self.elu(o)
        output_vec = (lstm_vec + bert_vec) / 2
        return output, output_vec

In [10]:
# -*- coding: utf-8 -*-
# @Time    : 2023/5/4 9:58
# @Author  : CaoQixuan
# @File    : Attention.py
# @Description :各种注意力机制

def sequence_mask(X, valid_len, value=0):
    maxlen = X.size(1)
    mask = torch.arange((maxlen), dtype=torch.float32,
                        device=X.device)[None, :] < valid_len[:, None]
    X[~mask] = value
    return X


def masked_softmax(X, valid_lens):
    """
     通过在最后一个轴上掩蔽元素来执行softmax操作
    :param X: X:3D张量
    :param valid_lens: valid_lens:1D或2D张量
    :return:
    """
    if valid_lens is None:
        return nn.functional.softmax(X, dim=-1)
    else:
        shape = X.shape
        if valid_lens.dim() == 1:
            valid_lens = torch.repeat_interleave(valid_lens, shape[1])
        else:
            valid_lens = valid_lens.reshape(-1)
        # 最后一轴上被掩蔽的元素使用一个非常大的负值替换，从而其softmax输出为0
        X = sequence_mask(X.reshape(-1, shape[-1]), valid_lens,
                          value=-1e6)
        return nn.functional.softmax(X.reshape(shape), dim=-1)


class AdditiveAttention(nn.Module):
    """加性注意力"""

    def __init__(self, key_size, query_size, num_hiddens, dropout=0):
        super(AdditiveAttention, self).__init__()
        self.W_k = nn.Linear(key_size, num_hiddens, bias=False)
        self.W_q = nn.Linear(query_size, num_hiddens, bias=False)
        self.w_v = nn.Linear(num_hiddens, 1, bias=False)
        self.dropout = nn.Dropout(dropout)

    @staticmethod
    def weight_init(m):
        # 默认方法
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)

    def forward(self, queries, keys, values, valid_lens=None):
        queries, keys = self.W_q(queries), self.W_k(keys)
        # 在维度扩展后，
        # queries的形状：(batchSize，查询的个数，1，num_hidden)
        # key的形状：(batchSize，1，“键－值”对的个数，num_hiddens)
        # 使用广播方式进行求和
        features = queries.unsqueeze(2) + keys.unsqueeze(1)
        features = torch.tanh(features)
        # self.w_v仅有一个输出，因此从形状中移除最后那个维度。
        # scores的形状：(batchSize，查询的个数，“键-值”对的个数)
        scores = self.w_v(features).squeeze(-1)
        self.attention_weights = masked_softmax(scores, valid_lens)
        # values的形状：(batchSize，“键－值”对的个数，值的维度)
        return torch.bmm(self.dropout(self.attention_weights), values)


class DotProductAttention(nn.Module):
    """缩放点积注意力"""

    def __init__(self, dropout):
        super(DotProductAttention, self).__init__()
        self.attention_weights = None
        self.dropout = nn.Dropout(dropout)

    def forward(self, X, valid_lens=None):
        """
        :param valid_lens: 忽略某些键值对时用
        :param X: (batchSize，查询的个数，d)
        :return: batchSize * 值的维度
        """
        #  queries: (batchSize，查询的个数，d)
        #  keys: (batchSize，“键－值”对的个数，d)
        #  values: (batchSize，“键－值”对的个数，值的维度)
        #  valid_lens: (batchSize，查询的个数)
        queries, keys, values = X, X, X
        d = queries.shape[-1]
        # 设置transpose_b=True为了交换keys的最后两个维度
        scores = torch.bmm(queries, keys.transpose(1, 2)) / math.sqrt(d)
        self.attention_weights = masked_softmax(scores, valid_lens)
        return torch.bmm(self.dropout(self.attention_weights), values)  # batchSize，查询个数的个数，d


class MultiModalAttention(nn.Module):
    """多模态加性注意力机制融合"""

    def __init__(self, querySizes, keySize, dropout=0):
        """
        QKV: query = query, key=key, value=key
        :param querySizes: 利用多个向量进行融合-源于
        :param keySize:
        :param dropout:
        """
        super().__init__()
        self.attentions = []
        count = 0
        for querySize in querySizes:
            exec(
                "self.addATT_{} = AdditiveAttention(query_size=querySize, key_size=keySize, num_hiddens=keySize // 2, "
                "dropout=dropout)".format(
                    count))
            exec("self.attentions.append(self.addATT_{})".format(count))
            count += 1
        [attention.apply(MultiModalAttention.weight_init) for attention in self.attentions]

    @staticmethod
    def weight_init(m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)

    def forward(self, queries, key):
        """
        :param queries: (query1, query2...)
        :param key: batchSize, 键值对, values
        :return: batchSize * 1 * 值的维度
        """

        vector = torch.zeros(key.shape[0], 1, key.shape[-1], device=key.device)  # 这里加维为了后面stack
        for attention, query in zip(self.attentions, queries):
            vector += attention.forward(queries=query, keys=key, values=key)
        return vector / len(self.attentions)


In [11]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/27 11:11
# @Author  : CaoQixuan
# @File    : NNManager.py
# @Description :总体网络搭建


class Net(nn.Module):

    def __init__(self, nHidden, seqLen, dropout=0, numLayers=1, classEmbeddingDir="..//ExtractWords/vector",
                 textEmbeddingDir="../words/vector", device="cpu"):
        super().__init__()
        self.FinalMLPSize = 512
        self.device = device
        self.extractFeature = ExtractFeature(embeddingDir=classEmbeddingDir, device=device)  # 图像中物品类别
        self.imageFeature = ImageFeature()  # 图像特征
        self.imageFeature.apply(ImageFeature.weight_init)
        self.textFeature = TextFeature(nHidden, seqLen, textEmbeddingDir=textEmbeddingDir,
                                       numLayers=numLayers,
                                       guideLen=self.extractFeature.embSize, dropout=dropout, device=device)

        # 注意力机制以 x, y, z 指导向量计算与 key的评分，最后将其平均 这里用的是加性注意力机制
        self.extractFeatureATT = MultiModalAttention(
            querySizes=(
                self.extractFeature.embSize, self.imageFeature.defaultFeatureSize, self.textFeature.nHidden * 2),
            keySize=self.extractFeature.embSize, dropout=dropout)
        self.imageFeatureATT = MultiModalAttention(
            querySizes=(
                self.extractFeature.embSize, self.imageFeature.defaultFeatureSize, self.textFeature.nHidden * 2),
            keySize=self.imageFeature.defaultFeatureSize, dropout=dropout)
        self.textFeatureATT = MultiModalAttention(
            querySizes=(
                self.extractFeature.embSize, self.imageFeature.defaultFeatureSize, self.textFeature.nHidden * 2),
            keySize=self.textFeature.nHidden * 2, dropout=dropout)

        # 为了后面的缩放点积注意力，需要把多模态向量调整为同一维度，后加入注意力机制
        self.extractLinear = nn.Linear(self.extractFeature.embSize, self.FinalMLPSize)
        self.extractRelu = nn.ReLU()
        self.imageLinear = nn.Linear(self.imageFeature.defaultFeatureSize, self.FinalMLPSize)
        self.imageRelu = nn.ReLU()
        self.textLinear = nn.Linear(self.textFeature.nHidden * 2, self.FinalMLPSize)
        self.textRelu = nn.ReLU()

        self.multiAttention = DotProductAttention(dropout=dropout)

        # 最后加入两层全连接层
        self.MLP, self.FC = nn.Linear(self.FinalMLPSize, self.FinalMLPSize // 2), nn.Linear(self.FinalMLPSize // 2, 1)
        self.mlpRelu, self.fcSigmoid = nn.ReLU(), nn.Sigmoid()

    @staticmethod
    def weight_init(m):
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight)

    def forward(self, X):
        reText, images, reWords, text = X
        input_ids, token_type_ids, attention_mask = text

        if self.device == "gpu":
            reText, images, reWords = reText.to(try_gpu()), images.to(try_gpu()), reWords.to(
                try_gpu())
            input_ids, token_type_ids, attention_mask = input_ids.cuda(), token_type_ids.cuda(), attention_mask.cuda()

        extractMatrix, extractGuidVec = self.extractFeature.forward(reWords)
        imageMatrix, imageGuidVec = self.imageFeature.forward(images)
        textHMatrix, textGuidVec = self.textFeature.forward(reText, (input_ids, token_type_ids, attention_mask),
                                                            extractGuidVec)
        extractGuidVec, imageGuidVec, textGuidVec = extractGuidVec.unsqueeze(1), imageGuidVec.unsqueeze(
            1), textGuidVec.unsqueeze(1)  # 升维
        extractVec = self.extractFeatureATT.forward((extractGuidVec, imageGuidVec, textGuidVec), extractMatrix)
        imageVec = self.imageFeatureATT.forward((extractGuidVec, imageGuidVec, textGuidVec), imageMatrix)
        textVec = self.textFeatureATT.forward((extractGuidVec, imageGuidVec, textGuidVec), textHMatrix)

        extractVec, imageVec, textVec = extractVec.squeeze(1), imageVec.squeeze(1), textVec.squeeze(1)  # 降维

        # 是否加入relu继续激活 未实验 20230504
        extractVec = self.extractLinear.forward(extractVec)
        extractVec = self.extractRelu(extractVec)
        imageVec = self.imageLinear.forward(imageVec)
        imageVec = self.imageRelu(imageVec)
        textVec = self.textLinear.forward(textVec)
        textVec = self.textRelu(textVec)
        finalMatrix = torch.stack((extractVec, imageVec, textVec), dim=1)  # 转化为 batch * 3 * FinalMLPSize
        # print("finalMatrix.shape", finalMatrix.shape)
        finalVec = torch.mean(self.multiAttention.forward(finalMatrix), dim=1)
        # print("finalVec.shape", finalVec.shape)
        fcInput = self.mlpRelu(self.MLP(finalVec))
        # print(self.FC.weight.grad)
        return self.fcSigmoid(self.FC(fcInput))


In [12]:
class Main:
    def __init__(self, device="cpu"):
        self.lr = 1e-5  # 学习率
        self.nHidden = 256  # 隐藏层 - Bi-LSTM
        self.seqLen = 80  # 步长 - Bi-LSTM
        self.numLayers = 2  # 隐藏层层数
        self.batchSize = 128  # 批量
        self.maxClipping = 10  # 梯度裁剪
        self.normType = 2  # 梯度的范式
        self.dropout = 0.2  # DropOut层的概率 留取80%
        self.maxEpoch = 100  # 最大迭代
        self.displayStep = 1  # 多少轮后展示训练结果ExtractFeature.py  =1时 会记录每个人epoch 当!=1时 记录maxEpoch//displayStep
        self.maxPatience = 10  # 能够容忍多少个epoch内都没有improvement 后期也不用了前期可调
        self.representationScores = {}
        self.lrRecord = []  # 记录学习率变化
        self.scoreNames = ["acc", "pre", "rec", "f1", "auc", "loss"]
        self.XExample = None  # 获得某一个X的样本
        self.device = device
        self.beforeEpoch = 0  # 可以继续训练
        self.net = Net(self.nHidden, self.seqLen, dropout=self.dropout, classEmbeddingDir=classEmbeddingDir,
                       textEmbeddingDir=textEmbeddingDir, device=device, numLayers=self.numLayers)
        self.net.apply(Net.weight_init)
        if device == "gpu":
            self.net.to(device=try_gpu())
        self.loss = nn.BCELoss(reduction='none')
        self.updater = torch.optim.Adam(self.net.parameters(), lr=self.lr)

        # 下面两个学习率衰减用法不一样
        # self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        #     optimizer=self.updater,
        #     mode="min",  # 增加/ 减小
        #     patience=20,  # loos/acc 不再减小（或增大）的累计次数后改变学习率；
        #     verbose=False,  # 是否可视
        #     min_lr=1e-7,  # 最小的学习率
        #     cooldown=10,  # 更新后冷静期
        #     eps=1e-3  # If the difference between new and old lr is smaller than eps, the update is ignored
        # )  # 在发现loss不再降低或者acc不再提高之后，降低学习率，这里用于批量的，所以呢，循环论数很多， 大约是 20K / batch_size
        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer=self.updater,
            T_0=5,  # 初试周期
            T_mult=2
        )  # 按cos函数下降 会周期性回复上来

        self.train_iter = self.loadData(DATASET.TRAIN)

    def loadData(self, dataType=DATASET.TRAIN):
        data = MyDataSet(
            seqLen=self.seqLen,
            imageClassDir=imageClassDir,
            imageVectorDir=imageVectorDir,
            textDir=dataPrefix,
            wordVocabDir=wordsPrefix,
            dataType=dataType
        )
        return DataLoader(dataset=data, batch_size=self.batchSize, shuffle=True, num_workers=8,
                          pin_memory=True, prefetch_factor=2, persistent_workers=False)

    def test(self, dataType=DATASET.TEST, num=2000):
        if isinstance(self.net, torch.nn.Module):
            self.net.eval()
        with torch.no_grad():
            testData = self.loadData(dataType=dataType)
            count = 0
            yPred, yTrue = [], []
            for X, y, in testData:
                self.XExample = X
                if self.device == "gpu":
                    y = y.cuda()
                y_pred = self.net(X)
                count += y_pred.shape[0]
                if (dataType == DATASET.TRAIN) and (count > num):
                    break
                yPred.append(y_pred)
                yTrue.append(y)
        yPred = torch.cat(yPred, dim=0)
        yTrue = torch.cat(yTrue, dim=0)
        return getScore(y_pred=yPred.to(torch.device("cpu")), y_true=yTrue.to(torch.device("cpu")))

    def train_epoch(self):
        if isinstance(self.net, torch.nn.Module):
            self.net.train()
        if not isinstance(self.updater, torch.optim.Optimizer):
            raise AttributeError
        count = 0
        for X, y in self.train_iter:
            count += 1
            torch.cuda.empty_cache()
            if self.device == "gpu":
                y = y.cuda()
            y_hat = self.net(X)
            self.lrRecord.append(self.updater.state_dict()['param_groups'][0]['lr'])
            l = self.loss(y_hat.squeeze(), y.squeeze()).mean()
            self.updater.zero_grad()
            l.backward()
            nn.utils.clip_grad_norm_(self.net.parameters(), max_norm=self.maxClipping, norm_type=self.normType)
            # self.lr_scheduler.step(l)
            self.updater.step()
            self.lr_scheduler.step()
            del X, y
            # if count == 10: # 提前中止，测试用
            #     break
        gc.collect()

    def train(self):
        maxF1 = 0  # 以F1score为指标
        patience = self.maxPatience  # 当前的容忍度
        _, testScores, validScores, validStr = None, None, None, None
        start = time.time()
        for epoch in range(self.beforeEpoch, self.beforeEpoch + self.maxEpoch):
            self.train_epoch()
            if epoch % self.displayStep == 0:
                # acc, pre, rec, f1, auc, loss # 元组内的顺序
                trainScores, testScores, validScores = self.test(DATASET.TRAIN), self.test(DATASET.TEST), self.test(
                    DATASET.VALID)
                self.representationScores[epoch // self.displayStep] = tuple(
                    zip(trainScores, testScores, validScores))  #
                end = time.time()
                print("----epoch:", epoch, "total cost:{:.2f} min".format((end - start) / 60), "---------")
                print("train, patience={}, acc:{:.3f}, pre:{:.3f}, rec:{:.3f}, f1:{:.3f}, acu:{:.3f},"
                      "loss:{:.2f}".format(patience, *trainScores))
                print("test, patience={}, acc:{:.3f}, pre:{:.3f}, rec:{:.3f}, f1:{:.3f}, acu:{:.3f},"
                      "loss:{:.2f}".format(patience, *testScores))
                validStr = "valid, patience={}, acc:{:.3f}, pre:{:.3f}, rec:{:.3f}, f1:{:.3f}, acu:{:.3f}, loss:{:.4f}".format(patience, *validScores)
                print(validStr)
                if testScores[3] > maxF1 + 1e-3:
                    maxF1, patience = testScores[3], self.maxPatience
                    self.saveNet("bestModel", describe=validStr)
                else:
                    patience -= 1
                if patience == 0:
                    break
        self.saveNet(describe=validStr)


    def saveNet(self, saveName=time.strftime("%Y-%m-%d", time.localtime()), describe="unKnown"):
        """保存网络参数"""
        savePath = saveModelWightsDir + saveName + "/"

        if os.path.exists(savePath):
            shutil.rmtree(savePath)  # 如果重新运行时，切忌如果有相同的文件名时要提前保存！！！！！
        os.makedirs(savePath, exist_ok=True)
        if not os.path.exists(savePath + "logs/"):
            os.mkdir(savePath + "logs/")
        if not os.path.exists(savePath + "runs/"):
            os.mkdir(savePath + "runs/")

        torch.save(self.net.state_dict(), savePath + saveName + ".pth")

        summaryWriter = SummaryWriter(log_dir=savePath + "runs/")
        modelScoresVision(summaryWriter, scoresValues=self.representationScores, scoresNames=self.scoreNames,lrValues=self.lrRecord)
        summaryWriter.close()

        runLogs = (self.representationScores, self.lrRecord)
        with open(savePath + "logs/" + saveName, 'wb+') as f:
            pickle.dump(runLogs, f)

        with open(savePath + "describe.txt", 'w+') as f:
            f.write("acc, pre, rec, f1, auc, loss\n")
            f.write(describe)

    def loadNet(self, loadName=time.strftime("%Y-%m-%d", time.localtime()), isEval=False):
        """加载网络参数"""

        loadPath = saveModelWightsDir + loadName + "/"

        self.net.load_state_dict(torch.load(loadPath + loadName + ".pth"))

        with open(loadPath + "logs/" + loadName, 'rb') as f:
            self.representationScores, self.lrRecord = pickle.load(f)
        self.beforeEpoch = len(self.representationScores)

        if isEval:
            self.net.eval()  # 不启用 BatchNormalization 和 Dropout

print("Over")

Over


In [13]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/4/25 15:43
# @Author  : CaoQixuan
# @File    : Main.py
# @Description :主函数
"""
20230504
1. 要实现 学习率逐渐下降
2. 梯度裁剪
3. 最大忍耐度
4. 展示实验结果
5. 写注释
6. 保存参数模型参数
7. debug ！！！！！！
8. gpu跑
关于CPU版本在Main_CPU中

model.eval() - model.eval()不会影响各层的gradient计算行为，即gradient计算和存储与training模式一样，只是不进行反向传播(backprobagation)
torch.no_grad() 用于停止autograd模块的工作，起到加速和节省显存的作用（具体行为就是停止gradient计算，从而节省了GPU算力和显存）
"""

class MainText:
    def __init__(self, device="cpu"):
        self.lr = 1e-6 / 2  # 学习率
        self.nHidden = 256  # 隐藏层 - Bi-LSTM
        self.seqLen = 80  # 步长 - Bi-LSTM
        self.numLayers = 2  # 隐藏层层数
        self.batchSize = 64  # 批量
        self.maxClipping = 5  # 梯度裁剪
        self.normType = 2  # 梯度的范式
        self.dropout = 0.1  # DropOut层的概率 留取80%
        self.maxEpoch = 50  # 最大迭代
        self.displayStep = 1  # 多少轮后展示训练结果ExtractFeature.py  =1时 会记录每个人epoch 当!=1时 记录maxEpoch//displayStep
        self.maxPatience = 10  # 能够容忍多少个epoch内都没有improvement 后期也不用了前期可调
        self.representationScores = {}
        self.lrRecord = []  # 记录学习率变化
        self.scoreNames = ["acc", "pre", "rec", "f1", "auc", "loss"]
        self.XExample = None  # 获得某一个X的样本
        self.device = device
        self.beforeEpoch = 0  # 可以继续训练

        self.net = TextFeature(nHidden=self.nHidden, seqLen=self.seqLen, textEmbeddingDir=textEmbeddingDir,
                               numLayers=self.numLayers, guideLen=200, dropout=self.dropout)
        self.net.apply(Net.weight_init)
        if device == "gpu":
            self.net.to(device=try_gpu())
        self.loss = nn.BCELoss(reduction='none')
        self.updater = torch.optim.Adam(self.net.parameters(), lr=self.lr)

        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer=self.updater,
            T_0=5,  # 初试周期
            T_mult=2
        )

        self.train_iter = self.loadData(DATASET.TRAIN)
        self.FC1 = nn.Linear(self.nHidden * 2, self.nHidden).to(try_gpu())
        self.relu = nn.ReLU().to(try_gpu())
        self.FC2 = nn.Linear(self.nHidden, 1).to(try_gpu())
        self.sigmoid = nn.Sigmoid().to(try_gpu())

    def loadData(self, dataType=DATASET.TRAIN):
        data = MyDataSet(
            seqLen=self.seqLen,
            imageClassDir=imageClassDir,
            imageVectorDir=imageVectorDir,
            textDir=dataPrefix,
            wordVocabDir=wordsPrefix,
            dataType=dataType
        )
        return DataLoader(dataset=data, batch_size=self.batchSize, shuffle=True, num_workers=6,
                          pin_memory=True, prefetch_factor=2, persistent_workers=False)

    def test(self, dataType=DATASET.TEST, num=2000):
        if isinstance(self.net, torch.nn.Module):
            self.net.eval()
        with torch.no_grad():
            testData = self.loadData(dataType=dataType)
            count = 0
            yPred, yTrue = [], []
            for X, y, in testData:
                self.XExample = X
                if self.device == "gpu":
                    y = y.cuda()
                reText, images, reWords, text = X
                input_ids, token_type_ids, attention_mask = text
                if self.device == "gpu":
                    reText, images, reWords = reText.to(try_gpu()), images.to(try_gpu()), reWords.to(
                        try_gpu())
                    input_ids, token_type_ids, attention_mask = input_ids.cuda(), token_type_ids.cuda(), attention_mask.cuda()
                output, output_vec = self.net.forward(reText, (input_ids, token_type_ids, attention_mask), None)
                y_pred = self.sigmoid(self.FC2(self.relu(self.FC1(output_vec))))
                count += y_pred.shape[0]
                if (dataType == DATASET.TRAIN) and (count > num):
                    break
                yPred.append(y_pred)
                yTrue.append(y)
        yPred = torch.cat(yPred, dim=0)
        yTrue = torch.cat(yTrue, dim=0)
        return getScore(y_pred=yPred.to(torch.device("cpu")), y_true=yTrue.to(torch.device("cpu")))

    def train_epoch(self):
        if isinstance(self.net, torch.nn.Module):
            self.net.train()
        if not isinstance(self.updater, torch.optim.Optimizer):
            raise AttributeError
        count = 0
        for X, y in self.train_iter:
            count += 1
            torch.cuda.empty_cache()
            if self.device == "gpu":
                y = y.cuda()
            reText, images, reWords, text = X
            input_ids, token_type_ids, attention_mask = text
            if self.device == "gpu":
                reText, images, reWords = reText.to(try_gpu()), images.to(try_gpu()), reWords.to(
                    try_gpu())
                input_ids, token_type_ids, attention_mask = input_ids.cuda(), token_type_ids.cuda(), attention_mask.cuda()
            output, output_vec = self.net.forward(reText, (input_ids, token_type_ids, attention_mask), None)
            y_hat = self.sigmoid(self.FC2(self.relu(self.FC1(output_vec))))
            self.lrRecord.append(self.updater.state_dict()['param_groups'][0]['lr'])
            l = self.loss(y_hat.squeeze(), y.squeeze()).mean()
            self.updater.zero_grad()
            l.backward()
            nn.utils.clip_grad_norm_(self.net.parameters(), max_norm=self.maxClipping, norm_type=self.normType)
            self.updater.step()
            self.lr_scheduler.step()
            del X, y
            if count == 10:  # 提前中止，测试用
                break
        gc.collect()

    def train(self):
        maxF1 = 0  # 以F1score为指标
        patience = self.maxPatience  # 当前的容忍度
        _, testScores, validScores, validStr = None, None, None, None
        start = time.time()
        for epoch in range(self.beforeEpoch, self.beforeEpoch + self.maxEpoch):
            self.train_epoch()
            if epoch % self.displayStep == 0:
                # acc, pre, rec, f1, auc, loss # 元组内的顺序
                trainScores, testScores, validScores = self.test(DATASET.TRAIN), self.test(DATASET.TEST), self.test(
                    DATASET.VALID)
                self.representationScores[epoch // self.displayStep] = tuple(
                    zip(trainScores, testScores, validScores))  #
                end = time.time()
                print("----epoch:", epoch, "total cost:{:.2f} min".format((end - start) / 60), "---------")
                print("train, patience={}, acc:{:.3f}, pre:{:.3f}, rec:{:.3f}, f1:{:.3f}, acu:{:.3f},"
                      "loss:{:.2f}".format(patience, *trainScores))
                print("test, patience={}, acc:{:.3f}, pre:{:.3f}, rec:{:.3f}, f1:{:.3f}, acu:{:.3f},"
                      "loss:{:.2f}".format(patience, *testScores))
                validStr = "valid, patience={}, acc:{:.3f}, pre:{:.3f}, rec:{:.3f}, f1:{:.3f}, acu:{:.3f}, loss:{:.4f}".format(patience, *validScores)
                print(validStr)
                if validScores[3] > maxF1 + 1e-3:
                    maxF1, patience = validScores[3], self.maxPatience
                    self.saveNet("bestModel", describe=validStr)
                else:
                    patience -= 1
                if patience == 0:
                    break
        self.saveNet(describe=validStr)

    def saveNet(self, saveName=time.strftime("%Y-%m-%d", time.localtime()), describe="unKnown"):
        """保存网络参数"""
        savePath = saveModelWightsDir + saveName + "/"

        if os.path.exists(savePath):
            shutil.rmtree(savePath)  # 如果重新运行时，切忌如果有相同的文件名时要提前保存！！！！！
        os.makedirs(savePath, exist_ok=True)
        if not os.path.exists(savePath + "logs/"):
            os.mkdir(savePath + "logs/")
        if not os.path.exists(savePath + "runs/"):
            os.mkdir(savePath + "runs/")

        torch.save(self.net.state_dict(), savePath + saveName + ".pth")

        summaryWriter = SummaryWriter(log_dir=savePath + "runs/")
        modelScoresVision(summaryWriter, scoresValues=self.representationScores, scoresNames=self.scoreNames,
                          lrValues=self.lrRecord)
        summaryWriter.close()

        runLogs = (self.representationScores, self.lrRecord)
        with open(savePath + "logs/" + saveName, 'wb+') as f:
            pickle.dump(runLogs, f)

        with open(savePath + "describe.txt", 'w+') as f:
            f.write("acc, pre, rec, f1, auc, loss\n")
            f.write(describe)

    def loadNet(self, loadName=time.strftime("%Y-%m-%d", time.localtime()), isEval=False):
        """加载网络参数"""

        loadPath = saveModelWightsDir + loadName + "/"

        self.net.load_state_dict(torch.load(loadPath + loadName + ".pth"))

        with open(loadPath + "logs/" + loadName, 'rb') as f:
            self.representationScores, self.lrRecord = pickle.load(f)
        self.beforeEpoch = len(self.representationScores)

        if isEval:
            self.net.eval()  # 不启用 BatchNormalization 和 Dropout

In [14]:
warnings.filterwarnings('ignore')
main = Main("gpu")
main.train()

Some weights of the model checkpoint at /kaggle/input/modelwights/bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


----epoch: 0 total cost:3.06 min ---------
train, patience=10, acc:0.618, pre:0.628, rec:0.274, f1:0.381, acu:0.630,loss:0.66
test, patience=10, acc:0.638, pre:0.601, rec:0.270, f1:0.373, acu:0.612,loss:0.66
valid, patience=10, acc:0.617, pre:0.543, rec:0.229, f1:0.323, acu:0.590, loss:0.6640
----epoch: 1 total cost:6.45 min ---------
train, patience=10, acc:0.621, pre:0.605, rec:0.374, f1:0.462, acu:0.649,loss:0.66
test, patience=10, acc:0.649, pre:0.592, rec:0.381, f1:0.463, acu:0.648,loss:0.65
valid, patience=10, acc:0.619, pre:0.533, rec:0.336, f1:0.412, acu:0.621, loss:0.6615
----epoch: 2 total cost:9.67 min ---------
train, patience=10, acc:0.620, pre:0.633, rec:0.358, f1:0.457, acu:0.660,loss:0.65
test, patience=10, acc:0.651, pre:0.604, rec:0.361, f1:0.452, acu:0.668,loss:0.64
valid, patience=10, acc:0.629, pre:0.562, rec:0.311, f1:0.400, acu:0.643, loss:0.6482
----epoch: 3 total cost:12.80 min ---------
train, patience=9, acc:0.655, pre:0.657, rec:0.469, f1:0.547, acu:0.698,lo