---
## 包依赖

In [1]:
### 包依赖
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data_utils

---
## 使用pytorch建立模型

In [3]:
# CNN + LSTM + self-/inter-attention classification model.


class CnnLstmModel(nn.Module):
    """Classify a multivariate time series with a CNN, an LSTM and two attention stages.

    Pipeline implemented by ``forward``:

    1. A 1-D convolution produces ``cnn_kernel_num`` channels per input feature.
    2. An LSTM runs over time on each feature's channels; its output is added
       to the CNN output (plain residual add, there is no normalisation layer).
    3. A self-attention MLP scores every (feature, time) position. For each
       time step the ``n`` best-scoring features are kept, and for each
       feature the ``n`` best-scoring time steps are kept.
    4. Inter-attention: every kept position queries all positions, and the
       ``m`` highest-scoring positions are gathered next to it.
    5. A small 2-D CNN, a linear layer and a softmax produce class
       probabilities.

    Args:
        feature_size: Number of input features.
        temporal_size: Number of time steps per sample.
        cnn_kernel_size: Length of the 1-D convolution kernel; must be odd so
            that the padding keeps the temporal length unchanged.
        cnn_kernel_num: Number of convolution kernels per input feature; also
            used as the LSTM input and hidden size.
        lstm_layer: Number of stacked LSTM layers.
        self_att_dim: Hidden width of the self-attention scoring MLP.
        inter_att_dim: Width of the key/query projections of the
            inter-attention stage.
        n: How many top-scoring features are kept per time step, and how many
            top-scoring time steps are kept per feature.
        m: For every kept position, how many of the most related
            (feature, time) positions are gathered.
        num_classes: Number of output classes (defaults to 3).

    Raises:
        ValueError: If the hyper-parameters cannot produce a valid network
            (even kernel size, ``n``/``m`` larger than the available
            positions, or a map too small for the two 2x2 max-pool layers).

    NOTE(review): the self-attention and inter-attention scores are consumed
    only through ``torch.topk`` indices, which are not differentiable, so
    ``self_att``, ``k`` and ``q`` receive no gradient from the output.
    """

    def __init__(self, feature_size, temporal_size, cnn_kernel_size, cnn_kernel_num,
                 lstm_layer, self_att_dim, inter_att_dim, n, m, num_classes=3):
        super().__init__()

        if cnn_kernel_size < 1 or cnn_kernel_size % 2 == 0:
            raise ValueError(
                "cnn_kernel_size must be a positive odd integer so that the "
                "convolution preserves temporal_size"
            )
        if not 1 <= n <= min(feature_size, temporal_size):
            raise ValueError("n must be between 1 and min(feature_size, temporal_size)")
        if not 1 <= m <= feature_size * temporal_size:
            raise ValueError("m must be between 1 and feature_size * temporal_size")
        if m + 1 < 4 or cnn_kernel_num < 4:
            # Two MaxPool2d(2, 2) layers shrink each spatial side by a factor of 4.
            raise ValueError("m + 1 and cnn_kernel_num must both be at least 4")

        # Hyper-parameters.
        self.feature_size = feature_size
        self.temporal_size = temporal_size
        self.cnn_kernel_size = cnn_kernel_size
        self.cnn_kernel_num = cnn_kernel_num
        self.lstm_layer = lstm_layer
        self.self_att_dim = self_att_dim
        self.inter_att_dim = inter_att_dim
        self.n = n
        self.m = m
        self.num_classes = num_classes
        # Number of positions kept by the self-attention selection.
        self.dim = self.n * (self.feature_size + self.temporal_size)

        # CNN layer.
        # in:  [batch_size, feature_size, temporal_size]
        # out: [batch_size, feature_size * cnn_kernel_num, temporal_size]
        self.cnnin = self.feature_size
        self.cnnout = self.feature_size * self.cnn_kernel_num
        # Integer division: Conv1d needs an int padding.
        self.cnn_padding = (self.cnn_kernel_size - 1) // 2
        self.cnn = nn.Conv1d(
            in_channels=self.cnnin,
            out_channels=self.cnnout,
            kernel_size=self.cnn_kernel_size,
            stride=1,
            padding=self.cnn_padding,
        )

        # LSTM layer, applied per feature over time.
        # in/out: [batch_size * feature_size, temporal_size, cnn_kernel_num]
        # hidden_size equals cnn_kernel_num so the output can be added to the CNN output.
        self.lstm = nn.LSTM(
            input_size=self.cnn_kernel_num,
            hidden_size=self.cnn_kernel_num,
            num_layers=self.lstm_layer,
            batch_first=True,
        )

        # Self-attention scoring MLP.
        # in:  [batch_size, feature_size, temporal_size, cnn_kernel_num]
        # out: [batch_size, feature_size, temporal_size, 1]
        self.self_att_score = 1
        self.self_att = nn.Sequential(
            nn.Linear(self.cnn_kernel_num, self.self_att_dim),
            nn.ReLU(),
            nn.Linear(self.self_att_dim, self.self_att_score),
        )

        # Inter-attention key projection.
        # in:  [batch_size, feature_size, temporal_size, cnn_kernel_num]
        # out: [batch_size, feature_size, temporal_size, inter_att_dim]
        self.k = nn.Sequential(
            nn.Linear(self.cnn_kernel_num, self.inter_att_dim),
            nn.ReLU(),
            nn.Linear(self.inter_att_dim, self.inter_att_dim),
        )
        # Inter-attention query projection.
        # in:  [batch_size, (feature_size + temporal_size) * n, cnn_kernel_num]
        # out: [batch_size, (feature_size + temporal_size) * n, inter_att_dim]
        self.q = nn.Sequential(
            nn.Linear(self.cnn_kernel_num, self.inter_att_dim),
            nn.ReLU(),
            nn.Linear(self.inter_att_dim, self.inter_att_dim),
        )

        # 2-D convolution head; the kept positions act as channels.
        # in:  [batch_size, n * (temporal_size + feature_size), m + 1, cnn_kernel_num]
        # out: [batch_size, 256, 1, 1]
        self.conv_layers = nn.Sequential(
            # First convolution keeps the channel count.
            nn.Conv2d(
                in_channels=self.dim,
                out_channels=self.dim,
                kernel_size=(3, 3),
                padding=1,  # keeps the spatial size
            ),
            nn.BatchNorm2d(self.dim),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2)),

            # Second convolution doubles the channel count.
            nn.Conv2d(self.dim, self.dim * 2, kernel_size=(3, 3), padding=1),
            nn.BatchNorm2d(self.dim * 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2)),

            # Third convolution followed by global average pooling.
            nn.Conv2d(self.dim * 2, 256, kernel_size=(3, 3), padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
        )

        # Fully connected classification layer.
        self.fc = nn.Linear(256, self.num_classes)

        # Softmax over the class dimension.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return class probabilities for a batch of samples.

        Args:
            x: Tensor of shape [batch_size, temporal_size, feature_size].

        Returns:
            Tensor of shape [batch_size, num_classes]; every row is a softmax
            distribution. Because the output is already normalised it should
            not be fed to ``nn.CrossEntropyLoss``, which expects raw logits.

        Raises:
            ValueError: If ``x`` does not have the configured shape.
        """
        if x.dim() != 3 or x.size(1) != self.temporal_size or x.size(2) != self.feature_size:
            raise ValueError(
                "expected input of shape [batch_size, "
                f"{self.temporal_size}, {self.feature_size}], got {tuple(x.shape)}"
            )

        batch_size = x.size(0)
        kernel_num = self.cnn_kernel_num

        ### CNN layer
        x = x.permute(0, 2, 1)
        # x: [batch_size, feature_size, temporal_size]
        x = self.cnn(x)
        # x: [batch_size, feature_size * cnn_kernel_num, temporal_size]
        x = x.reshape(batch_size, self.feature_size, kernel_num, self.temporal_size)
        # x: [batch_size, feature_size, cnn_kernel_num, temporal_size]
        cnn_out = x.permute(0, 1, 3, 2).contiguous()
        # cnn_out: [batch_size, feature_size, temporal_size, cnn_kernel_num]

        ### LSTM layer
        lstm_in = cnn_out.reshape(batch_size * self.feature_size, self.temporal_size, kernel_num)
        # nn.LSTM returns (output, (h_n, c_n)); only the per-step output is used.
        lstm_seq, _ = self.lstm(lstm_in)
        lstm_out = lstm_seq.reshape(batch_size, self.feature_size, self.temporal_size, kernel_num)
        # lstm_out: [batch_size, feature_size, temporal_size, cnn_kernel_num]

        ### Residual combination of the CNN and LSTM outputs
        union_out = cnn_out + lstm_out

        ### Self-attention layer
        # nn.Linear acts on the last dimension, so no flattening is needed.
        score = self.self_att(union_out).squeeze(-1)
        # score: [batch_size, feature_size, temporal_size]

        ## For every time step keep the n best-scoring features.
        _, temporal_topn_indices = torch.topk(score.permute(0, 2, 1), self.n, dim=2)
        # temporal_topn_indices: [batch_size, temporal_size, n], values are feature indices
        temporal_expanded_indices = temporal_topn_indices.unsqueeze(-1).expand(-1, -1, -1, kernel_num)
        # The indices address features, so gather from the [batch, time, feature, kernel] view.
        temporal_topn_values = torch.gather(
            union_out.permute(0, 2, 1, 3), 2, temporal_expanded_indices
        )
        # temporal_topn_values: [batch_size, temporal_size, n, cnn_kernel_num]

        ## For every feature keep the n best-scoring time steps.
        _, feature_topn_indices = torch.topk(score, self.n, dim=2)
        # feature_topn_indices: [batch_size, feature_size, n], values are time indices
        feature_expanded_indices = feature_topn_indices.unsqueeze(-1).expand(-1, -1, -1, kernel_num)
        feature_topn_values = torch.gather(union_out, 2, feature_expanded_indices)
        # feature_topn_values: [batch_size, feature_size, n, cnn_kernel_num]

        ### Inter-attention layer
        # Kept positions, per-feature selections first, then per-time-step selections.
        topn_values = torch.cat(
            [
                feature_topn_values.reshape(batch_size, -1, kernel_num),
                temporal_topn_values.reshape(batch_size, -1, kernel_num),
            ],
            dim=1,
        )
        # topn_values: [batch_size, (feature_size + temporal_size) * n, cnn_kernel_num]

        k = self.k(union_out)
        # k: [batch_size, feature_size, temporal_size, inter_att_dim]
        q = self.q(topn_values)
        # q: [batch_size, (feature_size + temporal_size) * n, inter_att_dim]

        inter_att_score = torch.einsum('bftd,bqd->bqft', k, q)
        # inter_att_score: [batch_size, (feature_size + temporal_size) * n, feature_size, temporal_size]

        # For every kept position pick the m best (feature, time) positions.
        # Flattening feature/time gives index f * temporal_size + t, which is
        # the same layout as union_out flattened over its feature/time axes.
        flat_score = inter_att_score.reshape(batch_size, self.dim, -1)
        _, topm_indices = torch.topk(flat_score, self.m, dim=2)
        # topm_indices: [batch_size, dim, m]
        flat_union = union_out.reshape(batch_size, -1, kernel_num)
        gather_indices = topm_indices.reshape(batch_size, -1, 1).expand(-1, -1, kernel_num)
        inter_att_result = torch.gather(flat_union, 1, gather_indices).reshape(
            batch_size, self.dim, self.m, kernel_num
        )
        # inter_att_result: [batch_size, (feature_size + temporal_size) * n, m, cnn_kernel_num]

        # Put each kept position in front of its m related positions.
        combined = torch.cat([topn_values.unsqueeze(2), inter_att_result], dim=2)
        # combined: [batch_size, dim, m + 1, cnn_kernel_num]

        ### Classification head
        conv2d_out = self.conv_layers(combined)
        conv2d_out = conv2d_out.flatten(1)
        # conv2d_out: [batch_size, 256]
        fc_out = self.fc(conv2d_out)
        # fc_out: [batch_size, num_classes]
        return self.softmax(fc_out)
        

---
## 训练前的参数与数据配置

In [2]:
# Load the input samples X for the 30s / 28s / 26s windows.
folder_30s = r"../lib/attention_model_data/30s"
folder_28s = r"../lib/attention_model_data/28s"
folder_26s = r"../lib/attention_model_data/26s"

X_30s, X_28s, X_26s = [], [], []

# The file list comes from the 30s folder; a file with the same name is read
# from the 28s and 26s folders as well.
csv_files = [
    f
    for f in os.listdir(folder_30s)
    if f.endswith('.csv') and os.path.isfile(os.path.join(folder_30s, f))
]
for f in csv_files:
    # Read the three CSV files that share this name.
    frames = [
        pd.read_csv(os.path.join(folder, f))
        for folder in (folder_30s, folder_28s, folder_26s)
    ]
    # DataFrame -> float32 ndarray -> tensor.
    tensors = [torch.tensor(frame.to_numpy().astype(np.float32)) for frame in frames]
    for samples, sample in zip((X_30s, X_28s, X_26s), tensors):
        samples.append(sample)


In [3]:
# Load the targets y: one CSV with the raw values, one with the class labels.
file_target_y_value = r"../lib/target_y/attention_model_y_value.csv"
Y_value = pd.read_csv(file_target_y_value)

file_target_y_label = r"../lib/target_y/attention_model_y_label.csv"
Y_label = pd.read_csv(file_target_y_label)

In [4]:
# Normalise X with batch normalisation.
# Stack each list of per-sample tensors into a single tensor.
stacked_X_30s, stacked_X_28s, stacked_X_26s = (
    torch.stack(samples) for samples in (X_30s, X_28s, X_26s)
)

# BatchNorm1d normalises dimension 1, so move the feature axis there.
print("shape of stacked_x:", stacked_X_30s.shape)
stacked_X_30s, stacked_X_28s, stacked_X_26s = (
    stacked.permute(0, 2, 1)
    for stacked in (stacked_X_30s, stacked_X_28s, stacked_X_26s)
)
print("shape of changed X:", stacked_X_30s.shape)
print("size of feachers:", stacked_X_30s.size(1))

# One BatchNorm1d instance, sized by the feature count, shared by all three sets.
batchnorm = nn.BatchNorm1d(stacked_X_30s.size(1))

# Normalise each set and permute it back to its original axis order.
normed_X_30s, normed_X_28s, normed_X_26s = (
    batchnorm(stacked).permute(0, 2, 1)
    for stacked in (stacked_X_30s, stacked_X_28s, stacked_X_26s)
)

print("shape of normed X:", normed_X_30s.shape)

# Drop the autograd history (grad_fn) created by the BatchNorm call.
normed_X_30s, normed_X_28s, normed_X_26s = (
    normed.detach() for normed in (normed_X_30s, normed_X_28s, normed_X_26s)
)

shape of stacked_x: torch.Size([2895, 30, 21])
shape of changed X: torch.Size([2895, 21, 30])
size of feachers: 21
shape of normed X: torch.Size([2895, 30, 21])


In [16]:
# Convert y into tensors the model can consume.
# tensor_Y_value: target values (g values)
# tensor_Y_label: target labels, values in [1, 2, 3]
# NOTE(review): the labels are stored as float32; a class-index loss such as
# nn.CrossEntropyLoss would need integer (long) labels — confirm intended use.
np_Y_value = Y_value.to_numpy().astype(np.float32)
np_Y_label = Y_label.to_numpy().astype(np.float32)

tensor_Y_value = torch.tensor(np_Y_value)
tensor_Y_label = torch.tensor(np_Y_label)

In [19]:
# Show the shapes of both target tensors, one per line.
print(tensor_Y_value.shape, tensor_Y_label.shape, sep="\n")

torch.Size([2895, 1])

In [None]:
# Split into training and test sets.
# normed_X_30s    30s input set
# normed_X_28s    28s input set
# normed_X_26s    26s input set
# tensor_Y_value  target values


# Build the datasets.
# The raw target values are paired with the inputs (regression-style targets).
dataset_30s = data_utils.TensorDataset(normed_X_30s, tensor_Y_value)
dataset_28s = data_utils.TensorDataset(normed_X_28s, tensor_Y_value)
dataset_26s = data_utils.TensorDataset(normed_X_26s, tensor_Y_value)

# Two thirds for training, the rest for testing. The sizes are derived from
# dataset_30s; all three datasets share tensor_Y_value, and TensorDataset
# requires its tensors to have the same first dimension, so their lengths match.
train_size = int(len(dataset_30s) * 2 / 3)
test_size = len(dataset_30s) - train_size


# Split each dataset.
# NOTE(review): every random_split call draws its own permutation, so the
# three datasets are not split on the same sample indices — confirm intended.
train_dataset_30s, test_dataset_30s = data_utils.random_split(
    dataset_30s,
    [train_size, test_size]
)
train_dataset_28s, test_dataset_28s = data_utils.random_split(
    dataset_28s,
    [train_size, test_size]
)
train_dataset_26s, test_dataset_26s = data_utils.random_split(
    dataset_26s,
    [train_size, test_size]
)
print(f"训练集大小: {len(train_dataset_28s)}")
print(f"测试集大小: {len(test_dataset_26s)}")


# Data loaders: batch size 10 for training, 5 for testing.
train_dataloader_30s = data_utils.DataLoader(train_dataset_30s, batch_size=10, shuffle=True)
test_dataloader_30s = data_utils.DataLoader(test_dataset_30s, batch_size=5, shuffle=True)

train_dataloader_28s = data_utils.DataLoader(train_dataset_28s, batch_size=10, shuffle=True)
test_dataloader_28s = data_utils.DataLoader(test_dataset_28s, batch_size=5, shuffle=True)

train_dataloader_26s = data_utils.DataLoader(train_dataset_26s, batch_size=10, shuffle=True)
test_dataloader_26s = data_utils.DataLoader(test_dataset_26s, batch_size=5, shuffle=True)

---
### 训练模型

In [None]:
# Model and training configuration.
# train_dataloader_30s    30s training set
# train_dataloader_28s    28s training set
# train_dataloader_26s    26s training set
# test_dataloader_30s     30s test set
# test_dataloader_28s     28s test set
# test_dataloader_26s     26s test set
# device = "cpu" or "cuda"


# Learning rate
learning_rate = 0.01

# Number of training epochs
epoch = 400

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

# Model hyper-parameters (see CnnLstmModel.__init__).
# stacked_X_* were permuted to [sample, feature, time] before normalisation,
# so size(1) is the feature count and size(2) the number of time steps.
feature_size = stacked_X_30s.size(1)
temporal_size_30s = stacked_X_30s.size(2)
temporal_size_28s = stacked_X_28s.size(2)
temporal_size_26s = stacked_X_26s.size(2)
cnn_kernel_size = 5
cnn_kernel_num = 10
lstm_layer = 1
self_att_dim = 8
inter_att_dim = 8
n = 3
m = 5

# Build the model for the 30s data.
model_30s = CnnLstmModel(
    feature_size = feature_size,
    temporal_size = temporal_size_30s,
    cnn_kernel_size = cnn_kernel_size,
    cnn_kernel_num = cnn_kernel_num,
    lstm_layer = lstm_layer,
    self_att_dim = self_att_dim,
    inter_att_dim = inter_att_dim,
    n = n,
    m = m
).to(device)

# Loss functions
criterion_MSE = nn.MSELoss()    # MSE, for regression targets
criterion_CrossEntropy = nn.CrossEntropyLoss()    # cross-entropy, for class-index targets


# Optimizer over the parameters of the model that is actually trained.
optimizer = optim.Adam(model_30s.parameters(), lr=learning_rate)

In [None]:
# Minimal CrossEntropyLoss example: random logits for 3 samples and 5 classes
# scored against random class indices, followed by a backward pass.
loss = nn.CrossEntropyLoss()
# Named `logits` rather than `input` so the builtin input() is not shadowed.
logits = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(logits, target)
output.backward()

In [20]:
# Print the type and shape of the inputs of every training batch.
for batch_inputs, _ in train_dataloader_30s:
    print(type(batch_inputs), batch_inputs.shape, sep="\n")

NameError: name 'train_dataloader_30s' is not defined

In [None]:
# Train the model on the 30s dataset.
# NOTE(review): model_30s returns softmax class probabilities while the labels
# delivered by train_dataloader_30s come from tensor_Y_value; MSELoss will
# broadcast the two shapes against each other — confirm this is the intended
# objective (a class-label target with a classification loss looks more likely).
model_30s.train()
loss_store = []
for i in range(epoch):
    total_loss = 0.0
    for inputs, labels in train_dataloader_30s:

        # Detach once more as a precaution so no grad_fn reaches the model.
        no_grad_inputs = inputs.detach()

        # Move the batch to the selected device.
        gpu_inputs = no_grad_inputs.to(device)
        gpu_labels = labels.to(device)

        outputs = model_30s(gpu_inputs)  # model output
        loss = criterion_MSE(outputs, gpu_labels)  # compute the loss
        optimizer.zero_grad()  # reset gradients
        loss.backward()  # back-propagate
        optimizer.step()  # update parameters

        # loss is a batch mean; weight it by the batch size so that dividing
        # by the dataset size below yields the true per-sample average.
        total_loss += loss.item() * inputs.size(0)

    # Average loss of this epoch.
    ave_loss = total_loss / len(train_dataset_30s)
    # Report progress every 25 epochs.
    if (i + 1) % 25 == 0:
        print(f'Epoch [{i + 1}/{epoch}], Loss: {ave_loss:.4f}')
    # Keep the loss history.
    loss_store.append(ave_loss)

---
### 测试模型

In [None]:
# Evaluate the model on the 30s test set.
model_30s.eval()
eval_loss = 0.0
with torch.no_grad():
    for inputs, labels in test_dataloader_30s:
        outputs = model_30s(inputs.to(device))
        loss = criterion_MSE(outputs, labels.float().to(device))
        # loss is a batch mean; weight it by the batch size so that dividing
        # by the dataset size gives the per-sample average.
        eval_loss += loss.item() * inputs.size(0)
    print(f'eval_loss: {eval_loss / len(test_dataset_30s):.4f}')

---
### 结束训练后的保存配置

In [None]:
# Save the trained parameters (the state_dict only, not the module object).
trained_state = model_30s.state_dict()
torch.save(trained_state, r'model_weights.pth')

# # To load, first rebuild a model with the same structure
# model = MyModelClass(*args, **kwargs)  # must match the original architecture
# model.load_state_dict(torch.load('model_weights.pth'))
# model.eval()  # important: switch to evaluation mode (disables dropout and other training-only behaviour)