In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import pandas as pd
import pywt
import torch.optim as optim
import glob
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Select the compute device. The second GPU (cuda:1) is preferred, as before, but a
# machine with a single GPU falls back to cuda:0 instead of ending up with an invalid
# device ordinal, and a machine without CUDA falls back to the CPU.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Input data root and output folder. Both are absolute, machine-specific paths and
# both end with a trailing '/', which later f-strings rely on.
Data_path = '/home/ubuntu/data/workspace/Li/Data/'
save_path = '/home/ubuntu/data/workspace/Li/Python_nonlinear_verification5/'

In [None]:
M = 9900  # number of samples kept per signal, counted from its first non-zero sample
A = 1     # weight applied to the HI loss term during training
data = []
labels = []

# Collect every CSV file below the experiment folder (recursively). The list is
# sorted first so that the seeded shuffle gives the same order on every run.
dir_path = f'{Data_path}Hanning_Signal_Experiment(10000)'
dataPaths = sorted(glob.glob(os.path.join(dir_path, '**', '*.csv'), recursive=True))
random.seed(42)
random.shuffle(dataPaths)

skipped_files = 0
for dataPath in dataPaths:
    # The first two '_'-separated fields of the file name are the two regression
    # targets (force, HI); the remaining fields are not used here.
    filename = os.path.basename(dataPath)
    parts = filename.split('_')

    # The signal is stored in the second CSV column (column index 1).
    data_1 = pd.read_csv(dataPath)
    signal = data_1.iloc[:, 1].to_numpy()

    # Index of the first non-zero sample; files without one are skipped.
    nonzero_idx = np.flatnonzero(signal != 0)
    if nonzero_idx.size == 0:
        skipped_files += 1
        continue
    N = int(nonzero_idx[0])

    # Keep the file only if M samples are available starting at N.
    if N + M > len(signal):
        skipped_files += 1
        continue

    # Append the signal and its label together so both lists stay aligned
    # (previously a label was stored even when the signal itself was rejected).
    data.append(signal[N:N + M])
    labels.append([float(parts[0]), float(parts[1])])

if skipped_files:
    print(f"Skipped {skipped_files} file(s) with no non-zero sample or fewer than {M} samples after onset")
if not data:
    raise RuntimeError(f"No usable CSV signal found under {dir_path}")

# Convert signals and labels to NumPy arrays
all_datas = np.array(data, dtype="float")
all_labels = np.array(labels)
assert len(all_datas) == len(all_labels)

# Split 7:2:1 into train / validation / test.
# Step 1: 70 % training, 30 % temporary hold-out.
train_datas, temp_datas, train_labels, temp_labels = train_test_split(
    all_datas, all_labels, test_size = 0.3, random_state=42
)

# Step 2: split the hold-out 2:1, i.e. 2/3 validation and 1/3 test
# (the previous test_size of 1/2 produced a 70/15/15 split instead of 7:2:1).
validation_datas, test_datas, validation_labels, test_labels = train_test_split(
    temp_datas, temp_labels, test_size=1/3, random_state=42
)

# Print the shapes of the three splits as a sanity check
print("训练集数据形状:", train_datas.shape)
print("验证集数据形状:", validation_datas.shape)
print("测试集数据形状:", test_datas.shape)

In [None]:
# Add a trailing axis of size 1 to both splits: (num_signals, length) -> (num_signals, length, 1).
train_datas = np.reshape(train_datas, (-1, train_datas.shape[1], 1))
validation_datas = np.reshape(validation_datas, (-1, validation_datas.shape[1], 1))

for split_name, split_array in (('train_datas', train_datas),
                                ('validation_datas', validation_datas)):
    print(f'{split_name}.shape:{split_array.shape}')

In [None]:
# Plot one training waveform (the sample at index 1) and save it as a 600-dpi JPEG.
plt.style.use('default')
fig = plt.figure(figsize=(10, 6))
plt.rcParams['font.family'] = ['Times New Roman']
axis_label_font = {'weight': 'normal', 'size': 18}

ax = fig.gca()
ax.plot(train_datas[1], linewidth=1.5)
ax.set_xlabel('Sample point', fontdict=axis_label_font)
ax.set_ylabel('Amplitude(V)', fontdict=axis_label_font)
# Tick-label size for both axes
ax.tick_params(axis='both', which='major', labelsize=15)
ax.set_xlim([0, M])
fig.savefig(f'{save_path}train_signal.jpg', dpi=600, bbox_inches='tight')

In [None]:
# Plot one validation waveform (the sample at index 1) and save it as a 600-dpi JPEG.
plt.style.use('default')
fig = plt.figure(figsize=(10, 6))
plt.rcParams['font.family'] = ['Times New Roman']
axis_label_font = {'weight': 'normal', 'size': 18}

ax = fig.gca()
ax.plot(validation_datas[1], linewidth=1.5)
ax.set_xlabel('Sample point', fontdict=axis_label_font)
ax.set_ylabel('Amplitude(V)', fontdict=axis_label_font)
# Tick-label size for both axes
ax.tick_params(axis='both', which='major', labelsize=15)
ax.set_xlim([0, M])
fig.savefig(f'{save_path}validation_signal.jpg', dpi=600, bbox_inches='tight')

In [7]:
# --- Model dimensions ---
# input_size is the feature length the network receives. 9617 is the combined length of
# the db4 detail coefficients of levels 1-5 for a 9900-sample signal
# (4953 + 2480 + 1243 + 625 + 316), which is what the model's forward pass concatenates.
input_size = 9617
hidden_size = 128   # hidden units per LSTM direction
num_layers = 2      # stacked LSTM layers
output_size = 2     # two regression targets

# --- Optimisation settings ---
BATCH_SIZE = 32
EPOCH = 5_000       # upper bound on the number of training epochs
learning_rate = 1e-4

In [8]:
# Data preparation:
#   torch.from_numpy  converts a NumPy array into a PyTorch tensor,
#   TensorDataset     pairs each signal tensor with its label row,
#   DataLoader        serves the dataset in mini-batches for training / evaluation.

train_dataset = TensorDataset(*map(torch.from_numpy, (train_datas, train_labels)))
validation_dataset = TensorDataset(*map(torch.from_numpy, (validation_datas, validation_labels)))

# Only the training batches are shuffled; the validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
class ConvAutoencoderPredictor(nn.Module):
    """Wavelet front-end, 1-D convolutional encoder/decoder and bidirectional LSTM regressor.

    ``forward`` decomposes the input with a 6-level 'db4' discrete wavelet transform,
    keeps the detail coefficients of levels 1-5, passes them through the convolutional
    encoder and decoder, then through a bidirectional LSTM and a linear output layer.

    Args:
        input_size: length of the concatenated wavelet detail coefficients. It is used
            as the channel count of the first and last convolution and as the LSTM
            feature size.
        hidden_size: hidden units per LSTM direction.
        output_size: number of predicted values per sample.
        num_layers: number of stacked LSTM layers.
        dropout_rate: dropout probability used in the encoder, the decoder and before
            the output layer.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout_rate):
        super(ConvAutoencoderPredictor, self).__init__()
        # Kept on the instance so forward() does not depend on a notebook-level global.
        self.input_size = input_size
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=input_size, out_channels=64, kernel_size=1, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=1),
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Dropout(dropout_rate)
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(in_channels=128, out_channels=64, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose1d(in_channels=64, out_channels=input_size, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )
        # Bidirectional LSTM
        self.bdlstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
        # Output layer; the LSTM is bidirectional, so its output width is hidden_size * 2
        self.fc = nn.Linear(hidden_size * 2, output_size)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Predict the targets for a batch of signals.

        Args:
            x: 3-D tensor whose last axis is the time axis (the wavelet transform runs
                along the last axis); the training loop in this notebook passes
                (batch, 1, signal_length).

        Returns:
            Tensor of shape (batch, output_size).
        """
        wavelet = 'db4'
        # PyWavelets operates on NumPy arrays, so the transform runs on the CPU.
        # detach() keeps the conversion valid even if x is attached to an autograd graph.
        signal = x.detach().cpu().numpy()

        # Discrete wavelet decomposition: coeffs = [cA6, cD6, cD5, cD4, cD3, cD2, cD1].
        coeffs = pywt.wavedec(signal, wavelet, level=6)
        # Keep the detail coefficients of levels 1..5 (finest first); cD6 and cA6 are dropped.
        details = np.concatenate([coeffs[idx] for idx in (6, 5, 4, 3, 2)], axis=-1)

        # Back to a tensor on the same device / dtype as the input
        # (instead of relying on a global `device`).
        features = torch.from_numpy(details).to(device=x.device, dtype=x.dtype)

        # Swap the last two axes so the coefficient axis becomes the Conv1d channel axis.
        features = features.permute(0, 2, 1)

        encoded = self.encoder(features)
        decoded = self.decoder(encoded)

        # Regroup the decoder output into chunks of `input_size` values per LSTM step.
        # NOTE(review): this is a plain view of the (batch, channels, length) memory, not a
        # permute, so channel and length values are interleaved. Left unchanged so that
        # existing checkpoints keep producing the same outputs - confirm this is intended.
        out = decoded.view(decoded.size(0), -1, self.input_size)

        lstm_out, _ = self.bdlstm(out)
        lstm_out = lstm_out[:, -1, :]  # output at the last time step

        out = self.dropout(lstm_out)
        out = self.fc(out)
        return out

In [10]:
# Instantiate the network on the selected device, then create the loss and the optimiser.
model = ConvAutoencoderPredictor(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout_rate=0.5,
).to(device)
loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
# Empty per-epoch loss histories for the training split (force and HI)
train_loss_force_epoch, train_loss_HI_epoch = [], []

# Empty per-epoch loss histories for the validation split (force and HI)
validation_loss_force_epoch, validation_loss_HI_epoch = [], []

In [12]:
def _run_epoch(model, loader, loss, device, A, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean losses.

    When ``optimizer`` is given, one optimisation step is taken per batch; otherwise
    the pass only evaluates. The caller is responsible for ``model.train()`` /
    ``model.eval()`` and for wrapping evaluation in ``torch.no_grad()``.

    Returns:
        (mean_force_loss, mean_weighted_HI_loss) as Python floats.
    """
    force_sum = 0.0
    HI_sum = 0.0

    for x, y in loader:
        # Cast to float32, swap the last two axes and move everything to the target device.
        inputs = x.float().permute(0, 2, 1).to(device)
        force_true = y[:, 0].float().to(device)
        HI_true = y[:, 1].float().to(device)

        if optimizer is not None:
            optimizer.zero_grad()

        # One forward pass yields both outputs. Calling the model twice doubled the cost
        # and, in training mode, used two different dropout masks for the two losses.
        prediction = model(inputs)
        loss_force = loss(prediction[:, 0].flatten(), force_true.flatten())
        loss_HI = A * loss(prediction[:, 1].flatten(), HI_true.flatten())

        if optimizer is not None:
            # The gradient of the sum equals the sum of the two separate gradients.
            (loss_force + loss_HI).backward()
            optimizer.step()

        batch_len = inputs.size(0)
        force_sum += loss_force.item() * batch_len
        HI_sum += loss_HI.item() * batch_len

    num_samples = len(loader.dataset)
    return force_sum / num_samples, HI_sum / num_samples


def train_and_validate(model, train_loader, validation_loader, optimizer, loss, device, EPOCH, A, patience,
                       checkpoint_path=None):
    """
    Train the model with early stopping and record the per-epoch losses.

    Args:
        model: model to train; it must return a (batch, 2) tensor whose columns are
            the force prediction and the HI prediction.
        train_loader: DataLoader for the training split.
        validation_loader: DataLoader for the validation split.
        optimizer: optimiser updating the model parameters.
        loss: loss function applied to each of the two outputs separately.
        device: device the batches are moved to (e.g. 'cuda' or 'cpu').
        EPOCH: maximum number of epochs.
        A: weight of the HI loss term.
        patience: number of consecutive epochs without improvement of the validation
            force loss after which training stops.
        checkpoint_path: file the best model's state_dict is written to. Defaults to
            f'{save_path}/Inverse_Model.pth', using the notebook-level ``save_path``.

    Returns:
        Four lists (train force, train HI, validation force, validation HI), each
        holding one single-element list ``[mean_loss]`` per completed epoch.
    """
    if checkpoint_path is None:
        checkpoint_path = f'{save_path}/Inverse_Model.pth'

    train_loss_force_epoch = []
    train_loss_HI_epoch = []
    validation_loss_force_epoch = []
    validation_loss_HI_epoch = []

    best_validation_loss = float('inf')
    epochs_no_improve = 0

    # Progress is printed about ten times per run; max() avoids a modulo by zero
    # when EPOCH is smaller than 10.
    log_every = max(1, EPOCH // 10)

    for epoch in range(EPOCH):
        should_log = epoch % log_every == 0
        if should_log:
            print("-------第 {} 轮训练开始-------".format(epoch + 1))

        # Training pass
        model.train()
        train_mean_force, train_mean_HI = _run_epoch(model, train_loader, loss, device, A, optimizer)
        train_loss_force_epoch.append([train_mean_force])
        train_loss_HI_epoch.append([train_mean_HI])

        # Validation pass
        model.eval()
        with torch.no_grad():
            validation_mean_force, validation_mean_HI = _run_epoch(model, validation_loader, loss, device, A)
        validation_loss_force_epoch.append([validation_mean_force])
        validation_loss_HI_epoch.append([validation_mean_HI])

        # Early stopping and checkpointing monitor the validation force loss only.
        if validation_mean_force < best_validation_loss:
            best_validation_loss = validation_mean_force
            epochs_no_improve = 0
            # Save the best model so far, creating the target folder if needed.
            checkpoint_dir = os.path.dirname(checkpoint_path)
            if checkpoint_dir:
                os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)
        else:
            epochs_no_improve += 1

        # Stop once `patience` consecutive epochs brought no improvement.
        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {epoch + 1} epochs.")
            break

        if should_log:
            print(f"Epoch:{epoch}, Train_force_Loss: {train_mean_force:.4f}, Train_HI_Loss: {train_mean_HI:.4f},\n"
                  f"Validation_force_Loss: {validation_mean_force:.4f}, Validation_HI_Loss: {validation_mean_HI:.4f}")

    return train_loss_force_epoch, train_loss_HI_epoch, validation_loss_force_epoch, validation_loss_HI_epoch

In [None]:
# Run training (model, loaders, optimiser, loss and device come from the cells above);
# early stopping uses a patience of 1000 epochs.
(train_loss_force,
 train_loss_HI,
 validation_loss_force,
 validation_loss_HI) = train_and_validate(
    model, train_loader, validation_loader, optimizer, loss, device,
    EPOCH=EPOCH, A=A, patience=1000,
)

In [None]:
# Loss curves: force (PF) loss on the left y-axis, HI loss on the right y-axis,
# plus an inset that zooms into the last 30 % of the recorded epochs.
plt.style.use('default')
plt.figure(figsize=(12, 8))  # create the figure with an explicit size
plt.rcParams['font.family'] = ['Times New Roman']

# x-axis ticks: about ten evenly spaced ticks. max(1, ...) keeps the step valid when
# fewer than 10 epochs were recorded (a step of 0 made np.arange raise).
epoch = np.arange(0, len(train_loss_force), max(1, len(train_loss_force) // 10))
plt.xticks(epoch)

# First y-axis: force loss
ax1 = plt.gca()
ax1.plot(train_loss_force, 'r-', linewidth=2.5)
ax1.plot(train_loss_force, marker='o', markersize=5, color='red', linestyle='None', label='Training loss for PF')
ax1.plot(validation_loss_force, 'r-', linewidth=2.5)
ax1.plot(validation_loss_force, marker='*', markersize=5, color='red', linestyle='None', label='Validation loss for PF')

ax1.set_xlabel('Epoch', fontdict={'weight': 'normal', 'size': 20})
ax1.set_ylabel('Loss for PF', fontdict={'weight': 'normal', 'size': 20}, color='red')

# NOTE(review): the second call resets the y tick labels to black, so the red setting
# of the first call has no visible effect - confirm which colour is intended.
ax1.tick_params(axis='y', labelcolor='red')
ax1.tick_params(axis='both', which='major',labelcolor='black', labelsize=15)

# Legend for the force curves
ax1.legend(loc='upper left', fontsize=20)

# Each history entry is a single-element list, so max() compares those lists and
# [0] extracts the largest value.
max_val = max(
    max(train_loss_force),
    max(validation_loss_force)
)

ax1.set_ylim(0, max_val[0])

# Second y-axis: HI loss
ax2 = ax1.twinx()
ax2.plot(train_loss_HI, 'b-', linewidth=2.5)
ax2.plot(train_loss_HI, marker='o', markersize=5, color='blue', linestyle='None', label='Training loss for HI')
ax2.plot(validation_loss_HI, 'b-', linewidth=2.5)
ax2.plot(validation_loss_HI, marker='*', markersize=5, color='blue', linestyle='None', label='Validation loss for HI')

ax2.set_ylabel('Loss for HI', fontdict={'weight': 'normal', 'size': 20}, color='blue')
ax2.tick_params(axis='y', labelcolor='blue')
ax2.tick_params(axis='both', which='major',labelcolor='blue', labelsize=15)

max_val = max(
    max(train_loss_HI),
    max(validation_loss_HI)
)

ax2.set_ylim(0, max_val[0])
ax2.legend(loc='upper right', fontsize=20)

# Inset axes zooming into the last 30 % of the epochs
x1 = int(len(train_loss_force)*0.7)
x2 = int(len(train_loss_force))

ax_inset = plt.axes([0.45, 0.25, 0.4, 0.4])  # [left, bottom, width, height]
ax_inset.plot(train_loss_force, 'r-', linewidth=2.5)
ax_inset.plot(train_loss_force, marker='o', markersize=8, color='red', linestyle='None')
ax_inset.plot(validation_loss_force, 'r-', linewidth=2.5)
ax_inset.plot(validation_loss_force, marker='*', markersize=8, color='red', linestyle='None')

max_val = max(
    max(train_loss_force[x1:x2]),
    max(validation_loss_force[x1:x2])
)

ax_inset.set_xlim(x1, x2)
ax_inset.set_ylim(0, max_val[0])
ax_inset.tick_params(axis='both', which='major',  labelcolor='red',labelsize=15)

# Twin y-axis of the inset for the HI loss (rebinds ax2 to the inset's twin axis)
ax2 = ax_inset.twinx()

ax2.plot(train_loss_HI, 'b-', linewidth=2.5)
ax2.plot(train_loss_HI, marker='o', markersize=5, color='blue', linestyle='None')
ax2.plot(validation_loss_HI, 'b-', linewidth=2.5)
ax2.plot(validation_loss_HI, marker='*', markersize=5, color='blue', linestyle='None')

max_val = max(
    max(train_loss_HI[x1:x2]),
    max(validation_loss_HI[x1:x2])
)

ax2.set_ylim(0, max_val[0])

ax2.tick_params(axis='y', labelcolor='blue')
ax2.tick_params(axis='both', which='major', labelsize=15)

# Save the figure
plt.savefig(f'{save_path}/Inverse_problem_Loss.jpg', dpi=600, bbox_inches='tight')
plt.show()