In [2]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
 
# One .mat recording per entry; all of them are read from ../data_files/ODS.
file_names = ['97.mat', '105.mat', '118.mat', '130.mat', '169.mat',
              '185.mat', '197.mat', '209.mat', '222.mat', '234.mat']

for file in file_names:
    # Load the MAT file and list its variable names so the signal key of each
    # recording can be identified.
    # A forward slash is used as the path separator: it resolves on Windows,
    # Linux and macOS, whereas the previous "\\" only worked on Windows.
    data = loadmat(f'../data_files/ODS/{file}')
    print(list(data.keys()))

In [3]:
# Use the drive-end signal of every recording (the `*_DE_time` variables).
data_columns = ['X097_DE_time', 'X105_DE_time', 'X118_DE_time', 'X130_DE_time', 'X169_DE_time',
                'X185_DE_time', 'X197_DE_time', 'X209_DE_time', 'X222_DE_time', 'X234_DE_time']
# Column names of the combined frame; they are reused later as class labels.
columns_name = ['97', '105', '118', '130', '169', '185', '197', '209', '222', '234']

# Every signal is truncated to the same length so the columns line up.
# 119808 = 117 * 1024; the original author's note records a minimum recording
# length of 121265 samples, so the cut fits every file.
SIGNAL_LENGTH = 119808

signals = {}
for file_name, mat_key, label in zip(file_names, data_columns, columns_name):
    # Forward slash keeps the path portable (the previous "\\" was Windows-only).
    data = loadmat(f'../data_files/ODS/{file_name}')
    # Flatten the stored array to 1-D before truncating.
    signals[label] = data[mat_key].reshape(-1)[:SIGNAL_LENGTH]

# Build the frame in one go instead of inserting column by column.
data_12k_10c = pd.DataFrame(signals)
print(data_12k_10c.shape)
# Preview only the first rows rather than dumping the whole frame.
data_12k_10c.head()

In [38]:
# Sliding-window size and step.
window_size = 1024
stride = window_size // 2  # 50% overlap between consecutive windows

# One (n_windows, window_size + 1) array per signal: the window samples followed
# by the class label. The pieces are stacked once at the end; the previous
# version called pd.concat for every single window, which re-copies the growing
# frame each time and is quadratic in the number of windows.
labelled_windows = []
for futures_name in columns_name:
    signal = data_12k_10c[futures_name].to_numpy()
    # All windows of length `window_size`, thinned to every `stride`-th start.
    windows = np.lib.stride_tricks.sliding_window_view(signal, window_shape=window_size)[::stride]
    # The column name doubles as the numeric class label; append it as the last column.
    labels = np.full((windows.shape[0], 1), float(futures_name))
    labelled_windows.append(np.hstack([windows, labels]))

# Rows are windows (grouped by signal, in `columns_name` order); columns
# 0..window_size-1 hold the samples and the last column holds the label.
result_df = pd.DataFrame(np.vstack(labelled_windows))
# Check the result.
print(result_df.shape)  # (total number of windows, window_size + 1)

In [4]:
import numpy as np
import pandas as pd
# Load the windowed dataset from CSV.
result_temp_df = pd.read_csv(filepath_or_buffer="../data_files/CDM/origin_datas0.csv")
# Skip the first column (usually an ID column): keep all rows and every column
# from position 1 to the end.
result_df = result_temp_df.iloc[:, slice(1, None)]
result_df

# 堆叠处理
## 堆叠处理的本质：
通过 reshape 将一维时间序列向量变换为二维数组（例如把长度为 1024 的序列重排为 32×32），这一变换即为堆叠处理。
## 堆叠处理的用处：
常用于 LSTM 模型：将一维的训练样本变为二维对象，以降低一维长序列向量的信息损失，用二维对象尽可能保留信息。

In [5]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder  

# Features: all rows, every column except the last one.
X_temp = result_df.iloc[:, :-1]
# Convert to a NumPy array and "stack" each flat sample into a 32 x 32 array.
# The reshape raises if a row does not hold exactly 32 * 32 values.
X_array = X_temp.to_numpy()
X = X_array.reshape(X_temp.shape[0], 32, 32)

# Labels: the last column, selected as a 1-D Series. LabelEncoder expects 1-D
# input; the previous (n, 1) column vector only worked through a
# DataConversionWarning.
y_temp = result_df.iloc[:, -1]
y_encoded_labels_temp = y_temp.to_numpy()
le = LabelEncoder()
# Step 1: map the original label values to integer class ids 0..num_classes-1.
# (Despite its name, this variable holds integer ids, not one-hot vectors.)
y_one_hot_encoded_temp = le.fit_transform(y_encoded_labels_temp)
num_classes = len(le.classes_)
# Step 2: one-hot encode the integer class ids.
y = to_categorical(y_one_hot_encoded_temp, num_classes=num_classes)

# Save the label mapping: `le.classes_` is the sorted array of distinct original
# labels, so row i of the CSV is the original label of class id i.
unique_original_labels_only = le.classes_
unique_original_labels_only_df = pd.DataFrame(unique_original_labels_only, columns=['Column_Name'])
# index=False keeps the row index out of the file.
unique_original_labels_only_df.to_csv('../data_files/ADS/unique_original_labels_only.csv', index=False)

In [6]:
# Split into training and test sets first: 20% of the samples are held out,
# with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Show both shapes. The previous `X_train,X_test.shape` displayed the entire
# training array next to the test shape.
X_train.shape, X_test.shape

In [7]:
from keras.models import Sequential
from keras.layers import LSTM, Dense,Dropout
import keras

# Build a Sequential model.
model = Sequential()

# First LSTM layer. The input shape is (time steps, features) and is taken from
# the training data, i.e. (32, 32) after the stacking step. It returns the full
# sequence so that a second LSTM layer can be stacked on top.
model.add(LSTM(units=256, return_sequences=True, input_shape=X_train.shape[1:]))

# Second LSTM layer; as the last recurrent layer it returns only its final output.
model.add(LSTM(units=128))
# Dropout for regularisation, to reduce overfitting.
model.add(Dropout(0.5))
# Output layer: one unit per class (width of the one-hot targets). Softmax
# yields a probability distribution over mutually exclusive classes, which is
# what categorical cross-entropy expects; the previous sigmoid activation is
# meant for binary / multi-label outputs.
model.add(Dense(units=y_train.shape[1], activation='softmax'))

# Compile with categorical cross-entropy as the loss.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


class StopAtAccuracy(keras.callbacks.Callback):
    """Stop training once validation accuracy reaches 95%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check `val_accuracy` after each epoch and request a stop at >= 0.95.

        Does nothing when `logs` is empty or carries no `val_accuracy` entry.
        """
        if logs and logs.get('val_accuracy') is not None:
            if logs['val_accuracy'] >= 0.95:
                print(f'\nReached {logs["val_accuracy"]*100:.2f}% accuracy so cancelling training!')
                self.model.stop_training = True


# Instantiate the custom callback.
stop_at_accuracy = StopAtAccuracy()

# Train the model; 20% of the training data is used for validation, which
# provides the `val_accuracy` value the callback monitors.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[stop_at_accuracy])

In [33]:
# Persist the trained model to disk as 'LSTM_model.keras'.
model.save(filepath='LSTM_model.keras')

In [34]:
# Evaluate the model on the held-out test set and report both metrics.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')

In [36]:
import matplotlib.pyplot as plt 
# Training vs. validation loss per epoch.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'], label='train')
loss_ax.plot(history.history['val_loss'], label='val')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Model loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
plt.show()

# Training vs. validation accuracy per epoch.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(history.history['accuracy'], label='train')
acc_ax.plot(history.history['val_accuracy'], label='val')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Model accuracy')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylim(0, 1)  # fix the y-axis range to 0..1
plt.show()