In [1]:
import os
import pandas as pd
from scipy.signal import medfilt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import joblib
from matplotlib.font_manager import FontProperties

# 设置字体
plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
# 数据读取与预处理
data_dir = r'/home/lyj/2500个数据'
all_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir)]

# 定义最大序列长度
max_length = 5000  # 假设这是最长序列的长度

def preprocess_data(file):
    data = pd.read_csv(file, header=None)
    # 使用中值滤波进行去噪
    filtered_data = medfilt(data.iloc[:, 0].values, kernel_size=5)
    # 从第二列读取标签
    labels = data.iloc[:, 1].values  
    return filtered_data, labels

# 将所有文件的数据读取并预处理
all_filtered_data = []
all_labels = []

for file in all_files:
    filtered_data, labels = preprocess_data(file)
    all_filtered_data.append(filtered_data)
    all_labels.append(labels)

# 将数据和标签合并为数组
all_filtered_data = np.array(all_filtered_data)
all_labels = np.array(all_labels)

# 归一化数据
scaler = MinMaxScaler(feature_range=(0, 1))
all_filtered_data = scaler.fit_transform(all_filtered_data.reshape(-1, 1)).reshape(all_filtered_data.shape)

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(all_filtered_data, all_labels, test_size=0.2, random_state=42)

# 将数据转换为 LSTM 输入格式
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# 加载模型
model =joblib.load('LSTM_model.pkl')
# 模型评估
y_pred = (model.predict(X_test) > 0.43).astype("int32")

import numpy as np
import matplotlib.pyplot as plt

# 函数用于计算单个样本的准确率
def calculate_accuracy(y_true, y_pred):
    true_positives = np.sum((y_pred == 1) & (y_true == 1))
    true_negatives = np.sum((y_pred == 0) & (y_true == 0))
    false_positives = np.sum((y_pred == 1) & (y_true == 0))
    false_negatives = np.sum((y_pred == 0) & (y_true == 1))
    
    accuracy = (true_positives + true_negatives) / (true_positives + false_positives + true_negatives + false_negatives)
    return accuracy

# 计算每个样本的准确率
accuracies = [calculate_accuracy(y_test[i], y_pred[i]) for i in range(len(y_test))]

# 找出准确率最高的20个样本的索引
top_indices = np.argsort(accuracies)[-20:]

# 可视化准确率最高的20个样本
for index in top_indices:
    sample_data = X_test[index].flatten()
    sample_labels = y_test[index]
    sample_pred = y_pred[index]

    # 绘制原始数据与去噪后的数据
    plt.figure(figsize=(14, 7))
    plt.plot(sample_data, label='处理后数据')

    predicted_events = np.where(sample_pred == 1)[0]
    plt.plot(predicted_events, sample_data[predicted_events], 'o', markersize=3, color='red', label='预测事件')

    true_events = np.where(sample_labels == 1)[0]
    plt.scatter(true_events, sample_data[true_events], color='green', label='真实事件')

    plt.legend()
    plt.title('地震迹线标签图像')
    plt.show()

    # 绘制预测与真实标签
    plt.figure(figsize=(14, 7))
    plt.scatter(range(len(sample_labels)), sample_labels, c='g', label='真实值')

    predicted_one_indices = np.where(sample_pred == 1)[0]
    predicted_zero_indices = np.where(sample_pred == 0)[0]

    plt.scatter(predicted_one_indices, sample_pred[predicted_one_indices], c='r', s=0.7, label='预测标签 (1)')
    plt.scatter(predicted_zero_indices, sample_pred[predicted_zero_indices], c='y', s=0.7, label='预测标签 (0)')

    plt.legend()
    plt.title('预测与实际标签的对比')
    plt.show()

    # 计算精度和准确率
    predicted_positives = (sample_pred == 1)
    true_positives = np.sum((sample_pred == 1) & (sample_labels == 1))
    actual_positives = np.sum(sample_labels == 1)
    true_negatives = np.sum((sample_pred == 0) & (sample_labels == 0))
    false_positives = np.sum(predicted_positives) - true_positives
    false_negatives = actual_positives - true_positives

    precision = true_positives / (true_positives + false_positives)
    accuracy = (true_positives + true_negatives) / (true_positives + false_positives + true_negatives + false_negatives)

    print(f"样本 {index} Accuracy: {accuracy:.4f}")
    print(f"样本 {index} Precision: {precision:.4f}\n")

ModuleNotFoundError: No module named 'tensorflow'

In [18]:
# 计算平均精确率
average_precision = np.mean(precision)

# 计算平均准确率
average_accuracy = np.mean([accuracies[i] for i in top_indices])

print(f"二十个样本的精确率: {average_precision:.4f}")
print(f"二十个样本的准确率: {average_accuracy:.4f}")

二十个样本的精确率: 1.0000
二十个样本的准确率: 0.9604
