In [1]:
import pandas as pd
import numpy as np

In [2]:
# Locations of the training-set and test-set CSV files
train_path = r'D:\xjtufiles\3ee\DGA\datasets\DGA_data_add_cleaned_train.csv'
test_path = r'D:\xjtufiles\3ee\DGA\datasets\DGA_data_add_cleaned_test.csv'

# Columns coerced to a numeric dtype; entries that cannot be parsed become NaN.
# `act` is the column later passed to the metrics as the ground-truth label.
_numeric_cols = ('h2', 'ch4', 'c2h6', 'c2h4', 'c2h2', 'act')

# Pre-process the training set
df_train = pd.read_csv(train_path)
for _col in _numeric_cols:
    df_train[_col] = pd.to_numeric(df_train[_col], errors='coerce')
df_train = df_train.reset_index(drop=True)  # fresh 0..n-1 index

# Pre-process the test set
df_test = pd.read_csv(test_path)
for _col in _numeric_cols:
    df_test[_col] = pd.to_numeric(df_test[_col], errors='coerce')
df_test = df_test.reset_index(drop=True)  # fresh 0..n-1 index

# Both df_train and df_test have now been pre-processed
print("训练集预处理完成，形状：", df_train.shape)
print("测试集预处理完成，形状：", df_test.shape)

训练集预处理完成，形状： (1436, 6)
测试集预处理完成，形状： (160, 6)


In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
# Dornenburg Ratio Method (DRM)
def drm_diagnosis(df):
    """Label every sample using this notebook's Dornenburg-style ratio rules.

    The frame is modified **in place** (five columns are added) and the same
    object is returned, so pass a copy if the input must stay untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide numeric columns ``h2``, ``ch4``, ``c2h6``, ``c2h4``
        and ``c2h2``.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, extended with ``r1`` (c2h2/c2h4), ``r2`` (ch4/h2),
        ``r3`` (c2h4/c2h6), ``r4`` (c2h2/ch4) and ``drm_diagnosis``.
        Diagnosis codes: 3 = arc discharge, 2 = overheating,
        1 = low-energy discharge, 0 = normal.

    Notes
    -----
    ``r4`` is kept as an output column but no rule reads it.
    A comparison against NaN is False, so a rule only fires when every
    ratio it tests is a real number; rows matching no rule get 0.
    """
    df['r1'] = df['c2h2'] / df['c2h4']
    df['r2'] = df['ch4'] / df['h2']
    df['r3'] = df['c2h4'] / df['c2h6']
    df['r4'] = df['c2h2'] / df['ch4']

    r1, r2, r3 = df['r1'], df['r2'], df['r3']

    # Rules in priority order: np.select takes the first matching condition,
    # which is exactly what an if/elif chain would do row by row.
    rules = [
        (r1 > 1) & (r3 > 1),               # 3: arc discharge
        (r2 > 1) & (r3 < 1),               # 2: overheating
        (r1 < 0.5) & (r2 < 1) & (r3 > 1),  # 1: low-energy discharge
    ]
    codes = [3, 2, 1]

    # Vectorised labelling: also well-defined (empty int64 column) for a
    # frame with zero rows.
    df['drm_diagnosis'] = np.select(rules, codes, default=0).astype(np.int64)
    return df

# HAE Diagnosis Method
def hae_diagnosis(df):
    """Label every sample by which of three gases dominates the mixture.

    The share of ``c2h2``, ``c2h4`` and ``ch4`` in their combined total is
    computed as a percentage; a gas exceeding 50 % decides the class.
    The frame is modified **in place** and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide numeric columns ``c2h2``, ``c2h4`` and ``ch4``.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, extended with ``c2h2_percent``, ``c2h4_percent``,
        ``ch4_percent`` and ``hae_diagnosis``.
        Diagnosis codes: 3 = arc discharge (c2h2 > 50 %),
        2 = overheating (c2h4 > 50 %), 1 = low-energy discharge
        (ch4 > 50 %), 0 = normal (no gas above 50 %).

    Notes
    -----
    When the three gases sum to zero the percentages are NaN, every
    comparison is False and the row is labelled 0.
    """
    total_gas = df['c2h2'] + df['c2h4'] + df['ch4']
    df['c2h2_percent'] = df['c2h2'] / total_gas * 100
    df['c2h4_percent'] = df['c2h4'] / total_gas * 100
    df['ch4_percent'] = df['ch4'] / total_gas * 100

    # Checked in priority order; np.select keeps the first match per row.
    dominance = [
        df['c2h2_percent'] > 50,  # 3: arc discharge
        df['c2h4_percent'] > 50,  # 2: overheating
        df['ch4_percent'] > 50,   # 1: low-energy discharge
    ]
    df['hae_diagnosis'] = np.select(dominance, [3, 2, 1], default=0).astype(np.int64)
    return df

# Run each diagnosis on its own copy of the test set, because the
# diagnosis functions add columns to the frame they receive.
df_test_drm = df_test.copy().pipe(drm_diagnosis)
df_test_hae = df_test.copy().pipe(hae_diagnosis)

# Performance evaluation
def evaluate(y_true, y_pred, method_name):
    """Print accuracy and weighted precision / recall / F1 for one method.

    Parameters
    ----------
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Labels produced by the diagnosis method.
    method_name : str
        Name shown in the report header.

    Returns
    -------
    None
        The four scores are only printed, each with four decimals.
    """
    # Shared options: class-weighted averaging, and 0 instead of a warning
    # when a metric's denominator is zero.
    weighted = {'average': 'weighted', 'zero_division': 0}

    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1 = (
        metric(y_true, y_pred, **weighted)
        for metric in (precision_score, recall_score, f1_score)
    )

    print(
        f"{method_name} 诊断结果:",
        f"  准确率: {accuracy:.4f}",
        f"  精确率: {precision:.4f}",
        f"  召回率: {recall:.4f}",
        f"  F1分数: {f1:.4f}",
        sep="\n",
    )

# Score the DRM predictions against the `act` column
evaluate(
    y_true=df_test_drm['act'],
    y_pred=df_test_drm['drm_diagnosis'],
    method_name="DRM",
)

# Score the HAE predictions against the `act` column
evaluate(
    y_true=df_test_hae['act'],
    y_pred=df_test_hae['hae_diagnosis'],
    method_name="HAE",
)

DRM 诊断结果:
  准确率: 0.1313
  精确率: 0.1646
  召回率: 0.1313
  F1分数: 0.1390
HAE 诊断结果:
  准确率: 0.4062
  精确率: 0.3386
  召回率: 0.4062
  F1分数: 0.3678


In [5]:
# Improved three-ratio method (ITR)
def itr_diagnosis(df):
    """Label every sample using this notebook's "improved three-ratio" rules.

    The frame is modified **in place** (four columns are added) and the same
    object is returned, so pass a copy if the input must stay untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide numeric columns ``h2``, ``ch4``, ``c2h6``, ``c2h4``
        and ``c2h2``.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, extended with ``r1`` (c2h2/c2h4), ``r2`` (ch4/h2),
        ``r3`` (c2h4/c2h6) and ``itr_diagnosis``.
        Diagnosis codes: 3 = arc discharge, 2 = overheating,
        1 = low-energy discharge, 0 = normal.

    Notes
    -----
    Each class is decided by a single ratio, tested in priority order
    ``r1 > 1`` -> 3, ``r2 > 1`` -> 2, ``r3 > 1`` -> 1. The nested
    sub-conditions of the earlier row-wise version returned the same code
    on every path and therefore never influenced the result; they are
    not repeated here.
    A comparison against NaN is False, so a NaN ratio simply fails its test.
    """
    df['r1'] = df['c2h2'] / df['c2h4']
    df['r2'] = df['ch4'] / df['h2']
    df['r3'] = df['c2h4'] / df['c2h6']

    # Priority-ordered rules; np.select keeps the first match per row,
    # equivalent to an if/elif chain.
    rules = [
        df['r1'] > 1,  # 3: arc discharge
        df['r2'] > 1,  # 2: overheating
        df['r3'] > 1,  # 1: low-energy discharge
    ]
    df['itr_diagnosis'] = np.select(rules, [3, 2, 1], default=0).astype(np.int64)
    return df

# Run the ITR diagnosis on a copy of the test set, because itr_diagnosis
# adds columns to the frame it receives.
df_test_itr = df_test.copy().pipe(itr_diagnosis)

# Performance evaluation
# NOTE: same definition as the `evaluate` in the earlier DRM/HAE cell.
def evaluate(y_true, y_pred, method_name):
    """Print accuracy and weighted precision / recall / F1 for one method.

    Parameters
    ----------
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Labels produced by the diagnosis method.
    method_name : str
        Name shown in the report header.

    Returns
    -------
    None
        The four scores are only printed, each with four decimals.
    """
    # Shared options: class-weighted averaging, and 0 instead of a warning
    # when a metric's denominator is zero.
    weighted = {'average': 'weighted', 'zero_division': 0}

    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1 = (
        metric(y_true, y_pred, **weighted)
        for metric in (precision_score, recall_score, f1_score)
    )

    print(
        f"{method_name} 诊断结果:",
        f"  准确率: {accuracy:.4f}",
        f"  精确率: {precision:.4f}",
        f"  召回率: {recall:.4f}",
        f"  F1分数: {f1:.4f}",
        sep="\n",
    )

# Score the ITR predictions against the `act` column
evaluate(
    y_true=df_test_itr['act'],
    y_pred=df_test_itr['itr_diagnosis'],
    method_name="ITR",
)

ITR 诊断结果:
  准确率: 0.4625
  精确率: 0.3165
  召回率: 0.4625
  F1分数: 0.3709


In [6]:
# Original three-ratio method (IEC Ratio Method)
def iec_diagnosis(df):
    """Label every sample using this notebook's IEC-style three-ratio rules.

    The frame is modified **in place** (four columns are added) and the same
    object is returned, so pass a copy if the input must stay untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide numeric columns ``h2``, ``ch4``, ``c2h6``, ``c2h4``
        and ``c2h2``.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, extended with ``r1`` (c2h2/c2h4), ``r2`` (ch4/h2),
        ``r3`` (c2h4/c2h6) and ``iec_diagnosis``.
        Diagnosis codes: 3 = arc discharge, 2 = overheating,
        1 = low-energy discharge, 0 = normal.

    Notes
    -----
    A comparison against NaN is False, so a rule only fires when every
    ratio it tests is a real number; rows matching no rule get 0.
    """
    df['r1'] = df['c2h2'] / df['c2h4']
    df['r2'] = df['ch4'] / df['h2']
    df['r3'] = df['c2h4'] / df['c2h6']

    r1, r2, r3 = df['r1'], df['r2'], df['r3']

    # Rules in priority order; np.select keeps the first match per row,
    # equivalent to an if/elif chain.
    rules = [
        # Explicit "all ratios tiny" rule. It yields the same code as the
        # default and excludes every later rule, so it is kept purely to
        # spell out the rule table.
        (r1 < 0.1) & (r2 < 0.1) & (r3 < 0.1),  # 0: normal
        (r1 > 1) & (r3 > 1),                   # 3: arc discharge
        (r2 > 1) & (r3 < 1),                   # 2: overheating
        r3 > 1,                                # 1: low-energy discharge
    ]
    codes = [0, 3, 2, 1]

    df['iec_diagnosis'] = np.select(rules, codes, default=0).astype(np.int64)
    return df

# HAE Diagnosis Method (the original comment also calls it the "Duval triangle" method)
# NOTE: same rules as the `hae_diagnosis` defined in the earlier DRM/HAE cell.
def hae_diagnosis(df):
    """Label every sample by which of three gases dominates the mixture.

    The share of ``c2h2``, ``c2h4`` and ``ch4`` in their combined total is
    computed as a percentage; a gas exceeding 50 % decides the class.
    The frame is modified **in place** and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide numeric columns ``c2h2``, ``c2h4`` and ``ch4``.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, extended with ``c2h2_percent``, ``c2h4_percent``,
        ``ch4_percent`` and ``hae_diagnosis``.
        Diagnosis codes: 3 = arc discharge (c2h2 > 50 %),
        2 = overheating (c2h4 > 50 %), 1 = low-energy discharge
        (ch4 > 50 %), 0 = normal (no gas above 50 %).

    Notes
    -----
    When the three gases sum to zero the percentages are NaN, every
    comparison is False and the row is labelled 0.
    """
    total_gas = df['c2h2'] + df['c2h4'] + df['ch4']
    df['c2h2_percent'] = df['c2h2'] / total_gas * 100
    df['c2h4_percent'] = df['c2h4'] / total_gas * 100
    df['ch4_percent'] = df['ch4'] / total_gas * 100

    # Checked in priority order; np.select keeps the first match per row.
    dominance = [
        df['c2h2_percent'] > 50,  # 3: arc discharge
        df['c2h4_percent'] > 50,  # 2: overheating
        df['ch4_percent'] > 50,   # 1: low-energy discharge
    ]
    df['hae_diagnosis'] = np.select(dominance, [3, 2, 1], default=0).astype(np.int64)
    return df

# Run each diagnosis on its own copy of the test set, because the
# diagnosis functions add columns to the frame they receive.
df_test_iec = df_test.copy().pipe(iec_diagnosis)
df_test_hae = df_test.copy().pipe(hae_diagnosis)

# Performance evaluation
# NOTE: same definition as the `evaluate` in the earlier cells.
def evaluate(y_true, y_pred, method_name):
    """Print accuracy and weighted precision / recall / F1 for one method.

    Parameters
    ----------
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Labels produced by the diagnosis method.
    method_name : str
        Name shown in the report header.

    Returns
    -------
    None
        The four scores are only printed, each with four decimals.
    """
    # Shared options: class-weighted averaging, and 0 instead of a warning
    # when a metric's denominator is zero.
    weighted = {'average': 'weighted', 'zero_division': 0}

    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1 = (
        metric(y_true, y_pred, **weighted)
        for metric in (precision_score, recall_score, f1_score)
    )

    print(
        f"{method_name} 诊断结果:",
        f"  准确率: {accuracy:.4f}",
        f"  精确率: {precision:.4f}",
        f"  召回率: {recall:.4f}",
        f"  F1分数: {f1:.4f}",
        sep="\n",
    )

# Score the IEC predictions against the `act` column
evaluate(
    y_true=df_test_iec['act'],
    y_pred=df_test_iec['iec_diagnosis'],
    method_name="IEC",
)

# Score the HAE predictions against the `act` column
evaluate(
    y_true=df_test_hae['act'],
    y_pred=df_test_hae['hae_diagnosis'],
    method_name="HAE",
)

IEC 诊断结果:
  准确率: 0.1500
  精确率: 0.1316
  召回率: 0.1500
  F1分数: 0.1240
HAE 诊断结果:
  准确率: 0.4062
  精确率: 0.3386
  召回率: 0.4062
  F1分数: 0.3678
