In [6]:
import os
import sys
from pathlib import Path

# Project root = parent of the notebook's working directory; it is put on
# sys.path so that the `src.*` package can be imported below.
# The previous `Path(__name__).resolve().parents[1]` reached the same directory
# only by accident: in a notebook __name__ is "__main__", so the path resolved
# to "<cwd>/__main__" and its second parent happened to be <cwd>'s parent.
SRC_PATH = Path.cwd().resolve().parent

# Re-running the cell must not pile up duplicate sys.path entries.
if str(SRC_PATH) not in sys.path:
    sys.path.append(str(SRC_PATH))

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf

from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
from sklearn.preprocessing import StandardScaler

from src.preprocess import MissingHandler


2024-09-30 15:19:31.541513: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-30 15:19:31.541678: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-30 15:19:31.557324: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-30 15:19:31.605736: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the pmu.csv dataset; the path is relative to the notebook's working directory.
pmu = pd.read_csv("../data/pmu.csv")

In [3]:
# Count missing values per column in the freshly loaded frame.
pmu.isnull().sum()

timestamp            0
frequency        11997
voltage          12053
current          12099
phase_angle      12062
anomaly_type         0
anomaly_class        0
dtype: int64

In [4]:
# Fill/repair missing values with the project's MissingHandler.
# NOTE(review): the exact strategy lives in src.preprocess - confirm there.
missing_handler = MissingHandler()
pmu = missing_handler.handle_missing_values(pmu)

# Fail fast if anything is still missing: NaNs would propagate through scaling
# and turn the autoencoder loss into NaN further down.
assert not pmu.isnull().to_numpy().any(), "missing values remain after MissingHandler"

In [5]:
# Re-check the per-column missing-value counts after handling.
pmu.isnull().sum()

timestamp        0
frequency        0
voltage          0
current          0
phase_angle      0
anomaly_type     0
anomaly_class    0
dtype: int64

In [7]:


# Standardize the four measurement columns; the scaler is fitted on every row.
feature_frame = pmu[['frequency', 'voltage', 'current', 'phase_angle']]
scaler = StandardScaler()
scaled_data = scaler.fit_transform(feature_frame)

# Only normal-state rows (anomaly_class == 0) are used to train the autoencoder.
normal_mask = pmu['anomaly_class'] == 0
normal_data = scaled_data[normal_mask]

# Autoencoder model definition
def build_autoencoder(input_dim, encoder_units=(32, 16, 8)):
    """Build and compile a symmetric dense autoencoder.

    The encoder stacks one ReLU ``Dense`` layer per entry of ``encoder_units``;
    the last entry is the bottleneck width. The decoder mirrors the encoder
    (all widths except the bottleneck, in reverse order) and ends in a linear
    ``Dense`` layer of width ``input_dim``.

    Args:
        input_dim: Number of input features, also the width of the output layer.
        encoder_units: Layer widths of the encoder, outermost first. The default
            ``(32, 16, 8)`` gives the layout 32-16-8-16-32-``input_dim``.

    Returns:
        A ``tf.keras`` ``Model`` compiled with the Adam optimizer and MSE loss.

    Raises:
        ValueError: If ``encoder_units`` is empty.
    """
    encoder_units = tuple(encoder_units)
    if not encoder_units:
        raise ValueError("encoder_units must contain at least one layer width")

    input_layer = tf.keras.layers.Input(shape=(input_dim,))

    # Encoder: progressively narrower ReLU layers down to the bottleneck.
    encoded = input_layer
    for units in encoder_units:
        encoded = tf.keras.layers.Dense(units, activation='relu')(encoded)

    # Decoder: mirror of the encoder without repeating the bottleneck width.
    decoded = encoded
    for units in reversed(encoder_units[:-1]):
        decoded = tf.keras.layers.Dense(units, activation='relu')(decoded)
    # Linear output so the reconstruction can take any real value.
    decoded = tf.keras.layers.Dense(input_dim, activation='linear')(decoded)

    autoencoder = tf.keras.models.Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

In [8]:
# Train the model
# Seed the random number generators before the model is built so that weight
# initialisation and batch shuffling repeat across Restart & Run All.
RANDOM_SEED = 42
tf.keras.utils.set_random_seed(RANDOM_SEED)

input_dim = normal_data.shape[1]
autoencoder = build_autoencoder(input_dim)

# The inputs double as the targets: the network learns to reconstruct
# normal-state samples. The History object is kept so the loss curves can be
# inspected later instead of being dropped as a bare cell output.
history = autoencoder.fit(
    normal_data,
    normal_data,
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_split=0.2,
)


2024-09-30 15:19:48.911375: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:b3:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-09-30 15:19:48.960712: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7fee745e5120>

In [9]:
# Compute the reconstruction error (mean absolute error per row) for every sample
reconstructed_data = autoencoder.predict(scaled_data)
reconstruction_error = np.mean(np.abs(reconstructed_data - scaled_data), axis=1)

# Ground-truth labels as a plain array, reused for masking and for every metric.
y_true = pmu['anomaly_class'].to_numpy()

# Detection rule: a sample is flagged when its reconstruction error is strictly
# greater than the threshold. The threshold is the 99th percentile of the errors
# of the normal-state samples, i.e. the top 1 % of normal samples are flagged.
THRESHOLD_PERCENTILE = 99
threshold = np.percentile(reconstruction_error[y_true == 0], THRESHOLD_PERCENTILE)
predictions = (reconstruction_error > threshold).astype(int)

# Performance evaluation
f1 = f1_score(y_true, predictions)
precision = precision_score(y_true, predictions)
recall = recall_score(y_true, predictions)
accuracy = accuracy_score(y_true, predictions)

print(f"accuracy Score: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

# Rows are the true classes, columns the predicted classes.
print(confusion_matrix(y_true, predictions))

accuracy Score: 0.9900
F1 Score: 0.0053
Precision: 0.0027
Recall: 0.5333
[[593970   6000]
 [    14     16]]


In [None]:
# Reconstruction error visualization
def visualize_reconstruction_error(reconstruction_error, predictions, anomaly_class, threshold):
    """Plot per-sample reconstruction error with detected and true anomalies.

    Draws the error as a line over the sample index, the threshold as a dashed
    red horizontal line, red ``x`` markers where ``predictions == 1`` and
    hollow green circles where ``anomaly_class == 1``, then shows the figure.

    Args:
        reconstruction_error: Per-sample errors; must support indexing with an
            integer index array (e.g. a 1-D NumPy array).
        predictions: Array compared element-wise against 1 to find flagged samples.
        anomaly_class: Array compared element-wise against 1 to find true anomalies.
        threshold: Height of the horizontal threshold line.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    _, ax = plt.subplots(figsize=(12, 6))
    ax.plot(reconstruction_error, label='Reconstruction Error')
    ax.axhline(y=threshold, color='r', linestyle='--', label='Threshold')

    # Samples the detector flagged as anomalous
    flagged = np.nonzero(predictions == 1)[0]
    ax.scatter(
        flagged,
        reconstruction_error[flagged],
        color='red',
        label='Detected Anomalies',
        marker='x',
    )

    # Samples labelled as anomalies in the ground truth
    labelled = np.nonzero(anomaly_class == 1)[0]
    ax.scatter(
        labelled,
        reconstruction_error[labelled],
        color='green',
        label='True Anomalies',
        marker='o',
        facecolors='none',
    )

    ax.set_title('Reconstruction Error with Anomalies')
    ax.set_xlabel('Sample Index')
    ax.set_ylabel('Reconstruction Error')
    ax.legend()
    plt.show()

# Visualize the reconstruction errors together with the anomaly-detection results
visualize_reconstruction_error(reconstruction_error, predictions, pmu['anomaly_class'].values, threshold)