In [1]:
import glob
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader

import train_test
import utils
from LSTMFault import LSTMAutoencoder

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

Matplotlib is building the font cache; this may take a moment.


In [2]:
# Columns removed before building the model input: identifiers/metadata plus
# selected wind-speed, reactive-power and power channels.
columns_to_drop_WFA = ['time_stamp','train_test', 'asset_id', 'id', 'status_type_id','wind_speed_3_avg', 'wind_speed_4_avg',
       'wind_speed_3_max', 'wind_speed_3_min', 'wind_speed_3_std', 'reactive_power_27_avg', 'reactive_power_27_max',
       'reactive_power_27_min', 'reactive_power_27_std',
       'reactive_power_28_avg', 'reactive_power_28_max',
       'reactive_power_28_min', 'reactive_power_28_std', 'power_29_avg',
       'power_29_max', 'power_29_min', 'power_29_std', 'power_30_avg',
       'power_30_max', 'power_30_min', 'power_30_std']

folder_path = r'../data/Care_To_Compare/Wind Farm A/datasets/'

# glob returns matches in arbitrary, OS-dependent order. Sort them so the
# concatenated frame (and everything derived from it) has the same row order
# on every run. Note: the sort is lexicographic ("10.csv" precedes "2.csv").
csv_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))
if not csv_files:
    # Fail with an actionable message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {folder_path!r}")

# Read every file (semicolon-separated) and stack them into a single frame.
# NOTE(review): rows from different files become adjacent here; confirm that
# downstream windowing is meant to run across file boundaries.
df_list = [pd.read_csv(file, sep=';') for file in csv_files]
df = pd.concat(df_list, ignore_index=True)

# Index.drop raises KeyError if any listed column is absent from the data.
sensor_columns = df.columns.drop(columns_to_drop_WFA)
data = df[sensor_columns].values

In [3]:
# Settings for the loader and the autoencoder.
batch_size = 16
hidden_size = 64  # Adjustable
num_layers = 5  # Number of LSTM layers

train_data = data

# Build sequences with utils.create_sequences (second argument: 5) and wrap the
# result in a float32 tensor in one step.
X_train = torch.tensor(utils.create_sequences(data, 5), dtype=torch.float32)
train_loader = DataLoader(X_train, shuffle=True, batch_size=batch_size)

# Axis 2 of the sequence tensor supplies the number of sensor features.
input_size = X_train.shape[2]

# Instantiate the autoencoder and place it on the selected device.
model = LSTMAutoencoder(input_size, hidden_size, num_layers).to(device)

In [8]:
# SECURITY: weights_only=False makes torch.load unpickle arbitrary Python
# objects, which can execute code on load. Only open files produced by this
# project; never load checkpoints from an untrusted source this way.
scaler = torch.load('minmax_fault_model_WFA.pt', weights_only=False)
model_weights = 'lstm_fault_model_WFA.pt'
# This replaces any `model` object created earlier with the saved one.
model = torch.load(model_weights, map_location=device, weights_only=False)

# Score the training loader; eval_model returns the per-sample errors and a threshold.
train_errors, threshold = train_test.eval_model(model, train_loader, device, k=6)

# A non-finite threshold makes every later comparison against it meaningless,
# so surface it loudly instead of only printing "nan".
# NOTE(review): train_loader is built from `data` without `scaler` applied, and
# `data` is never checked for NaN/inf. Confirm which of these the model expects.
if not np.isfinite(threshold):
    warnings.warn(
        "eval_model returned a non-finite threshold; check the input data for "
        "NaN/inf values and whether it must be transformed with `scaler` first.",
        RuntimeWarning,
    )
print(f"Threshold: {threshold}")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Threshold: nan


In [10]:
# Display the threshold returned by train_test.eval_model (NaN in the saved output).
threshold

np.float32(nan)

In [12]:
# Score a single, separate CSV file with the loaded model.
file_path = r'../data/Care_To_Compare/Wind Farm A/datasets/26.csv'

inference_outputs = utils.process_new_data(
    model,
    file_path,
    columns_to_drop_WFA,
    scaler,
    batch_size,
    0.0004,  # presumably a fixed reconstruction-error threshold; confirm in utils.process_new_data
    device,
)
new_errors, local_threshold, global_anomalies, local_anomalies, timestamps = inference_outputs

In [27]:
import plotly.express as px

# Route DataFrame.plot through plotly (this is a process-wide pandas option).
pd.options.plotting.backend = "plotly"

# One row per scored sample: timestamp, reconstruction error and both anomaly flags.
anomaly_info = list(zip(timestamps, new_errors, global_anomalies, local_anomalies))
anomaly_df = pd.DataFrame(anomaly_info, columns=['time_stamp', 'reconstruction_error', 'is_anomaly_global', 'is_anomaly_local'])

# Base line chart of the reconstruction error over time.
fig = anomaly_df.plot(x='time_stamp', y='reconstruction_error')
scatter = px.scatter(anomaly_df, x='time_stamp', y='reconstruction_error', color='is_anomaly_local')

# The number of scatter traces depends on how many distinct values
# `is_anomaly_local` takes. Indexing data[0] and data[1] raised IndexError when
# only one value was present (e.g. no anomalies), so add whatever was produced.
fig.add_traces(list(scatter.data))

# Last expression: update_layout returns the figure, so the cell renders it.
fig.update_layout(width=1000, height=500)
