In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import warnings
warnings.filterwarnings(action="ignore")

In [2]:
# Reading Pre-processed Data
def read(data_path):
    """Load one pre-processed signal CSV, ordered by turbine and time.

    Parameters
    ----------
    data_path : str or file-like
        Passed straight to ``pd.read_csv``. The file must provide
        ``Turbine_ID`` and ``Timestamp`` columns.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``Timestamp`` parsed to datetimes and rows
        sorted by ``Turbine_ID`` then ``Timestamp``. The row index from the
        file order is kept (it is not reset).
    """
    frame = pd.read_csv(data_path)
    frame = frame.assign(Timestamp=pd.to_datetime(frame['Timestamp']))
    return frame.sort_values(by=['Turbine_ID', 'Timestamp'])

# One processed CSV per monitored subsystem, plus the combined signal file.
SYSTEM_CSV_PATHS = {
    "GEARBOX": "./data/GEARBOX_processed.csv",
    "BEARING": "./data/GENERATOR_BEARING_processed.csv",
    "GENERATOR": "./data/GENERATOR_processed.csv",
    "HYDRAULIC": "./data/HYDRAULIC_GROUP_processed.csv",
    "TRANSFORMER": "./data/TRANSFORMER_processed.csv",
    "ALL": "./data/all_signals_processed.csv",
}

# Map each system name to its loaded, time-ordered frame.
data = {system: read(path) for system, path in SYSTEM_CSV_PATHS.items()}

In [3]:
# Function to obtain X for past n steps, Y and corresponding lead time
def get_XY_with_steps(data, x_steps=1):
    """Build flattened sliding-window samples from one time-ordered frame.

    For every position ``i`` the feature rows ``i - x_steps .. i`` (inclusive,
    so ``x_steps + 1`` rows) are flattened into a single sample, and the
    targets are read from the following row ``i + 1``.

    The frame is addressed purely by column position:
    columns ``2:-4`` are the features, column ``-1`` is the label, column
    ``-2`` is the lead time, and column ``0`` joined with column ``-3`` by a
    space forms the event key.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows in the order the windows should follow.
    x_steps : int, default 1
        Number of rows before the current row to include in each window.

    Returns
    -------
    dict
        ``{"X": [...], "Y": [...], "Lead Time": [...], "Event": [...]}`` with
        four equally long lists. The lists are empty when the frame has fewer
        than ``x_steps + 2`` rows.

    Raises
    ------
    ValueError
        If ``x_steps`` is negative (the window would be empty or wrap around).
    """
    if x_steps < 0:
        raise ValueError(f"x_steps must be non-negative, got {x_steps}")

    # Pull each column block out of pandas once; per-row .iloc lookups inside
    # the loop are orders of magnitude slower than plain array indexing.
    features = data.iloc[:, 2:-4].to_numpy()
    labels = data.iloc[:, -1].to_numpy()
    lead_times = data.iloc[:, -2].to_numpy()
    key_prefix = data.iloc[:, 0].to_numpy()
    key_suffix = data.iloc[:, -3].to_numpy()

    X, Y, lead, event = [], [], [], []
    for i in range(x_steps, len(data) - 1):
        # flatten() copies, so every sample owns its own buffer.
        X.append(features[i - x_steps:i + 1].flatten())
        Y.append(labels[i + 1])
        lead.append(lead_times[i + 1])
        event.append(key_prefix[i + 1] + " " + key_suffix[i + 1])
    return {
        "X": X,
        "Y": Y,
        "Lead Time": lead,
        "Event": event
        }

# Function to undersample the majority class so that the data is balanced
def undersample_majority(x, y, random_state=None):
    """Randomly drop samples so every class is as frequent as the rarest one.

    Each class that has more samples than the rarest class present in ``y``
    is down-sampled without replacement; classes already at that size are
    kept in full and in their original order. Works for two or more classes,
    and is a no-op when the classes are already balanced.

    Parameters
    ----------
    x : array-like
        Samples; indexed along the first axis.
    y : array-like
        One label per sample (1-D).
    random_state : int or None, default None
        Seed for a private ``RandomState``. With ``None`` the global
        ``np.random`` state is used, so ``np.random.seed`` still controls
        the draw.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The retained samples and their labels, grouped by class with the
        most frequent class first.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length, got {len(x)} and {len(y)}"
        )
    if len(y) == 0:
        return x, y

    # Count only the classes that actually occur; np.bincount would also
    # report absent labels with a count of zero and empty the result.
    classes, counts = np.unique(y, return_counts=True)
    keep_per_class = counts.min()
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    kept = []
    # Stable descending sort keeps the largest class first; ties stay in
    # label order.
    for position in np.argsort(-counts, kind="stable"):
        class_idx = np.flatnonzero(y == classes[position])
        if counts[position] > keep_per_class:
            class_idx = rng.choice(class_idx, keep_per_class, replace=False)
        kept.append(class_idx)

    idx_to_keep = np.concatenate(kept)
    return x[idx_to_keep], y[idx_to_keep]

In [4]:
# Taking Gearbox data for example
df = data["GEARBOX"]

# Parameters
test_size = 0.25
val_size = 0.15
steps = 6*24
random_seed = 66  # shared by the undersampling draw and the validation split

# Separate and save test data for later use (sort once, slice twice).
# NOTE(review): the test set is the EARLIEST `test_size` share of rows and the
# model trains on the later rows, i.e. it is evaluated on data that precedes
# its training period - confirm this direction is intended.
df_by_time = df.sort_values(by='Timestamp')
n_test = int(len(df) * test_size)
test_data = df_by_time[:n_test].reset_index(drop=True)
train_data = df_by_time[n_test:].reset_index(drop=True)

# Split training data into X, Y and lead time, one turbine at a time.
# Iterating the groupby directly always hands the full frame (including the
# Turbine_ID column) to get_XY_with_steps, which addresses columns by
# position; groupby.apply may omit the grouping column depending on the
# pandas version.
train_xy = [
    get_XY_with_steps(group, steps)
    for _, group in train_data.groupby("Turbine_ID")
]
X_train, Y_train = [], []
for turbine_xy in train_xy:
    # extend() grows in place instead of copying the whole list every turbine
    X_train.extend(turbine_xy['X'])
    Y_train.extend(turbine_xy['Y'])
X_train, Y_train = np.array(X_train), np.array(Y_train)

# Balance the training data; seed the global NumPy RNG first so the random
# undersampling is the same on every run.
np.random.seed(random_seed)
X_train_balanced, Y_train_balanced = undersample_majority(X_train, Y_train)

# Separate and save validation data
# NOTE(review): consecutive windows share all but one row, so a random split
# puts near-duplicate samples on both sides; validation scores are likely
# optimistic compared with the chronological test set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_balanced, Y_train_balanced, test_size=val_size, random_state=random_seed
)

In [25]:
# Sanity check: shapes of the training features and labels after the train/validation split
X_train.shape, y_train.shape

((26110, 6090), (26110,))

In [16]:
# Create model
def make_model(input_size):
    """Assemble the uncompiled dense network used as the binary classifier.

    Layout: Dense(512, relu) -> Dense(128, relu) -> Dense(128, relu)
    -> Dense(1, sigmoid).

    Parameters
    ----------
    input_size : int
        Length of one flattened input sample.

    Returns
    -------
    Sequential
        The model; the caller is responsible for compiling and fitting it.
    """
    network = Sequential()
    # The first layer carries the input shape.
    network.add(Dense(512, activation="relu", input_shape=(input_size,)))
    for hidden_units in (128, 128):
        network.add(Dense(hidden_units, activation="relu"))
    # A single sigmoid unit yields a probability-like score in [0, 1].
    network.add(Dense(1, activation="sigmoid"))
    return network

# Fix TensorFlow's global seed BEFORE the model is built so weight
# initialisation and batch shuffling are repeatable across runs.
tf.random.set_seed(66)

model = make_model(X_train.shape[1])

# Early stopping to limit overfitting: stop once val_loss has not improved by
# at least 0.001 for 10 epochs, then roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10, verbose=1, mode='min', restore_best_weights=True)

# Class weights: errors on the positive class (label 1) count 5x in the loss.
# This penalises missed positives (false negatives) and pushes the model
# toward high recall at the expense of precision.
class_weights = {0: 1, 1: 5}

# Compile and train
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(
    X_train, y_train.astype(float),
    class_weight=class_weights,
    validation_data=(X_val, y_val.astype(float)),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping]
    )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 20: early stopping


In [17]:
# Obtain evaluation metrics for the model
def evaluate_model(y_pred, y_true):
    """Print accuracy, precision, recall and F1 for a set of predictions.

    Note the argument order: predictions first, ground truth second. The
    scikit-learn scorers are still called as ``scorer(y_true, y_pred)``.

    Parameters
    ----------
    y_pred : array-like
        Predicted class labels.
    y_true : array-like
        Ground-truth class labels.

    Returns
    -------
    None
        The four scores are only printed, one per line.
    """
    scorers = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )
    # Each score is computed and printed before the next one is evaluated.
    for label, scorer in scorers:
        print(f"{label}: {scorer(y_true, y_pred)}")

# Score the validation split: sigmoid outputs above 0.5 count as class 1.
y_true = y_val.astype(int)
y_pred = (model.predict(X_val) > 0.5).astype(int).flatten()

evaluate_model(y_pred, y_true)

Accuracy: 0.740234375
Precision: 0.6604042129234273
Recall: 0.9982788296041308
F1 Score: 0.7949289018331335


In [21]:
# Split testing data into X, Y, lead time and event key, one turbine at a time.
# Iterating the groupby directly always hands the full frame (including the
# Turbine_ID column) to get_XY_with_steps, which addresses columns by
# position; groupby.apply may omit the grouping column depending on the
# pandas version.
test_xy = [
    get_XY_with_steps(group, steps)
    for _, group in test_data.groupby("Turbine_ID")
]
X_test, Y_test, Y_lead, Y_event = [], [], [], []
for turbine_xy in test_xy:
    # extend() grows in place instead of copying the whole list every turbine
    X_test.extend(turbine_xy['X'])
    Y_test.extend(turbine_xy['Y'])
    Y_lead.extend(turbine_xy['Lead Time'])
    Y_event.extend(turbine_xy['Event'])
X_test, Y_test, Y_lead, Y_event = np.array(X_test), np.array(Y_test), np.array(Y_lead), np.array(Y_event)
X_test.shape, Y_test.shape

((107803, 6090), (107803,))

In [22]:
# Hard 0/1 predictions for the test windows (sigmoid output thresholded at 0.5)
y_pred = (model.predict(X_test) > 0.5).astype(int).flatten()
y_true = Y_test.astype(int)



In [26]:
# False positives are counted per SAMPLE: every window labelled 0 but
# predicted 1. (Detections and misses below are counted per EVENT.)
FP = int(np.sum((y_true == 0) & (y_pred == 1)))
FN, TP_lead = 0, []

# Lead time recorded for each event key, or -1 while no positive window of
# that event has been predicted as 1. Named `event_warnings` so the imported
# `warnings` module is not shadowed.
event_warnings = {}
for i in np.flatnonzero(y_true == 1):
    event = Y_event[i]
    if event not in event_warnings:
        event_warnings[event] = -1
    # Only the first flagged window (in iteration order) sets the lead time;
    # later windows of an already-detected event are ignored.
    if event_warnings[event] < 0 and y_pred[i] == 1:
        event_warnings[event] = Y_lead[i]

# An event that was never flagged is a false negative; otherwise keep its lead time.
for lead in event_warnings.values():
    if lead < 0:
        FN += 1
    else:
        TP_lead.append(lead)

FP, FN, TP_lead

(1073, 0, [59])

In [27]:
# Cost constants.
# NOTE(review): presumably R = replacement cost, M = maintenance/repair cost,
# I = inspection cost per false alarm, and 60 = the lead-time horizon used to
# scale the benefit - confirm against the cost model this notebook follows.
R, M, I = 100000, 20000, 5000

# Each detected event earns (lead / 60) * (R - M); each false positive costs I.
savings = sum((l / 60) * (R - M) for l in TP_lead) - FP * I
savings

-5286333.333333333

In [11]:
# Save Model
# model.save('./saved_models/model_0404')

INFO:tensorflow:Assets written to: ./saved_models/model_0404\assets


INFO:tensorflow:Assets written to: ./saved_models/model_0404\assets


In [20]:
# To load the model later
# model = load_model('./saved_models/model_0404')