In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf

# Load dataset
file_path_2 = "../data/guidewire-2.csv"
df = pd.read_csv(file_path_2)

# Convert Timestamp to datetime
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
df = df.sort_values(by="Timestamp")

# Select relevant features
features = ["CPU Usage (%)", "Memory Usage (%)", "Memory Requests (%)",
            "FS Reads Total (MB)", "FS Writes Total (MB)"]

# Normalize features
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

# Generate binary labels (1 = resource exhaustion, 0 = normal)
df["Resource Exhaustion"] = ((df["CPU Usage (%)"] > 0.9) | (df["Memory Usage (%)"] > 0.9)).astype(int)

# Convert to NumPy array
data = df[features].values
labels = df["Resource Exhaustion"].values

# Create sequences for LSTM (past 10 steps)
def create_sequences(data, labels, seq_length=10):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(labels[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 10
X, y = create_sequences(data, labels, seq_length)

# Split into train and test sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


In [5]:
# Build LSTM Model
lstm_model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(seq_length, X.shape[2])),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(1, activation="sigmoid")
])

# Compile Model
lstm_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train Model
lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Get LSTM Predictions
lstm_preds = lstm_model.predict(X_test).flatten()
lstm_preds = (lstm_preds > 0.5).astype(int)  # Convert probabilities to binary


2025-03-23 21:13:23.140581: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-03-23 21:13:23.140608: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-03-23 21:13:23.140616: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1742744603.140626 3143114 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1742744603.140644 3143114 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(**kwargs)


Epoch 1/10


2025-03-23 21:13:24.284228: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 13ms/step - accuracy: 0.9843 - loss: 0.0757 - val_accuracy: 0.9547 - val_loss: 0.2185
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.9918 - loss: 0.0464 - val_accuracy: 0.9547 - val_loss: 0.2004
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.9918 - loss: 0.0460 - val_accuracy: 0.9547 - val_loss: 0.1934
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.9923 - loss: 0.0432 - val_accuracy: 0.9547 - val_loss: 0.1928
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - accuracy: 0.9920 - loss: 0.0441 - val_accuracy: 0.9547 - val_loss: 0.2056
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.9924 - loss: 0.0421 - val_accuracy: 0.9547 - val_loss: 0.1981
Epoch 7/10
[1m

In [6]:
# Flatten LSTM outputs for XGBoost
X_train_xgb = X_train.reshape(X_train.shape[0], -1)
X_test_xgb = X_test.reshape(X_test.shape[0], -1)

# Train XGBoost Classifier
xgb_model = XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.1)
xgb_model.fit(X_train_xgb, y_train)

# Get XGBoost Predictions
xgb_preds = xgb_model.predict(X_test_xgb)


In [7]:
# Hybrid model decision: if either LSTM or XGBoost predicts exhaustion, classify as exhaustion
final_preds = np.logical_or(lstm_preds, xgb_preds).astype(int)

# Evaluate
print("Accuracy:", accuracy_score(y_test, final_preds))
print(classification_report(y_test, final_preds))


Accuracy: 0.9546954695469547
              precision    recall  f1-score   support

           0       0.95      1.00      0.98     19092
           1       0.00      0.00      0.00       906

    accuracy                           0.95     19998
   macro avg       0.48      0.50      0.49     19998
weighted avg       0.91      0.95      0.93     19998



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
