In [None]:
import subprocess
import sys

def install_if_missing(package):
    try:
        __import__(package)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

packages = ["pandas", "numpy", "matplotlib", "scikit-learn", "tensorflow"]

for package in packages:
    install_if_missing(package)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf


file_path_2 = "../data/guidewire-2.csv"
df = pd.read_csv(file_path_2)

# Convert Timestamp to datetime
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
df = df.sort_values(by="Timestamp")

# Compute total network packets dropped
df["Network Packets Dropped (p/s)"] = df["Network Transmit Packets Dropped (p/s)"] + df["Network Receive Packets Dropped (p/s)"]


# Select network-related features
network_features = ["Network Receive Bytes", "Network Transmit Bytes",
                    "Network Receive Packets (p/s)", "Network Transmit Packets (p/s)",
                    "Network Packets Dropped (p/s)"]

# Normalize features
scaler = MinMaxScaler()
df[network_features] = scaler.fit_transform(df[network_features])

# Generate anomaly labels (1 = network failure, 0 = normal)
df["Network Failure"] = (df["Network Packets Dropped (p/s)"] > 0.05).astype(int)

# Convert to NumPy array
data = df[network_features].values
labels = df["Network Failure"].values

# Create sequences for LSTM (past 10 steps)
def create_sequences(data, labels, seq_length=10):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(labels[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 10
X, y = create_sequences(data, labels, seq_length)

# Split into train and test sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]




In [5]:
# Build LSTM Model
lstm_model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(seq_length, X.shape[2])),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(1, activation="sigmoid")
])

# Compile Model
lstm_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train Model
lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Get LSTM Predictions
lstm_preds = lstm_model.predict(X_test).flatten()
lstm_preds = (lstm_preds > 0.5).astype(int)  # Convert probabilities to binary


2025-03-23 20:50:38.995283: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-03-23 20:50:38.995360: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-03-23 20:50:38.995375: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1742743238.995453 3106048 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1742743238.995545 3106048 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(**kwargs)


Epoch 1/10


2025-03-23 20:50:40.107415: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 13ms/step - accuracy: 0.8035 - loss: 0.4675 - val_accuracy: 0.9523 - val_loss: 0.2356
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.8040 - loss: 0.4535 - val_accuracy: 0.9523 - val_loss: 0.2353
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.8039 - loss: 0.4497 - val_accuracy: 0.9523 - val_loss: 0.2109
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.8038 - loss: 0.4479 - val_accuracy: 0.9520 - val_loss: 0.2276
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.8071 - loss: 0.4447 - val_accuracy: 0.9523 - val_loss: 0.2065
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - accuracy: 0.8049 - loss: 0.4449 - val_accuracy: 0.9522 - val_loss: 0.2037
Epoch 7/10
[1m

In [6]:
# Autoencoder Model
input_dim = X_train.shape[2]
autoencoder_input = Input(shape=(seq_length, input_dim))

# Encoder
encoded = LSTM(64, return_sequences=True)(autoencoder_input)
encoded = LSTM(32, return_sequences=False)(encoded)

# Decoder
decoded = Dense(64, activation="relu")(encoded)
decoded = Dense(input_dim, activation="sigmoid")(decoded)

# Autoencoder Model
autoencoder = Model(autoencoder_input, decoded)
autoencoder.compile(optimizer="adam", loss="mse")

# Train Autoencoder
autoencoder.fit(X_train, X_train[:, -1, :], epochs=10, batch_size=32, validation_data=(X_test, X_test[:, -1, :]))

# Compute reconstruction errors
reconstructions = autoencoder.predict(X_test)
reconstruction_errors = np.mean(np.abs(reconstructions - X_test[:, -1, :]), axis=1)

# Threshold for anomaly detection
threshold = np.percentile(reconstruction_errors, 95)
anomaly_preds = (reconstruction_errors > threshold).astype(int)


Epoch 1/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step - loss: 0.0194 - val_loss: 0.0029
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 12ms/step - loss: 9.2431e-04 - val_loss: 0.0019
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 12ms/step - loss: 4.9294e-04 - val_loss: 7.8140e-04
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 12ms/step - loss: 2.6284e-04 - val_loss: 3.8512e-04
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 12ms/step - loss: 1.1548e-04 - val_loss: 2.2461e-04
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - loss: 8.7575e-05 - val_loss: 2.0376e-04
Epoch 7/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - loss: 7.5694e-05 - val_loss: 2.6696e-04
Epoch 8/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/ste

In [7]:
# Hybrid model decision: if either LSTM or Autoencoder detects an anomaly, classify as failure
final_preds = np.logical_or(lstm_preds, anomaly_preds).astype(int)

# Evaluate
print("Accuracy:", accuracy_score(y_test, final_preds))
print(classification_report(y_test, final_preds))


Accuracy: 0.951995199519952
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       954
           1       0.95      1.00      0.98     19044

    accuracy                           0.95     19998
   macro avg       0.48      0.50      0.49     19998
weighted avg       0.91      0.95      0.93     19998

