<a href="https://colab.research.google.com/github/Sumit-Dwivedi/SHM-ML-model-for-pipelines/blob/main/SHM_For_Pipelines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#SHM project
!pip install pandas numpy matplotlib seaborn scikit-learn tensorflow

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from tensorflow import keras
from tensorflow.keras import layers

# Load the dataset
file_path = "/content/Merged_Pipeline_Data.xlsx"
# NOTE: the workbook handle is left open so that later cells can keep reading
# from `xls`; call xls.close() once it is no longer needed.
xls = pd.ExcelFile(file_path, engine="openpyxl")  # Ensure correct engine for .xlsx

# One sheet per excitation frequency, e.g. ['Frequency_1', ..., 'Frequency_5']
sheets = xls.sheet_names

# Per-sheet anomaly scores, keyed by sheet name
results = {}

# Signal columns fed to every detector (loop-invariant, so defined once)
features = ["Torsional (V)", "Flexural (V)"]
required_columns = ["Acquisition Number", *features]

for sheet in sheets:
    print(f"Processing {sheet}...")

    # A new Keras model is built for every sheet; drop the state left behind
    # by the previous one so memory does not grow with the number of sheets.
    keras.backend.clear_session()
    # Seed Python, NumPy and TensorFlow so that weight initialisation and
    # batch shuffling are repeatable, matching random_state=42 used below.
    keras.utils.set_random_seed(42)

    # Load the data for the current frequency
    df = pd.read_excel(xls, sheet_name=sheet)

    # Some sheets carry an extra row above the header; re-read skipping it
    # when the first column is not the expected one (or the sheet looks empty).
    if len(df.columns) == 0 or df.columns[0] != "Distance (m)":
        df = pd.read_excel(xls, sheet_name=sheet, skiprows=1)

    # Skip sheets that do not have the expected layout instead of failing
    # with a bare KeyError half-way through the workbook.
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        print(f"Skipping {sheet}: missing column(s) {missing}")
        continue

    # Ensure "Acquisition Number" is numeric (unparseable values become NaN
    # and therefore fall outside both ranges below)
    df["Acquisition Number"] = pd.to_numeric(df["Acquisition Number"], errors="coerce")

    # Filter Training Data (Acquisition 22-301)
    train_df = df[(df["Acquisition Number"] >= 22) & (df["Acquisition Number"] <= 301)]

    # Filter Testing Data (Acquisition 302-330)
    test_df = df[(df["Acquisition Number"] >= 302) & (df["Acquisition Number"] <= 330)]

    # Select relevant features. Non-numeric cells are coerced to NaN and the
    # affected rows dropped: IsolationForest and OneClassSVM reject NaN input.
    train_X = train_df[features].apply(pd.to_numeric, errors="coerce").dropna().astype(float)
    test_X = test_df[features].apply(pd.to_numeric, errors="coerce").dropna().astype(float)

    if train_X.empty or test_X.empty:
        print(
            f"Skipping {sheet}: {len(train_X)} training and "
            f"{len(test_X)} testing rows after filtering"
        )
        continue

    # Normalize the data (scaler is fitted on the training rows only)
    scaler = MinMaxScaler()
    train_X_scaled = scaler.fit_transform(train_X)
    test_X_scaled = scaler.transform(test_X)

    ## === Autoencoder Model === ##
    input_dim = train_X_scaled.shape[1]
    autoencoder = keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(32, activation='relu'),
        layers.Dense(16, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(train_X_scaled, train_X_scaled, epochs=50, batch_size=16, verbose=0)

    # Reconstruction error: mean absolute difference per sample.
    # verbose=0 keeps predict() as quiet as fit() above.
    train_pred = autoencoder.predict(train_X_scaled, verbose=0)
    train_errors = np.mean(np.abs(train_X_scaled - train_pred), axis=1)
    test_pred = autoencoder.predict(test_X_scaled, verbose=0)
    test_errors = np.mean(np.abs(test_X_scaled - test_pred), axis=1)

    ## === Isolation Forest === ##
    iso_forest = IsolationForest(contamination=0.05, random_state=42)
    iso_forest.fit(train_X_scaled)
    # decision_function: lower values are more abnormal
    test_iso_scores = iso_forest.decision_function(test_X_scaled)

    ## === One-Class SVM === ##
    oc_svm = OneClassSVM(nu=0.05, kernel="rbf", gamma="auto")
    oc_svm.fit(train_X_scaled)
    # decision_function: lower values are more abnormal
    test_svm_scores = oc_svm.decision_function(test_X_scaled)

    # Store results. "train_errors" is kept as well so that a threshold can
    # later be derived from the training distribution.
    results[sheet] = {
        "train_errors": train_errors,
        "test_errors": test_errors,
        "test_iso_scores": test_iso_scores,
        "test_svm_scores": test_svm_scores
    }

    # Plot results for the current frequency. The two decision-function
    # scores are negated so that, for all three curves, higher means more
    # anomalous.
    plt.figure(figsize=(10, 5))
    plt.plot(test_errors, label='Autoencoder Error', color='b')
    plt.plot(-test_iso_scores, label='Isolation Forest', color='r')
    plt.plot(-test_svm_scores, label='One-Class SVM', color='g')
    plt.title(f"Anomaly Detection for {sheet}")
    plt.xlabel("Test sample index")
    plt.ylabel("Anomaly score (higher = more anomalous)")
    plt.legend()
    plt.show()

# Combined plot across all frequencies. Iterate over `results` rather than
# `sheets` so that sheets skipped above do not raise a KeyError here.
if results:
    plt.figure(figsize=(12, 6))
    for sheet, sheet_results in results.items():
        plt.plot(sheet_results["test_errors"], label=f'{sheet} (Autoencoder)')
    plt.title("Comparison of Autoencoder Errors Across Frequencies")
    plt.xlabel("Test sample index")
    plt.ylabel("Reconstruction error")
    plt.legend()
    plt.show()
else:
    print("No sheets produced results; nothing to plot.")
