In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import onnxruntime as ort
import pickle
import plotly.express as px
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from pykalman import KalmanFilter
from sklearn.impute import KNNImputer
from tqdm import tqdm

# --- Configuration ---
# Relative paths: resolved against the current working directory at run time.
DATASET_PATH = "../datasets/MASTER_DATASET.csv"
MODEL_PATH = "../artifacts/rul_model.onnx"
SCALER_PATH = "../artifacts/rul_scaler.pkl"

# --- 1. Load Data, Model, and Scaler ---
print("Loading data and artifacts...")
df = pd.read_csv(DATASET_PATH)

# Open the ONNX model and remember the name of its first declared input;
# that name is the key used when feeding data to the session.
session = ort.InferenceSession(MODEL_PATH)
first_model_input = session.get_inputs()[0]
input_name = first_model_input.name

# NOTE: unpickling can execute arbitrary code - only load a scaler file that
# comes from a trusted source.
with open(SCALER_PATH, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
print("✅ Artifacts loaded successfully.")


# --- 2. Full Preprocessing Pipeline (must be identical to training) ---
print("\nStarting full preprocessing pipeline...")

# Kalman Filter
# Each listed column is smoothed on its own: a Kalman filter is fitted to the
# column with EM and the column is overwritten with the smoothed state means.
kalman_filter_columns = ["Pressure_In", "Temperature_In", "Vibration", "Temperature_Out"]
for col in tqdm(kalman_filter_columns, desc="Applying Kalman Filter"):
    # pykalman only treats *masked* entries as missing measurements; a raw NaN
    # would propagate through the filter and poison the whole smoothed column.
    # For a column with no NaN/inf the mask is empty, so results are unchanged.
    # The array is extracted once and reused for both the EM fit and smoothing.
    observations = np.ma.masked_invalid(df[col].values)
    kf = KalmanFilter(initial_state_mean=df[col].mean(), n_dim_obs=1)
    em_kf = kf.em(observations)
    smoothed_values = em_kf.smooth(observations)[0]  # [0] -> state means
    df[col] = smoothed_values.flatten()

# KNN Imputer
# Fills remaining missing values across every numeric column at once.
# NOTE(review): numeric_cols is *all* numeric columns, so it would include
# 'Label' if that column is numeric - confirm this matches training.
numeric_cols = df.select_dtypes(include=np.number).columns
imputer = KNNImputer(n_neighbors=3)
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

# Feature Selection
# Take the feature names from the fitted scaler and select those columns.
feature_cols = scaler.get_feature_names_out()
df_features = df[feature_cols]

# Scaling
data_scaled = scaler.transform(df_features)

# Sequence Creation
# The window length is read from the second dimension of the model's first input.
window_size = session.get_inputs()[0].shape[1] # e.g., 60
if not isinstance(window_size, int):
    # ONNX Runtime reports a dynamic axis as a symbolic name (str) or None;
    # fail here with a clear message instead of a TypeError later in range().
    raise ValueError(
        f"Model input '{input_name}' has no fixed sequence length "
        f"(shape[1]={window_size!r}); window_size cannot be inferred from the model."
    )
def create_sequences(data, window_size):
    """Cut ``data`` into overlapping windows of ``window_size`` consecutive rows.

    A window is produced for every start index in
    ``range(len(data) - window_size)``; the final possible window (the one
    ending on the last row of ``data``) is therefore not emitted. Progress is
    reported through a tqdm bar labelled "Creating Sequences".

    Returns:
        The windows stacked into a single ``float32`` NumPy array.
    """
    n_windows = len(data) - window_size
    window_starts = tqdm(range(n_windows), desc="Creating Sequences")
    return np.array(
        [data[start:start + window_size] for start in window_starts],
        dtype=np.float32,
    )

# Build the model inputs and the matching targets. The first `window_size`
# labels are dropped so that y_true has one entry per generated window.
X = create_sequences(data_scaled, window_size)
label_values = df['Label'].values
y_true = label_values[window_size:]  # Ground truth RUL values

print("✅ Preprocessing complete.")


# --- 3. Run Prediction on All Sequences ---
print("\nRunning predictions on all sequences...")
# All windows are fed in a single call; the first returned output is
# flattened to a 1-D array of predictions.
model_outputs = session.run(None, {input_name: X})
y_pred = model_outputs[0].flatten()
print("✅ Prediction complete.")


# --- 4. Quantitative Evaluation (Metrics) ---
print("\n--- RUL Model Performance Metrics ---")
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R-squared (R²) Score:     {r2:.4f}")


# --- 5. Visual Evaluation ---
print("\n--- Generating Interactive Visualization ---")
# Pair every ground-truth value with its prediction in one table for plotting.
plot_df = pd.DataFrame({'Actual RUL': y_true, 'Predicted RUL': y_pred})

scatter_options = dict(
    x="Actual RUL",
    y="Predicted RUL",
    title="RUL Model Performance: Actual vs. Predicted",
    opacity=0.5,
    trendline="ols",  # Fitted regression line showing the overall trend
    trendline_color_override="red",
)
fig = px.scatter(plot_df, **scatter_options)

# Reference diagonal (y = x) spanning the range of the actual values: points
# on this line are perfect predictions.
rul_low = y_true.min()
rul_high = y_true.max()
fig.add_shape(
    type='line',
    x0=rul_low,
    y0=rul_low,
    x1=rul_high,
    y1=rul_high,
    line={'color': 'green', 'width': 2, 'dash': 'dash'},
    name='Perfect Prediction',
)

fig.show()

Loading data and artifacts...
✅ Artifacts loaded successfully.

Starting full preprocessing pipeline...


Applying Kalman Filter: 100%|██████████| 4/4 [22:44<00:00, 341.13s/it]
Creating Sequences: 100%|██████████| 99940/99940 [00:00<00:00, 1998906.75it/s]


✅ Preprocessing complete.

Running predictions on all sequences...
✅ Prediction complete.

--- RUL Model Performance Metrics ---
Mean Absolute Error (MAE): 0.2842
R-squared (R²) Score:     0.7223

--- Generating Interactive Visualization ---
