In [41]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_excel("Clean_OBD_Sensors1.xlsx")

# Basic cleaning
df = df.select_dtypes(include=['float64','int64'])   # keep numeric
df = df.replace([np.inf, -np.inf], np.nan)            # remove infinite
df = df.ffill().bfill().fillna(0)                     # fill missing

print("üìä Dataset Shape:", df.shape)
df.head()


üìä Dataset Shape: (348, 9)


Unnamed: 0,Engine Load (%),Throttle (%),Short Fuel Trim (%),Long Fuel Trim (%),O2 Sensor B1S1 (V),O2 Sensor B1S2 (V),Fuel Pressure (kPa),MAF (g/s),Battery Voltage (V)
0,18.823529,14.509804,-4.6875,10.15625,0.615,0.475,0.0,0.0,14.4
1,18.823529,14.509804,-6.25,10.15625,0.69,0.48,0.0,0.0,14.3
2,18.823529,14.509804,-5.46875,10.15625,0.255,0.475,0.0,0.0,14.4
3,19.215686,14.509804,-4.6875,10.15625,0.27,0.475,0.0,0.0,14.3
4,18.823529,14.509804,-6.25,10.15625,0.665,0.48,0.0,0.0,14.3


In [42]:
from sklearn.preprocessing import StandardScaler
import joblib
import os

# Create models directory if missing
os.makedirs("models", exist_ok=True)

scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Save scaler
joblib.dump(scaler, "models/scaler.pkl")
print("‚úî Saved: models/scaler.pkl")

df_scaled[:5]


‚úî Saved: models/scaler.pkl


array([[-0.74999216, -0.5925061 , -2.15789691,  2.2514361 ,  0.4385873 ,
         0.30823775,  0.        ,  0.        ,  1.08107677],
       [-0.74999216, -0.5925061 , -2.8603741 ,  2.2514361 ,  0.64762909,
         0.33251723,  0.        ,  0.        ,  0.55119689],
       [-0.74999216, -0.5925061 , -2.50913551,  2.2514361 , -0.5648133 ,
         0.30823775,  0.        ,  0.        ,  1.08107677],
       [-0.73667937, -0.5925061 , -2.15789691,  2.2514361 , -0.52300494,
         0.30823775,  0.        ,  0.        ,  0.55119689],
       [-0.74999216, -0.5925061 , -2.8603741 ,  2.2514361 ,  0.5779485 ,
         0.33251723,  0.        ,  0.        ,  0.55119689]])

In [43]:
import torch
from torch.utils.data import TensorDataset, DataLoader

WINDOW = 30
X = []

for i in range(len(df_scaled) - WINDOW):
    X.append(df_scaled[i:i+WINDOW].flatten())

X = torch.tensor(X, dtype=torch.float32)
dataset = TensorDataset(X, X)

loader = DataLoader(dataset, batch_size=32, shuffle=True)
input_dim = WINDOW * df.shape[1]

print("üî• Final Training Shape:", X.shape)


üî• Final Training Shape: torch.Size([318, 270])


  X = torch.tensor(X, dtype=torch.float32)


In [44]:
import torch.nn as nn

class AutoEncoder(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128), nn.ReLU(),
            nn.Linear(128, 64), nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(64, 128), nn.ReLU(),
            nn.Linear(128, input_dim)
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))

model = AutoEncoder(input_dim)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train model
for epoch in range(30):
    for batch,_ in loader:
        recon = model(batch)
        loss = loss_fn(recon, batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} | Loss: {loss.item():.6f}")

torch.save(model.state_dict(), "models/autoencoder.pth")
print("‚úî Saved: models/autoencoder.pth")


Epoch 1 | Loss: 0.705112
Epoch 2 | Loss: 0.651246
Epoch 3 | Loss: 0.573621
Epoch 4 | Loss: 0.583093
Epoch 5 | Loss: 0.458120
Epoch 6 | Loss: 0.437913
Epoch 7 | Loss: 0.412465
Epoch 8 | Loss: 0.406353
Epoch 9 | Loss: 0.379872
Epoch 10 | Loss: 0.353825
Epoch 11 | Loss: 0.331036
Epoch 12 | Loss: 0.310082
Epoch 13 | Loss: 0.288108
Epoch 14 | Loss: 0.268210
Epoch 15 | Loss: 0.244486
Epoch 16 | Loss: 0.283441
Epoch 17 | Loss: 0.284352
Epoch 18 | Loss: 0.259608
Epoch 19 | Loss: 0.235775
Epoch 20 | Loss: 0.259242
Epoch 21 | Loss: 0.232781
Epoch 22 | Loss: 0.252877
Epoch 23 | Loss: 0.254494
Epoch 24 | Loss: 0.212704
Epoch 25 | Loss: 0.244254
Epoch 26 | Loss: 0.242701
Epoch 27 | Loss: 0.213476
Epoch 28 | Loss: 0.205705
Epoch 29 | Loss: 0.200773
Epoch 30 | Loss: 0.174993
‚úî Saved: models/autoencoder.pth


In [45]:
# Based on your dataset scoring range
normal_th   = 0.50
warning_th  = 0.80
critical_th = 1.10


In [46]:
# Load Saved Models
scaler = joblib.load("models/scaler.pkl")
model = AutoEncoder(input_dim)
model.load_state_dict(torch.load("models/autoencoder.pth"))
model.eval()

def evaluate_window(window_data):
    # Convert back to dataframe to match scaler feature names
    window_df = pd.DataFrame(window_data, columns=df.columns)
    window = scaler.transform(window_df)
    window = torch.tensor(window, dtype=torch.float32).reshape(1, -1)

    recon = model(window).detach()
    score = torch.nn.functional.mse_loss(window, recon).item()

    # Scoring Calculations
    health_percent = max(0, 100 - ((score/critical_th)*100))
    failure_probability = min(100, (score/warning_th)*100)
    prediction_conf = min(99.9, abs(score-normal_th)/abs(critical_th-normal_th)*100)

    # Status
    if score <= normal_th:
        status = "üü¢ HEALTHY"
    elif score <= warning_th:
        status = "üü° WARNING"
    elif score <= critical_th:
        status = "üî¥ CRITICAL"
    else:
        status = "üî• FAILURE LIKELY"

    return {
        "anomaly_score": round(score, 5),
        "health_percent": round(health_percent, 2),
        "failure_probability": round(failure_probability, 2),
        "confidence": round(prediction_conf, 2),
        "status": status
    }

print("üöÄ READY TO RUN evaluate_window(data)")


üöÄ READY TO RUN evaluate_window(data)


In [47]:
results = []
for i in range(len(df) - WINDOW):
    window = df.iloc[i:i+WINDOW].values
    out = evaluate_window(window)
    out["start_index"] = i
    out["end_index"] = i+WINDOW
    results.append(out)

results_df = pd.DataFrame(results)
results_df.to_csv("models/full_dataset_results.csv", index=False)

print("üìÅ RESULTS SAVED: models/full_dataset_results.csv")
results_df.head()


üìÅ RESULTS SAVED: models/full_dataset_results.csv


Unnamed: 0,anomaly_score,health_percent,failure_probability,confidence,status,start_index,end_index
0,0.25177,77.11,31.47,41.37,üü¢ HEALTHY,0,30
1,0.22584,79.47,28.23,45.69,üü¢ HEALTHY,1,31
2,0.2242,79.62,28.03,45.97,üü¢ HEALTHY,2,32
3,0.22274,79.75,27.84,46.21,üü¢ HEALTHY,3,33
4,0.2226,79.76,27.82,46.23,üü¢ HEALTHY,4,34


In [48]:
print(df.columns.tolist())


['Engine Load (%)', 'Throttle (%)', 'Short Fuel Trim (%)', 'Long Fuel Trim (%)', 'O2 Sensor B1S1 (V)', 'O2 Sensor B1S2 (V)', 'Fuel Pressure (kPa)', 'MAF (g/s)', 'Battery Voltage (V)']


#RUL CALCULATION#

In [49]:
# Calculate reconstruction error trend across dataset for RUL
error_trend = results_df["anomaly_score"].values

# Smooth to reduce noise (moving average)
window_size = 10
smoothed_error = pd.Series(error_trend).rolling(window_size).mean().fillna(method='bfill')

results_df["smoothed_error"] = smoothed_error


  smoothed_error = pd.Series(error_trend).rolling(window_size).mean().fillna(method='bfill')


In [50]:
# Slope calculation (rate of increase in error)
from sklearn.linear_model import LinearRegression
import numpy as np

X_time = np.arange(len(smoothed_error)).reshape(-1, 1)
y_error = smoothed_error.values.reshape(-1, 1)

model_lr = LinearRegression().fit(X_time, y_error)
slope = model_lr.coef_[0][0]  # degradation rate

print("üìâ Degradation Slope:", slope)


üìâ Degradation Slope: -0.0003177662176400532


In [51]:
# Prevent divide by zero
if slope <= 0:
    RUL_hours = np.inf
else:
    current_error = smoothed_error.iloc[-1]
    distance_to_failure = critical_th - current_error
    RUL_hours = distance_to_failure / slope


In [52]:
RUL_hours = max(0, RUL_hours)  # never negative
RUL_days = RUL_hours / 24      # convert to days


In [56]:
import numpy as np

# ---------------------------------
# 1Ô∏è‚É£ Pull latest error values
# ---------------------------------
smoothed_error = results_df["smoothed_error"].values
current_error = smoothed_error[-1]  # last window error
critical_th = 0.25  # !! MODIFY based on your model threshold or desired sensitivity


# ---------------------------------
# 2Ô∏è‚É£ Degradation slope (already computed earlier)
# ---------------------------------
# (Run this if not done before)
from sklearn.linear_model import LinearRegression

X_time = np.arange(len(smoothed_error)).reshape(-1, 1)
y_error = smoothed_error.reshape(-1, 1)
model_lr = LinearRegression().fit(X_time, y_error)
slope = float(model_lr.coef_[0])


# ---------------------------------
# 3Ô∏è‚É£ RUL Calculation
# ---------------------------------
if slope <= 0:
    RUL_hours = np.inf
    RUL_days = np.inf
else:
    distance_to_failure = critical_th - current_error
    RUL_hours = max(0, distance_to_failure / slope)
    RUL_days = RUL_hours / 24

# ---------------------------------
# 4Ô∏è‚É£ Failure Risk & Confidence
# ---------------------------------
failure_risk_percent = min(100, max(0, (current_error / critical_th) * 100))
prediction_confidence = round((1 - (current_error / critical_th)) * 100, 2)
health_condition = round(max(0, (1 - current_error / critical_th) * 100), 2)

severity = (
    "üü¢ HEALTHY" if failure_risk_percent < 40 else
    "üü° WARNING" if failure_risk_percent < 70 else
    "üî¥ CRITICAL"
)

advice = (
    "‚úî Normal operation OK." if RUL_days > 30 else
    "‚ö† Schedule maintenance soon." if RUL_days > 7 else
    "‚õî URGENT: Inspect immediately."
)


# ---------------------------------
# 5Ô∏è‚É£ Final Combined Output
# ---------------------------------
RUL_report = {
    "Current Error": round(current_error, 5),
    "Failure Threshold": critical_th,
    "Failure Risk %": round(failure_risk_percent, 2),
    "Health Condition %": health_condition,
    "Prediction Confidence %": prediction_confidence,
    "Degradation Slope": round(slope, 6),
    "RUL (Hours)": round(RUL_hours, 2),
    "RUL (Days)": round(RUL_days, 2),
    "Status": severity,
    "Maintenance Advice": advice
}

print("\nüìå RUL & Health Report")
for k, v in RUL_report.items():
    print(f"{k}: {v}")



üìå RUL & Health Report
Current Error: 0.17156
Failure Threshold: 0.25
Failure Risk %: 68.62
Health Condition %: 31.38
Prediction Confidence %: 31.38
Degradation Slope: -0.000318
RUL (Hours): inf
RUL (Days): inf
Maintenance Advice: ‚úî Normal operation OK.


  slope = float(model_lr.coef_[0])


In [62]:
import numpy as np

# ===============================
# üìå REQUIRED INPUTS
# ===============================
# Use your last calculated anomaly errors list:
# If you already have anomaly scores list, replace it below.
# Otherwise we simulate a list from previous windows.

# Example: from your sliding window model
if "errors_over_time" in globals():
    last_100_errs = errors_over_time[-100:]  # take last 100 from model run
else:
    last_100_errs = np.array([current_error])  # fallback if only single window was tested

# Ensure it's a numpy array always
last_100_errs = np.array(last_100_errs).flatten()

# ===============================
# üìå SAFETY CHECK (PREVENT ERRORS)
# ===============================
if len(last_100_errs) < 2:
    # Add artificial stability trend if too few samples
    last_100_errs = np.concatenate([last_100_errs, last_100_errs + 1e-6])

# ===============================
# üìå REQUIRED MAIN VARIABLES
# ===============================
current_error = float(current_error)    # from model evaluation
critical_th   = float(critical_th)      # threshold you set

failure_risk_percent     = min(100, (current_error / critical_th) * 100)
health_condition          = round(max(0, 100 - failure_risk_percent), 2)
prediction_confidence     = round(100 - abs(health_condition - failure_risk_percent), 2)

# ===============================
# üìå SLOPE / DEGRADATION RATE
# ===============================
from sklearn.linear_model import LinearRegression

x = np.arange(len(last_100_errs)).reshape(-1,1)
model_lr = LinearRegression().fit(x, last_100_errs)
slope = float(model_lr.coef_[0])

# ===============================
# üìå RUL CALCULATION
# ===============================
if slope <= 0:
    RUL_hours = float("inf")
    RUL_days = float("inf")
    rul_state = "‚àû (Stable / No active failure trend)"
else:
    distance_to_failure = max(0, critical_th - current_error)
    RUL_hours = distance_to_failure / slope
    RUL_days = RUL_hours / 24
    rul_state = f"{round(RUL_days,2)} days (~{round(RUL_hours,2)} hrs)"

# ===============================
# üìå HEALTH & STATUS DECISION
# ===============================
if current_error < critical_th * 0.50:
    severity = "üü¢ HEALTHY - Running Normal"
    advice = "‚úî Stable operation. No maintenance required."
elif current_error < critical_th:
    severity = "üü° WARNING - Monitor Soon"
    advice = "‚ö† Inspect components, rising failure trend spotted."
else:
    severity = "üî¥ FAILURE LIKELY"
    advice = "üö® Immediate inspection needed. Risk above safe limit."

# Override behavior if slope negative = recovering
if slope < 0:
    severity = "üü¢ STABLE / RECOVERING"
    advice = "‚úî System improving. Continue observation."

# ===============================
# üìå FINAL REPORT OUTPUT
# ===============================
RUL_report = {
    "Current Error": round(current_error, 5),
    "Failure Threshold": critical_th,
    "Failure Risk %": round(failure_risk_percent, 2),
    "Health Condition %": health_condition,
    "Prediction Confidence %": prediction_confidence,
    "Degradation Slope": round(slope, 6),
    "RUL (Hours)": "‚àû" if RUL_hours == float("inf") else round(RUL_hours, 2),
    "RUL (Days)": "‚àû" if RUL_days == float("inf") else round(RUL_days, 2),
    "RUL Status": rul_state,
    "System Status": severity,
    "Maintenance Advice": advice
}

print("\nüìå RUL & HEALTH REPORT")
for k,v in RUL_report.items():
    print(f"{k}: {v}")



üìå RUL & HEALTH REPORT
Current Error: 0.17156
Failure Threshold: 0.25
Failure Risk %: 68.62
Health Condition %: 31.38
Prediction Confidence %: 62.76
Degradation Slope: 1e-06
RUL (Hours): 78445.0
RUL (Days): 3268.54
RUL Status: 3268.54 days (~78445.0 hrs)
Maintenance Advice: ‚ö† Inspect components, rising failure trend spotted.
