Transformer Health Monitoring - with Visualization + Inference


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
import os

# -------------------------
# 1. Load dataset
# -------------------------
# Read the whole CSV into a DataFrame and report its (rows, columns) shape.
data = pd.read_csv("transformer_health_dataset.csv")
print("Dataset shape:", data.shape)

# The "fault" column is the label; every remaining column is a model input.
y = data["fault"]
X = data.drop(columns="fault")

# -------------------------
# 2. Train-test split
# -------------------------
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions alike in both partitions; the fixed seed makes the split
# repeatable.
split_options = {"test_size": 0.2, "stratify": y, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# -------------------------
# 3. Preprocessing (scaling)
# -------------------------
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler so it can be reloaded at inference time.
os.makedirs("models", exist_ok=True)
joblib.dump(scaler, "models/scaler.joblib")

# -------------------------
# 4. Train RandomForest Classifier
# -------------------------
rf_settings = {
    "n_estimators": 150,
    "class_weight": "balanced",
    "random_state": 42,
}
# fit() returns the estimator itself, so construction and training chain.
clf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# -------------------------
# 5. Evaluate
# -------------------------
# Score the held-out split; y_pred and acc are reused by later sections.
y_pred = clf.predict(X_test_scaled)

report_text = classification_report(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print("\nClassification Report:\n")
print(report_text)
print("Confusion Matrix:\n", conf_mat)
print("Accuracy:", acc)

# Persist the trained classifier next to the scaler saved earlier.
joblib.dump(clf, "models/rf_transformer_health.joblib")

# -------------------------
# 6. Visualization
# -------------------------
# Each chart gets its own figure/axes pair and is drawn through the axes
# object rather than through pyplot's implicit "current figure".

# (a) Feature distributions (Healthy vs Faulty)
for feature in X.columns[:4]:  # only the first 4 features, as an example
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(
        data, x=feature, hue="fault", kde=True, bins=30, palette="Set2", ax=ax
    )
    ax.set_title(f"Distribution of {feature} (Healthy vs Faulty)")
    plt.show()

# (b) Correlation heatmap over every column of the dataset (label included)
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(data.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()

# (c) Confusion Matrix heatmap
# NOTE(review): the tick labels assume the matrix rows/columns are ordered
# healthy-then-faulty (i.e. labels 0 and 1) -- confirm against the dataset.
class_names = ["Healthy", "Faulty"]
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()

# (d) Feature importance bar chart, most important feature first
importances = clf.feature_importances_
feat_imp = pd.Series(importances, index=X.columns).sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=feat_imp.values, y=feat_imp.index, palette="viridis", ax=ax)
ax.set_title("Feature Importance (RandomForest)")
ax.set_xlabel("Importance Score")
plt.show()

# -------------------------
# 7. Inference (Prediction)
# -------------------------
def infer_health(sample_dict):
    """Predict whether a transformer is faulty or healthy from one sample.

    Reloads the persisted scaler and classifier from ``models/``, scales the
    sample, prints a short report and returns the prediction.

    Args:
        sample_dict: Mapping of feature name -> value for a single sample.
            When the scaler recorded its training feature names, the keys
            must be exactly those names; their order does not matter.

    Returns:
        A ``(label, prob)`` tuple. ``label`` is ``"Faulty"`` when the model
        predicts class 1 and ``"Healthy"`` otherwise. ``prob`` is the
        predicted probability of class 1, or ``0.0`` if the model has no
        class equal to 1.

    Raises:
        ValueError: If the keys of ``sample_dict`` do not match the feature
            names the scaler was fitted on.
    """
    # Load scaler and model.
    # NOTE: joblib.load unpickles arbitrary objects -- only point this at
    # artifacts produced by a trusted training run.
    s = joblib.load("models/scaler.joblib")
    model = joblib.load("models/rf_transformer_health.joblib")

    # Convert dict -> single-row DataFrame
    sample_df = pd.DataFrame([sample_dict])

    # A scaler fitted on a DataFrame remembers its column names, and
    # scikit-learn rejects input whose names differ or arrive in a different
    # order. Validate up front for a clear message, then reorder the columns
    # so the caller's key order is irrelevant.
    expected = getattr(s, "feature_names_in_", None)
    if expected is not None:
        expected = list(expected)
        known = set(expected)
        provided = set(sample_df.columns)
        missing = [str(name) for name in expected if name not in provided]
        unexpected = [
            str(name) for name in sample_df.columns if name not in known
        ]
        if missing or unexpected:
            raise ValueError(
                "sample_dict does not match the features the scaler was "
                f"fitted on (missing: {missing}, unexpected: {unexpected})"
            )
        sample_df = sample_df[expected]

    # Scale features
    xs = s.transform(sample_df)

    # Predict
    pred = model.predict(xs)[0]
    proba_row = model.predict_proba(xs)[0]

    # predict_proba columns follow model.classes_, so look up the column for
    # class 1 instead of assuming it is always at index 1.
    classes = list(model.classes_)
    prob = proba_row[classes.index(1)] if 1 in classes else 0.0

    label = "Faulty" if pred == 1 else "Healthy"
    print("\nPrediction Result:")
    print("Input values:", sample_dict)
    print(f"Predicted Class: {label}")
    print(f"Fault Probability: {prob:.4f}")
    return label, prob

# Example 1: Use a random sample from dataset
# Draw one row with a fixed seed and unpack the single resulting record.
(random_sample,) = X.sample(n=1, random_state=42).to_dict(orient="records")

# Example 2: Custom manual input
# NOTE(review): these keys are presumably the dataset's feature columns --
# confirm they match the CSV header.
custom_sample = dict(
    oil_temp=95,        # high temp
    winding_temp=120,   # high winding temp
    vibration_rms=3.2,  # high vibration
    load_current=300,   # higher than usual
    voltage=11,
    h2=15,
    ch4=7,
    c2h2=4,
    moisture=1.5,
)

# Run both examples through the inference helper, dataset sample first.
for example in (random_sample, custom_sample):
    infer_health(example)

# -------------------------
# 8. Show Accuracy as a Plot
# -------------------------
# Single-bar chart of the test accuracy computed in the evaluation section.
fig, ax = plt.subplots(figsize=(4, 5))
ax.bar(["Model Accuracy"], [acc], color="green")
ax.set_ylim(0, 1)
ax.set_title("Model Accuracy")
ax.set_ylabel("Accuracy Score")

# Write the score, rounded to two decimals, at half the bar's height.
ax.text(
    0,
    acc / 2,
    f"{acc:.2f}",
    ha="center",
    va="center",
    fontsize=12,
    color="white",
)
plt.show()
