# In [None]:
# 📝 AI-Based Fault Detection in Transmission Lines (EEE Project Demo)

# --- 1. Setup ---
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pywt
import scipy.io
import seaborn as sns
from scipy.fft import rfft
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# --- 2. Plot Sample Signals ---
# Four sample recordings are drawn on a 2x2 grid, one panel per file.
# `files` and `titles` are parallel lists: titles[k] captions files[k].
files = [
    "data/raw/normal_001.mat",
    "data/raw/SLG_001.mat",
    "data/raw/LLG_001.mat",
    "data/raw/LLL_001.mat"
]
titles = ["Normal", "SLG Fault", "LLG Fault", "LLL Fault"]

plt.figure(figsize=(10, 6))
for panel, (sample_path, panel_title) in enumerate(zip(files, titles), start=1):
    recording = scipy.io.loadmat(sample_path)
    # Each .mat file is read for a 'sig' waveform and a 't' axis to plot it
    # against; squeeze() drops the singleton dimensions loadmat returns.
    waveform = recording['sig'].squeeze()
    time_axis = recording['t'].squeeze()
    axes = plt.subplot(2, 2, panel)
    axes.plot(time_axis, waveform)
    axes.set_title(panel_title)
    axes.set_xlabel("Time (s)")
    axes.set_ylabel("Amplitude")
plt.tight_layout()
plt.show()

# --- 3. Feature Extraction Functions ---
def pad_or_crop(signal, target_len=1024):
    """Force ``signal`` to exactly ``target_len`` samples.

    A signal that already has at least ``target_len`` samples is truncated to
    its first ``target_len`` entries (returned as a slice of the input). A
    shorter signal is extended at its end with zeros via ``np.pad``.

    Intended for 1-D signals; ``len(signal)`` is what gets compared against
    ``target_len``.
    """
    shortfall = target_len - len(signal)
    if shortfall > 0:
        # 'constant' mode with the default fill value appends zeros only
        # after the existing samples (pad widths are (before, after)).
        return np.pad(signal, (0, shortfall), 'constant')
    return signal[:target_len]

def extract_features(signal):
    """Summarise ``signal`` as a flat list of spectral and wavelet statistics.

    The returned list is ordered as:

    1. mean, standard deviation and maximum of the magnitude of the
       real-input FFT of ``signal``;
    2. for each coefficient array returned by a level-3 'db4' wavelet
       decomposition (in the order ``pywt.wavedec`` returns them), its mean
       followed by its standard deviation.
    """
    # Magnitude spectrum of the real-valued signal.
    spectrum = np.abs(rfft(signal))
    fft_stats = [np.mean(spectrum), np.std(spectrum), np.max(spectrum)]

    # Multi-level discrete wavelet decomposition; one (mean, std) pair per band.
    bands = pywt.wavedec(signal, 'db4', level=3)
    wavelet_stats = [stat(band) for band in bands for stat in (np.mean, np.std)]

    return fft_stats + wavelet_stats

# --- 4. Load Data and Build Features ---
# The manifest is read for two columns: "file" (path to a .mat recording)
# and "label" (the class assigned to that recording).
df = pd.read_csv("data/manifest.csv")
rows = []
for raw_path, label in zip(df["file"], df["label"]):
    path = raw_path.replace("../", "")  # fix paths
    mat = scipy.io.loadmat(path)
    sig = pad_or_crop(mat['sig'].squeeze())
    # One table row = feature vector followed by its label.
    rows.append(extract_features(sig) + [label])

# Column names mirror the order produced by extract_features: three FFT
# statistics, then a (mean, std) pair for each of the 4 wavelet bands.
fft_cols = ["fft_mean", "fft_std", "fft_max"]
wavelet_cols = [f"wl{i}_{s}" for i in range(4) for s in ["mean", "std"]]
cols = fft_cols + wavelet_cols + ["label"]

features = pd.DataFrame(rows, columns=cols)
features.head()

# --- 5. Train Model ---
# Separate the feature columns from the target column.
X = features.drop(columns=["label"])
y = features["label"]

# Hold out 20% of the rows for evaluation; stratify=y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Save model
# Create the output directory first: joblib.dump does not create missing
# parent directories, and failing here would throw away the fitted model.
model_path = "models/random_forest.pkl"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(rf, model_path)

# --- 6. Evaluate Model ---
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred))

# Use one sorted label list for both the matrix ordering and the tick
# labels so rows/columns and their captions cannot drift apart.
class_names = sorted(y.unique())
cm = confusion_matrix(y_test, y_pred, labels=class_names)

plt.figure(figsize=(6, 5))
heatmap_axes = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_axes.set_xlabel("Predicted")
heatmap_axes.set_ylabel("True")
heatmap_axes.set_title("Confusion Matrix - Random Forest")
plt.show()

# --- 7. Test Prediction on New File ---
# Run a single recording through the same preprocessing used for training
# (pad/crop to a fixed length, then feature extraction) and classify it.
test_file = "data/raw/SLG_005.mat"
mat = scipy.io.loadmat(test_file)
sig = pad_or_crop(mat["sig"].squeeze())
feats = extract_features(sig)

# The forest was fitted on a DataFrame, so it recorded the training column
# names. Passing a bare nested list makes scikit-learn warn that X has no
# valid feature names and skips the column-name consistency check, so wrap
# the vector in a one-row DataFrame carrying the fitted names.
sample = pd.DataFrame([feats], columns=rf.feature_names_in_)
pred = rf.predict(sample)[0]
print(f"✅ Prediction for {test_file}: {pred}")
