<a href="https://colab.research.google.com/github/Oynkey2127/ML-SENSOR-FUSION-PROJECT-/blob/main/ROTATING_EQUIPMENT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:


!pip install -q xgboost imbalanced-learn shap

In [None]:
!pip install -q xgboost imbalanced-learn shap

import os, random, joblib, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from scipy.stats import skew, kurtosis
from scipy.fft import rfft, rfftfreq

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from xgboost import XGBClassifier
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
import tensorflow as tf


SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)
MODEL_DIR = "/content/models_nocnn"
os.makedirs(MODEL_DIR, exist_ok=True)


DATA_PATH = "/content/Rotating_equipment_fault_data.csv"
print("Loading:", DATA_PATH)
df = pd.read_csv(DATA_PATH)
print("Shape:", df.shape)
display(df.head())


# Cell 4: Column detection & cleaning
target_col = "Fault_Type"
sensor_cols = ['Vibration_X','Vibration_Y','Vibration_Z','Acoustic_Level','Temperature']  # from your CSV sample

# drop NA rows in required columns
df = df.dropna(subset=sensor_cols + [target_col]).reset_index(drop=True)
print("After dropna:", df.shape)

# label encode target
le = LabelEncoder()
y = le.fit_transform(df[target_col].values)
X_raw = df[sensor_cols].astype(float).values
n_classes = len(le.classes_)
print("Detected classes:", le.classes_)


# Cell 5: Create non-overlapping windows
TIMESTEPS = 20  # tune if needed

def make_non_overlapping(X, y, timesteps):
    """Slice a sample stream into consecutive, non-overlapping windows.

    Parameters
    ----------
    X : array-like
        Samples, indexable/sliceable along the first axis (one row per sample).
    y : array-like
        Per-sample labels aligned with ``X``.
    timesteps : int
        Number of consecutive samples per window; must be >= 1.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The stacked windows and one label per window. Each window takes the
        label of its LAST sample. Trailing samples that do not fill a whole
        window are dropped.

    Raises
    ------
    ValueError
        If ``timesteps`` is smaller than 1 (a step of 0 or less would never
        advance through ``X``).
    """
    if timesteps < 1:
        raise ValueError(f"timesteps must be >= 1, got {timesteps}")
    # Start offset of every complete window; a partial tail window is skipped.
    starts = range(0, len(X) - timesteps + 1, timesteps)
    Xw = [X[s:s + timesteps] for s in starts]
    yw = [y[s + timesteps - 1] for s in starts]
    return np.array(Xw), np.array(yw)

Xw_all, yw_all = make_non_overlapping(X_raw, y, TIMESTEPS)
print("Windows shape (non-overlap):", Xw_all.shape, yw_all.shape)


# Cell 6: Time-ordered split (train = first 80%, test = last 20%)
n = len(Xw_all)
split_idx = int(0.8 * n)
Xw_train, Xw_test = Xw_all[:split_idx], Xw_all[split_idx:]
yw_train, yw_test = yw_all[:split_idx], yw_all[split_idx:]
print("Train windows:", Xw_train.shape, "Test windows:", Xw_test.shape)


# Cell 7: Feature engineering functions
def time_feats(window):
    """Compute per-channel time-domain statistics for one window.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        Flat vector with 9 values per channel, in channel order: mean,
        population std, RMS, peak-to-peak, IQR, skewness, excess kurtosis,
        max|x| / mean|x| ("crest-ish") and mean first difference.

    Notes
    -----
    Skewness and kurtosis are undefined for a constant channel (SciPy may
    return NaN there) and the mean first difference is undefined for a
    one-sample window. Those entries are reported as 0.0 so that flat
    windows -- e.g. the tiled single-row windows built by the inference
    helpers -- never feed NaN into the scaler or the models.
    """
    feats = []
    for ch in range(window.shape[1]):
        arr = window[:, ch]
        p2p = np.max(arr) - np.min(arr)
        # An exactly-zero range means every sample in the channel is identical.
        is_flat = p2p == 0
        feats += [
            arr.mean(),
            arr.std(ddof=0),
            np.sqrt(np.mean(arr**2)),    # RMS
            p2p,                         # P2P
            np.percentile(arr,75)-np.percentile(arr,25),  # IQR
            0.0 if is_flat else skew(arr),
            0.0 if is_flat else kurtosis(arr),
            (np.max(np.abs(arr)) / (np.mean(np.abs(arr))+1e-9)),  # crest-ish
            np.mean(np.diff(arr)) if arr.size > 1 else 0.0,
        ]
    return np.array(feats)

def freq_feats(window, n_bands=6):
    """Compute per-channel relative spectral band energies for one window.

    The one-sided FFT magnitude spectrum of each channel (DC bin included) is
    split into ``n_bands`` contiguous groups of bins; each feature is the
    share of that channel's total spectral energy inside one group.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.
    n_bands : int, default 6
        Number of bands per channel.

    Returns
    -------
    np.ndarray
        Flat vector with ``n_bands`` values per channel, in channel order.
    """
    feats = []
    t = window.shape[0]
    # Integer bin edges over the t//2+1 one-sided FFT bins. If n_bands exceeds
    # the bin count, some bands are empty and contribute 0.
    fft_idx_edges = np.linspace(0, t//2+1, n_bands+1, dtype=int)
    for ch in range(window.shape[1]):
        power = np.abs(rfft(window[:, ch]))**2
        energy = power.sum() + 1e-9   # epsilon avoids 0/0 on an all-zero channel
        for lo, hi in zip(fft_idx_edges[:-1], fft_idx_edges[1:]):
            feats.append(power[lo:hi].sum()/energy)
    return np.array(feats)

def build_feats(Xw):
    """Build the engineered feature matrix for a batch of windows.

    Parameters
    ----------
    Xw : iterable of np.ndarray
        Windows, each indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        One row per window: the ``time_feats`` vector followed by the
        6-band ``freq_feats`` vector.

    Raises
    ------
    ValueError
        If ``Xw`` is empty (``np.vstack`` cannot stack zero rows).
    """
    rows = [np.concatenate([time_feats(w), freq_feats(w, n_bands=6)]) for w in Xw]
    return np.vstack(rows)


# Cell 8: Build features & scale (fit scaler on train only)
Xf_train = build_feats(Xw_train)
Xf_test  = build_feats(Xw_test)
print("Engineered feature shapes:", Xf_train.shape, Xf_test.shape)

scaler_feats = StandardScaler().fit(Xf_train)
Xf_train_s = scaler_feats.transform(Xf_train)
Xf_test_s  = scaler_feats.transform(Xf_test)
joblib.dump(scaler_feats, os.path.join(MODEL_DIR, "scaler_features.pkl"))
print("Saved scaler to", os.path.join(MODEL_DIR, "scaler_features.pkl"))


# Cell 9: XGBoost training on engineered features
# (Using XGBoost with default parameters for demonstration)
# 'multi:softprob' makes the booster emit per-class probabilities, which the
# soft-vote ensemble and the inference helpers read through predict_proba();
# predict() still returns the arg-max class index.
xgb = XGBClassifier(objective='multi:softprob', num_class=n_classes, random_state=SEED)
xgb.fit(Xf_train_s, yw_train)
xgb_pred = xgb.predict(Xf_test_s)
print("XGBoost Test accuracy:", accuracy_score(yw_test, xgb_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, xgb_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
xgb.save_model(os.path.join(MODEL_DIR, "xgb_feats.json")) # Use json format for compatibility
print("Saved XGBoost model to", os.path.join(MODEL_DIR, "xgb_feats.json"))


# Cell 10: MLP training on engineered features
n_feats = Xf_train_s.shape[1]
def build_mlp(n_feats, n_classes):
    """Create and compile the dense softmax classifier for engineered features.

    Architecture: Input(n_feats) -> Dense(256, relu) -> Dropout(0.3)
    -> Dense(128, relu) -> Dropout(0.25) -> Dense(n_classes, softmax).
    The model is compiled with the Adam optimizer, categorical cross-entropy
    loss (so targets must be one-hot encoded) and an accuracy metric.

    Parameters
    ----------
    n_feats : int
        Length of one input feature vector.
    n_classes : int
        Number of output classes.

    Returns
    -------
    The compiled Keras model.
    """
    features_in = layers.Input(shape=(n_feats,))
    hidden = features_in
    # (units, dropout rate) of each hidden stage, in order.
    for units, drop_rate in ((256, 0.3), (128, 0.25)):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)
    class_probs = layers.Dense(n_classes, activation='softmax')(hidden)
    net = models.Model(inputs=features_in, outputs=class_probs)
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# One-hot targets, as required by the 'categorical_crossentropy' loss.
y_train_cat = to_categorical(yw_train, num_classes=n_classes)
y_test_cat  = to_categorical(yw_test,  num_classes=n_classes)

mlp = build_mlp(n_feats, n_classes)
# Stop after 6 epochs without val_loss improvement and restore the best weights.
es = callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
mlp.fit(Xf_train_s, y_train_cat, validation_split=0.15, epochs=60, batch_size=128, callbacks=[es], verbose=2)
mlp_pred = np.argmax(mlp.predict(Xf_test_s), axis=1)
print("MLP Test accuracy:", accuracy_score(yw_test, mlp_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, mlp_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
mlp.save(os.path.join(MODEL_DIR, "mlp_feats.h5"))


# Cell 11: Soft-vote ensemble (XGB + MLP)
xgb_proba = xgb.predict_proba(Xf_test_s)
mlp_proba = mlp.predict(Xf_test_s)
# Unweighted mean of the two models' class-probability matrices.
ens_proba = (xgb_proba + mlp_proba) / 2.0
ens_pred = np.argmax(ens_proba, axis=1)
print("Ensemble (XGB+MLP) Test accuracy:", accuracy_score(yw_test, ens_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, ens_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))

# Save ensemble artifacts
joblib.dump(le, os.path.join(MODEL_DIR, "label_encoder.pkl"))
print("Saved label encoder:", os.path.join(MODEL_DIR, "label_encoder.pkl"))


# Cell 12: Inference helper (use saved models)
def time_feats_single(window):
    """Time-domain features for one window; thin alias of ``time_feats``."""
    return time_feats(window)

def freq_feats_single(window):
    """Spectral band features for one window, fixed at 6 bands."""
    return freq_feats(window, n_bands=6)

def predict_from_raw_row_no_cnn(raw_row):
    """Classify one sensor reading with the XGBoost + MLP soft-vote ensemble.

    Parameters
    ----------
    raw_row : 1D array-like of sensor values in order sensor_cols.

    Returns
    -------
    (label, probability)
        The decoded class label with the highest averaged probability, and
        that probability as a Python float.

    Notes
    -----
    The single row is repeated TIMESTEPS times to form a window. Such a
    constant window has undefined skewness/kurtosis, which can surface as NaN
    features; NaN/inf entries are replaced by 0.0 before scaling (the same
    treatment as ``predict_from_raw_row_safe``) so the averaged probabilities
    stay finite.
    """
    w = np.tile(np.array(raw_row, dtype=float).reshape(1,-1), (TIMESTEPS,1))
    feats = np.concatenate([time_feats_single(w), freq_feats_single(w)])
    # Without this, a NaN feature propagates through the MLP and the arg-max
    # below would be taken over NaN probabilities.
    feats = np.nan_to_num(feats, nan=0.0, posinf=0.0, neginf=0.0)
    feats_s = scaler_feats.transform(feats.reshape(1,-1))
    p_xgb = xgb.predict_proba(feats_s)[0]
    p_mlp = mlp.predict(feats_s)[0]
    p_avg = (p_xgb + p_mlp)/2.0
    idx = int(np.argmax(p_avg))
    return le.inverse_transform([idx])[0], float(p_avg[idx])


import numpy as np

def predict_from_raw_row_safe(raw_row):
    """Predict the fault label for one reading, zeroing non-finite features.

    raw_row: [Vibration_X, Vibration_Y, Vibration_Z, Acoustic_Level, Temperature]

    Returns a ``(label, prob)`` tuple: the decoded class with the highest
    averaged XGBoost/MLP probability and that probability as a float.
    """
    # Repeat the single reading TIMESTEPS times to get a window-shaped input.
    reading = np.array(raw_row, dtype=float).reshape(1, -1)
    window = np.tile(reading, (TIMESTEPS, 1))

    # Time-domain block first, then the spectral block.
    raw_feats = np.concatenate([time_feats(window), freq_feats(window, n_bands=6)])

    # NaN / +-inf entries become 0 so the scaler and the models get finite input.
    clean_feats = np.nan_to_num(raw_feats, nan=0.0, posinf=0.0, neginf=0.0)

    scaled = scaler_feats.transform(clean_feats.reshape(1, -1))
    # Soft vote: mean of the two models' class-probability vectors.
    soft_vote = (xgb.predict_proba(scaled)[0] + mlp.predict(scaled)[0]) / 2.0

    best = int(np.argmax(soft_vote))
    return le.inverse_transform([best])[0], float(soft_vote[best])


# Cell 13: Quick sample prediction + list saved artifacts
sample_row = df.loc[100, sensor_cols].values
plabel, pprob = predict_from_raw_row_no_cnn(sample_row)
print("Sample prediction:", plabel, pprob)
print("Saved files:", os.listdir(MODEL_DIR))


# Cell 14: Optional - XGBoost feature importance (requires matplotlib)
importances = xgb.feature_importances_
idx = np.argsort(importances)[::-1][:30]
plt.figure(figsize=(8,6))
plt.barh(range(len(idx)), importances[idx][::-1])
plt.yticks(range(len(idx)), [f"feat_{i}" for i in idx[::-1]])
plt.title("Top feature importances (XGBoost)")
plt.show()

def test_custom(vx, vy, vz, acoustic, temp):
    """Run the safe predictor on one hand-entered reading and print the result.

    The five arguments are passed to ``predict_from_raw_row_safe`` as one row
    in the order (vx, vy, vz, acoustic, temp). Prints the input, the predicted
    fault with its confidence in percent, and a separator line; returns None.
    """
    label, prob = predict_from_raw_row_safe([vx, vy, vz, acoustic, temp])
    report = (
        f"Input: Vx={vx}, Vy={vy}, Vz={vz}, Acoustic={acoustic}, Temp={temp}",
        f" -> Fault: {label}, Confidence: {prob*100:.2f}%",
        "-" * 50,
    )
    for line in report:
        print(line)

test_custom(0.1, 0.2, 0.3, 88, 90)


In [None]:
import os, random, joblib, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from scipy.stats import skew, kurtosis
from scipy.fft import rfft, rfftfreq

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from xgboost import XGBClassifier
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
import tensorflow as tf


SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)
MODEL_DIR = "/content/models_nocnn"
os.makedirs(MODEL_DIR, exist_ok=True)


DATA_PATH = "/content/Rotating_equipment_fault_data.csv"
print("Loading:", DATA_PATH)
df = pd.read_csv(DATA_PATH)
print("Shape:", df.shape)
display(df.head())


# Cell 4: Column detection & cleaning
target_col = "Fault_Type"
sensor_cols = ['Vibration_X','Vibration_Y','Vibration_Z','Acoustic_Level','Temperature']  # from your CSV sample

# drop NA rows in required columns
df = df.dropna(subset=sensor_cols + [target_col]).reset_index(drop=True)
print("After dropna:", df.shape)

# label encode target
le = LabelEncoder()
y = le.fit_transform(df[target_col].values)
X_raw = df[sensor_cols].astype(float).values
n_classes = len(le.classes_)
print("Detected classes:", le.classes_)


# Cell 5: Create non-overlapping windows
TIMESTEPS = 20  # tune if needed

def make_non_overlapping(X, y, timesteps):
    """Slice a sample stream into consecutive, non-overlapping windows.

    Parameters
    ----------
    X : array-like
        Samples, indexable/sliceable along the first axis (one row per sample).
    y : array-like
        Per-sample labels aligned with ``X``.
    timesteps : int
        Number of consecutive samples per window; must be >= 1.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The stacked windows and one label per window. Each window takes the
        label of its LAST sample. Trailing samples that do not fill a whole
        window are dropped.

    Raises
    ------
    ValueError
        If ``timesteps`` is smaller than 1 (a step of 0 or less would never
        advance through ``X``).
    """
    if timesteps < 1:
        raise ValueError(f"timesteps must be >= 1, got {timesteps}")
    # Start offset of every complete window; a partial tail window is skipped.
    starts = range(0, len(X) - timesteps + 1, timesteps)
    Xw = [X[s:s + timesteps] for s in starts]
    yw = [y[s + timesteps - 1] for s in starts]
    return np.array(Xw), np.array(yw)

Xw_all, yw_all = make_non_overlapping(X_raw, y, TIMESTEPS)
print("Windows shape (non-overlap):", Xw_all.shape, yw_all.shape)


# Cell 6: Time-ordered split (train = first 80%, test = last 20%)
n = len(Xw_all)
split_idx = int(0.8 * n)
Xw_train, Xw_test = Xw_all[:split_idx], Xw_all[split_idx:]
yw_train, yw_test = yw_all[:split_idx], yw_all[split_idx:]
print("Train windows:", Xw_train.shape, "Test windows:", Xw_test.shape)


# Cell 7: Feature engineering functions
def time_feats(window):
    """Compute per-channel time-domain statistics for one window.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        Flat vector with 9 values per channel, in channel order: mean,
        population std, RMS, peak-to-peak, IQR, skewness, excess kurtosis,
        max|x| / mean|x| ("crest-ish") and mean first difference.

    Notes
    -----
    Skewness and kurtosis are undefined for a constant channel (SciPy may
    return NaN there) and the mean first difference is undefined for a
    one-sample window. Those entries are reported as 0.0 so that flat
    windows -- e.g. the tiled single-row windows built by the inference
    helpers -- never feed NaN into the scaler or the models.
    """
    feats = []
    for ch in range(window.shape[1]):
        arr = window[:, ch]
        p2p = np.max(arr) - np.min(arr)
        # An exactly-zero range means every sample in the channel is identical.
        is_flat = p2p == 0
        feats += [
            arr.mean(),
            arr.std(ddof=0),
            np.sqrt(np.mean(arr**2)),    # RMS
            p2p,                         # P2P
            np.percentile(arr,75)-np.percentile(arr,25),  # IQR
            0.0 if is_flat else skew(arr),
            0.0 if is_flat else kurtosis(arr),
            (np.max(np.abs(arr)) / (np.mean(np.abs(arr))+1e-9)),  # crest-ish
            np.mean(np.diff(arr)) if arr.size > 1 else 0.0,
        ]
    return np.array(feats)

def freq_feats(window, n_bands=6):
    """Compute per-channel relative spectral band energies for one window.

    The one-sided FFT magnitude spectrum of each channel (DC bin included) is
    split into ``n_bands`` contiguous groups of bins; each feature is the
    share of that channel's total spectral energy inside one group.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.
    n_bands : int, default 6
        Number of bands per channel.

    Returns
    -------
    np.ndarray
        Flat vector with ``n_bands`` values per channel, in channel order.
    """
    feats = []
    t = window.shape[0]
    # Integer bin edges over the t//2+1 one-sided FFT bins. If n_bands exceeds
    # the bin count, some bands are empty and contribute 0.
    fft_idx_edges = np.linspace(0, t//2+1, n_bands+1, dtype=int)
    for ch in range(window.shape[1]):
        power = np.abs(rfft(window[:, ch]))**2
        energy = power.sum() + 1e-9   # epsilon avoids 0/0 on an all-zero channel
        for lo, hi in zip(fft_idx_edges[:-1], fft_idx_edges[1:]):
            feats.append(power[lo:hi].sum()/energy)
    return np.array(feats)

def build_feats(Xw):
    """Build the engineered feature matrix for a batch of windows.

    Parameters
    ----------
    Xw : iterable of np.ndarray
        Windows, each indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        One row per window: the ``time_feats`` vector followed by the
        6-band ``freq_feats`` vector.

    Raises
    ------
    ValueError
        If ``Xw`` is empty (``np.vstack`` cannot stack zero rows).
    """
    rows = [np.concatenate([time_feats(w), freq_feats(w, n_bands=6)]) for w in Xw]
    return np.vstack(rows)


# Cell 8: Build features & scale (fit scaler on train only)
Xf_train = build_feats(Xw_train)
Xf_test  = build_feats(Xw_test)
print("Engineered feature shapes:", Xf_train.shape, Xf_test.shape)

scaler_feats = StandardScaler().fit(Xf_train)
Xf_train_s = scaler_feats.transform(Xf_train)
Xf_test_s  = scaler_feats.transform(Xf_test)
joblib.dump(scaler_feats, os.path.join(MODEL_DIR, "scaler_features.pkl"))
print("Saved scaler to", os.path.join(MODEL_DIR, "scaler_features.pkl"))


# Cell 9: XGBoost training on engineered features
# (Using XGBoost with default parameters for demonstration)
# 'multi:softprob' makes the booster emit per-class probabilities, which the
# soft-vote ensemble and the inference helpers read through predict_proba();
# predict() still returns the arg-max class index.
xgb = XGBClassifier(objective='multi:softprob', num_class=n_classes, random_state=SEED)
xgb.fit(Xf_train_s, yw_train)
xgb_pred = xgb.predict(Xf_test_s)
print("XGBoost Test accuracy:", accuracy_score(yw_test, xgb_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, xgb_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
xgb.save_model(os.path.join(MODEL_DIR, "xgb_feats.json")) # Use json format for compatibility
print("Saved XGBoost model to", os.path.join(MODEL_DIR, "xgb_feats.json"))


# Cell 10: MLP training on engineered features
n_feats = Xf_train_s.shape[1]
def build_mlp(n_feats, n_classes):
    """Create and compile the dense softmax classifier for engineered features.

    Architecture: Input(n_feats) -> Dense(256, relu) -> Dropout(0.3)
    -> Dense(128, relu) -> Dropout(0.25) -> Dense(n_classes, softmax).
    The model is compiled with the Adam optimizer, categorical cross-entropy
    loss (so targets must be one-hot encoded) and an accuracy metric.

    Parameters
    ----------
    n_feats : int
        Length of one input feature vector.
    n_classes : int
        Number of output classes.

    Returns
    -------
    The compiled Keras model.
    """
    features_in = layers.Input(shape=(n_feats,))
    hidden = features_in
    # (units, dropout rate) of each hidden stage, in order.
    for units, drop_rate in ((256, 0.3), (128, 0.25)):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)
    class_probs = layers.Dense(n_classes, activation='softmax')(hidden)
    net = models.Model(inputs=features_in, outputs=class_probs)
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# One-hot targets, as required by the 'categorical_crossentropy' loss.
y_train_cat = to_categorical(yw_train, num_classes=n_classes)
y_test_cat  = to_categorical(yw_test,  num_classes=n_classes)

mlp = build_mlp(n_feats, n_classes)
# Stop after 6 epochs without val_loss improvement and restore the best weights.
es = callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
mlp.fit(Xf_train_s, y_train_cat, validation_split=0.15, epochs=60, batch_size=128, callbacks=[es], verbose=2)
mlp_pred = np.argmax(mlp.predict(Xf_test_s), axis=1)
print("MLP Test accuracy:", accuracy_score(yw_test, mlp_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, mlp_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
mlp.save(os.path.join(MODEL_DIR, "mlp_feats.h5"))


# Cell 11: Soft-vote ensemble (XGB + MLP)
xgb_proba = xgb.predict_proba(Xf_test_s)
mlp_proba = mlp.predict(Xf_test_s)
# Unweighted mean of the two models' class-probability matrices.
ens_proba = (xgb_proba + mlp_proba) / 2.0
ens_pred = np.argmax(ens_proba, axis=1)
print("Ensemble (XGB+MLP) Test accuracy:", accuracy_score(yw_test, ens_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, ens_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))

# Save ensemble artifacts
joblib.dump(le, os.path.join(MODEL_DIR, "label_encoder.pkl"))
print("Saved label encoder:", os.path.join(MODEL_DIR, "label_encoder.pkl"))


# Cell 12: Inference helper (use saved models)
def time_feats_single(window):
    """Time-domain features for one window; thin alias of ``time_feats``."""
    return time_feats(window)

def freq_feats_single(window):
    """Spectral band features for one window, fixed at 6 bands."""
    return freq_feats(window, n_bands=6)

def predict_from_raw_row_no_cnn(raw_row):
    """Classify one sensor reading with the XGBoost + MLP soft-vote ensemble.

    Parameters
    ----------
    raw_row : 1D array-like of sensor values in order sensor_cols.

    Returns
    -------
    (label, probability)
        The decoded class label with the highest averaged probability, and
        that probability as a Python float.

    Notes
    -----
    The single row is repeated TIMESTEPS times to form a window. Such a
    constant window has undefined skewness/kurtosis, which can surface as NaN
    features; NaN/inf entries are replaced by 0.0 before scaling (the same
    treatment as ``predict_from_raw_row_safe``) so the averaged probabilities
    stay finite.
    """
    w = np.tile(np.array(raw_row, dtype=float).reshape(1,-1), (TIMESTEPS,1))
    feats = np.concatenate([time_feats_single(w), freq_feats_single(w)])
    # Without this, a NaN feature propagates through the MLP and the arg-max
    # below would be taken over NaN probabilities.
    feats = np.nan_to_num(feats, nan=0.0, posinf=0.0, neginf=0.0)
    feats_s = scaler_feats.transform(feats.reshape(1,-1))
    p_xgb = xgb.predict_proba(feats_s)[0]
    p_mlp = mlp.predict(feats_s)[0]
    p_avg = (p_xgb + p_mlp)/2.0
    idx = int(np.argmax(p_avg))
    return le.inverse_transform([idx])[0], float(p_avg[idx])


import numpy as np

def predict_from_raw_row_safe(raw_row):
    """Predict the fault label for one reading, zeroing non-finite features.

    raw_row: [Vibration_X, Vibration_Y, Vibration_Z, Acoustic_Level, Temperature]

    Returns a ``(label, prob)`` tuple: the decoded class with the highest
    averaged XGBoost/MLP probability and that probability as a float.
    """
    # Repeat the single reading TIMESTEPS times to get a window-shaped input.
    reading = np.array(raw_row, dtype=float).reshape(1, -1)
    window = np.tile(reading, (TIMESTEPS, 1))

    # Time-domain block first, then the spectral block.
    raw_feats = np.concatenate([time_feats(window), freq_feats(window, n_bands=6)])

    # NaN / +-inf entries become 0 so the scaler and the models get finite input.
    clean_feats = np.nan_to_num(raw_feats, nan=0.0, posinf=0.0, neginf=0.0)

    scaled = scaler_feats.transform(clean_feats.reshape(1, -1))
    # Soft vote: mean of the two models' class-probability vectors.
    soft_vote = (xgb.predict_proba(scaled)[0] + mlp.predict(scaled)[0]) / 2.0

    best = int(np.argmax(soft_vote))
    return le.inverse_transform([best])[0], float(soft_vote[best])


# Cell 13: Quick sample prediction + list saved artifacts
sample_row = df.loc[100, sensor_cols].values
plabel, pprob = predict_from_raw_row_no_cnn(sample_row)
print("Sample prediction:", plabel, pprob)
print("Saved files:", os.listdir(MODEL_DIR))


# Cell 14: Optional - XGBoost feature importance (requires matplotlib)
importances = xgb.feature_importances_
idx = np.argsort(importances)[::-1][:30]
plt.figure(figsize=(8,6))
plt.barh(range(len(idx)), importances[idx][::-1])
plt.yticks(range(len(idx)), [f"feat_{i}" for i in idx[::-1]])
plt.title("Top feature importances (XGBoost)")
plt.show()

def test_custom(vx, vy, vz, acoustic, temp):
    """Run the safe predictor on one hand-entered reading and print the result.

    The five arguments are passed to ``predict_from_raw_row_safe`` as one row
    in the order (vx, vy, vz, acoustic, temp). Prints the input, the predicted
    fault with its confidence in percent, and a separator line; returns None.
    """
    label, prob = predict_from_raw_row_safe([vx, vy, vz, acoustic, temp])
    report = (
        f"Input: Vx={vx}, Vy={vy}, Vz={vz}, Acoustic={acoustic}, Temp={temp}",
        f" -> Fault: {label}, Confidence: {prob*100:.2f}%",
        "-" * 50,
    )
    for line in report:
        print(line)

test_custom(0.1, 0.2, 0.3, 88, 90)

```python
it

!pip install -q xgboost imbalanced-learn shap
```
```python
import os, random, joblib, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from scipy.stats import skew, kurtosis
from scipy.fft import rfft, rfftfreq

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from xgboost import XGBClassifier
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
import tensorflow as tf


SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)
MODEL_DIR = "/content/models_nocnn"
os.makedirs(MODEL_DIR, exist_ok=True)
```
```python
DATA_PATH = "/content/Rotating_equipment_fault_data.csv"
print("Loading:", DATA_PATH)
df = pd.read_csv(DATA_PATH)
print("Shape:", df.shape)
display(df.head())
```
```python
# Cell 4: Column detection & cleaning
target_col = "Fault_Type"
sensor_cols = ['Vibration_X','Vibration_Y','Vibration_Z','Acoustic_Level','Temperature']  # from your CSV sample

# drop NA rows in required columns
df = df.dropna(subset=sensor_cols + [target_col]).reset_index(drop=True)
print("After dropna:", df.shape)

# label encode target
le = LabelEncoder()
y = le.fit_transform(df[target_col].values)
X_raw = df[sensor_cols].astype(float).values
n_classes = len(le.classes_)
print("Detected classes:", le.classes_)
```
```python
# Cell 5: Create non-overlapping windows
TIMESTEPS = 20  # tune if needed

def make_non_overlapping(X, y, timesteps):
    """Slice a sample stream into consecutive, non-overlapping windows.

    Parameters
    ----------
    X : array-like
        Samples, indexable/sliceable along the first axis (one row per sample).
    y : array-like
        Per-sample labels aligned with ``X``.
    timesteps : int
        Number of consecutive samples per window; must be >= 1.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The stacked windows and one label per window. Each window takes the
        label of its LAST sample. Trailing samples that do not fill a whole
        window are dropped.

    Raises
    ------
    ValueError
        If ``timesteps`` is smaller than 1 (a step of 0 or less would never
        advance through ``X``).
    """
    if timesteps < 1:
        raise ValueError(f"timesteps must be >= 1, got {timesteps}")
    # Start offset of every complete window; a partial tail window is skipped.
    starts = range(0, len(X) - timesteps + 1, timesteps)
    Xw = [X[s:s + timesteps] for s in starts]
    yw = [y[s + timesteps - 1] for s in starts]
    return np.array(Xw), np.array(yw)

Xw_all, yw_all = make_non_overlapping(X_raw, y, TIMESTEPS)
print("Windows shape (non-overlap):", Xw_all.shape, yw_all.shape)
```
```python
# Cell 6: Time-ordered split (train = first 80%, test = last 20%)
n = len(Xw_all)
split_idx = int(0.8 * n)
Xw_train, Xw_test = Xw_all[:split_idx], Xw_all[split_idx:]
yw_train, yw_test = yw_all[:split_idx], yw_all[split_idx:]
print("Train windows:", Xw_train.shape, "Test windows:", Xw_test.shape)
```
```python
# Cell 7: Feature engineering functions
def time_feats(window):
    """Compute per-channel time-domain statistics for one window.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        Flat vector with 9 values per channel, in channel order: mean,
        population std, RMS, peak-to-peak, IQR, skewness, excess kurtosis,
        max|x| / mean|x| ("crest-ish") and mean first difference.

    Notes
    -----
    Skewness and kurtosis are undefined for a constant channel (SciPy may
    return NaN there) and the mean first difference is undefined for a
    one-sample window. Those entries are reported as 0.0 so that flat
    windows -- e.g. the tiled single-row windows built by the inference
    helpers -- never feed NaN into the scaler or the models.
    """
    feats = []
    for ch in range(window.shape[1]):
        arr = window[:, ch]
        p2p = np.max(arr) - np.min(arr)
        # An exactly-zero range means every sample in the channel is identical.
        is_flat = p2p == 0
        feats += [
            arr.mean(),
            arr.std(ddof=0),
            np.sqrt(np.mean(arr**2)),    # RMS
            p2p,                         # P2P
            np.percentile(arr,75)-np.percentile(arr,25),  # IQR
            0.0 if is_flat else skew(arr),
            0.0 if is_flat else kurtosis(arr),
            (np.max(np.abs(arr)) / (np.mean(np.abs(arr))+1e-9)),  # crest-ish
            np.mean(np.diff(arr)) if arr.size > 1 else 0.0,
        ]
    return np.array(feats)

def freq_feats(window, n_bands=6):
    """Compute per-channel relative spectral band energies for one window.

    The one-sided FFT magnitude spectrum of each channel (DC bin included) is
    split into ``n_bands`` contiguous groups of bins; each feature is the
    share of that channel's total spectral energy inside one group.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.
    n_bands : int, default 6
        Number of bands per channel.

    Returns
    -------
    np.ndarray
        Flat vector with ``n_bands`` values per channel, in channel order.
    """
    feats = []
    t = window.shape[0]
    # Integer bin edges over the t//2+1 one-sided FFT bins. If n_bands exceeds
    # the bin count, some bands are empty and contribute 0.
    fft_idx_edges = np.linspace(0, t//2+1, n_bands+1, dtype=int)
    for ch in range(window.shape[1]):
        power = np.abs(rfft(window[:, ch]))**2
        energy = power.sum() + 1e-9   # epsilon avoids 0/0 on an all-zero channel
        for lo, hi in zip(fft_idx_edges[:-1], fft_idx_edges[1:]):
            feats.append(power[lo:hi].sum()/energy)
    return np.array(feats)

def build_feats(Xw):
    """Build the engineered feature matrix for a batch of windows.

    Parameters
    ----------
    Xw : iterable of np.ndarray
        Windows, each indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        One row per window: the ``time_feats`` vector followed by the
        6-band ``freq_feats`` vector.

    Raises
    ------
    ValueError
        If ``Xw`` is empty (``np.vstack`` cannot stack zero rows).
    """
    rows = [np.concatenate([time_feats(w), freq_feats(w, n_bands=6)]) for w in Xw]
    return np.vstack(rows)
```
```python
# Cell 8: Build features & scale (fit scaler on train only)
Xf_train = build_feats(Xw_train)
Xf_test  = build_feats(Xw_test)
print("Engineered feature shapes:", Xf_train.shape, Xf_test.shape)

scaler_feats = StandardScaler().fit(Xf_train)
Xf_train_s = scaler_feats.transform(Xf_train)
Xf_test_s  = scaler_feats.transform(Xf_test)
joblib.dump(scaler_feats, os.path.join(MODEL_DIR, "scaler_features.pkl"))
print("Saved scaler to", os.path.join(MODEL_DIR, "scaler_features.pkl"))
```
```python
# Cell 9: XGBoost training on engineered features
# (Using XGBoost with default parameters for demonstration)
# 'multi:softprob' makes the booster emit per-class probabilities, which the
# soft-vote ensemble and the inference helpers read through predict_proba();
# predict() still returns the arg-max class index.
xgb = XGBClassifier(objective='multi:softprob', num_class=n_classes, random_state=SEED)
xgb.fit(Xf_train_s, yw_train)
xgb_pred = xgb.predict(Xf_test_s)
print("XGBoost Test accuracy:", accuracy_score(yw_test, xgb_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, xgb_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
xgb.save_model(os.path.join(MODEL_DIR, "xgb_feats.json")) # Use json format for compatibility
print("Saved XGBoost model to", os.path.join(MODEL_DIR, "xgb_feats.json"))
```
```python
# Cell 10: MLP training on engineered features
n_feats = Xf_train_s.shape[1]
def build_mlp(n_feats, n_classes):
    """Create and compile the dense softmax classifier for engineered features.

    Architecture: Input(n_feats) -> Dense(256, relu) -> Dropout(0.3)
    -> Dense(128, relu) -> Dropout(0.25) -> Dense(n_classes, softmax).
    The model is compiled with the Adam optimizer, categorical cross-entropy
    loss (so targets must be one-hot encoded) and an accuracy metric.

    Parameters
    ----------
    n_feats : int
        Length of one input feature vector.
    n_classes : int
        Number of output classes.

    Returns
    -------
    The compiled Keras model.
    """
    features_in = layers.Input(shape=(n_feats,))
    hidden = features_in
    # (units, dropout rate) of each hidden stage, in order.
    for units, drop_rate in ((256, 0.3), (128, 0.25)):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)
    class_probs = layers.Dense(n_classes, activation='softmax')(hidden)
    net = models.Model(inputs=features_in, outputs=class_probs)
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# One-hot targets, as required by the 'categorical_crossentropy' loss.
y_train_cat = to_categorical(yw_train, num_classes=n_classes)
y_test_cat  = to_categorical(yw_test,  num_classes=n_classes)

mlp = build_mlp(n_feats, n_classes)
# Stop after 6 epochs without val_loss improvement and restore the best weights.
es = callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
mlp.fit(Xf_train_s, y_train_cat, validation_split=0.15, epochs=60, batch_size=128, callbacks=[es], verbose=2)
mlp_pred = np.argmax(mlp.predict(Xf_test_s), axis=1)
print("MLP Test accuracy:", accuracy_score(yw_test, mlp_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, mlp_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
mlp.save(os.path.join(MODEL_DIR, "mlp_feats.h5"))
```
```python
# Cell 11: Soft-vote ensemble (XGB + MLP)
xgb_proba = xgb.predict_proba(Xf_test_s)
mlp_proba = mlp.predict(Xf_test_s)
# Unweighted mean of the two models' class-probability matrices.
ens_proba = (xgb_proba + mlp_proba) / 2.0
ens_pred = np.argmax(ens_proba, axis=1)
print("Ensemble (XGB+MLP) Test accuracy:", accuracy_score(yw_test, ens_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, ens_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))

# Save ensemble artifacts
joblib.dump(le, os.path.join(MODEL_DIR, "label_encoder.pkl"))
print("Saved label encoder:", os.path.join(MODEL_DIR, "label_encoder.pkl"))
```
```python
# Cell 12: Inference helper (use saved models)
def time_feats_single(window):
    """Time-domain features for one window; thin alias of ``time_feats``."""
    return time_feats(window)

def freq_feats_single(window):
    """Spectral band features for one window, fixed at 6 bands."""
    return freq_feats(window, n_bands=6)

def predict_from_raw_row_no_cnn(raw_row):
    """Classify one sensor reading with the XGBoost + MLP soft-vote ensemble.

    Parameters
    ----------
    raw_row : 1D array-like of sensor values in order sensor_cols.

    Returns
    -------
    (label, probability)
        The decoded class label with the highest averaged probability, and
        that probability as a Python float.

    Notes
    -----
    The single row is repeated TIMESTEPS times to form a window. Such a
    constant window has undefined skewness/kurtosis, which can surface as NaN
    features; NaN/inf entries are replaced by 0.0 before scaling (the same
    treatment as ``predict_from_raw_row_safe``) so the averaged probabilities
    stay finite.
    """
    w = np.tile(np.array(raw_row, dtype=float).reshape(1,-1), (TIMESTEPS,1))
    feats = np.concatenate([time_feats_single(w), freq_feats_single(w)])
    # Without this, a NaN feature propagates through the MLP and the arg-max
    # below would be taken over NaN probabilities.
    feats = np.nan_to_num(feats, nan=0.0, posinf=0.0, neginf=0.0)
    feats_s = scaler_feats.transform(feats.reshape(1,-1))
    p_xgb = xgb.predict_proba(feats_s)[0]
    p_mlp = mlp.predict(feats_s)[0]
    p_avg = (p_xgb + p_mlp)/2.0
    idx = int(np.argmax(p_avg))
    return le.inverse_transform([idx])[0], float(p_avg[idx])
```
```python
import numpy as np

def predict_from_raw_row_safe(raw_row):
    """Predict the fault label for one reading, zeroing non-finite features.

    raw_row: [Vibration_X, Vibration_Y, Vibration_Z, Acoustic_Level, Temperature]

    Returns a ``(label, prob)`` tuple: the decoded class with the highest
    averaged XGBoost/MLP probability and that probability as a float.
    """
    # Repeat the single reading TIMESTEPS times to get a window-shaped input.
    reading = np.array(raw_row, dtype=float).reshape(1, -1)
    window = np.tile(reading, (TIMESTEPS, 1))

    # Time-domain block first, then the spectral block.
    raw_feats = np.concatenate([time_feats(window), freq_feats(window, n_bands=6)])

    # NaN / +-inf entries become 0 so the scaler and the models get finite input.
    clean_feats = np.nan_to_num(raw_feats, nan=0.0, posinf=0.0, neginf=0.0)

    scaled = scaler_feats.transform(clean_feats.reshape(1, -1))
    # Soft vote: mean of the two models' class-probability vectors.
    soft_vote = (xgb.predict_proba(scaled)[0] + mlp.predict(scaled)[0]) / 2.0

    best = int(np.argmax(soft_vote))
    return le.inverse_transform([best])[0], float(soft_vote[best])
```
```python
# Cell 13: Quick sample prediction + list saved artifacts
sample_row = df.loc[100, sensor_cols].values
plabel, pprob = predict_from_raw_row_no_cnn(sample_row)
print("Sample prediction:", plabel, pprob)
print("Saved files:", os.listdir(MODEL_DIR))
```
```python
# Cell 14: Optional - XGBoost feature importance (requires matplotlib)
importances = xgb.feature_importances_
idx = np.argsort(importances)[::-1][:30]
plt.figure(figsize=(8,6))
plt.barh(range(len(idx)), importances[idx][::-1])
plt.yticks(range(len(idx)), [f"feat_{i}" for i in idx[::-1]])
plt.title("Top feature importances (XGBoost)")
plt.show()
```
```python
def test_custom(vx, vy, vz, acoustic, temp):
    """Run the safe predictor on one hand-entered reading and print the result.

    The five arguments are passed to ``predict_from_raw_row_safe`` as one row
    in the order (vx, vy, vz, acoustic, temp). Prints the input, the predicted
    fault with its confidence in percent, and a separator line; returns None.
    """
    label, prob = predict_from_raw_row_safe([vx, vy, vz, acoustic, temp])
    report = (
        f"Input: Vx={vx}, Vy={vy}, Vz={vz}, Acoustic={acoustic}, Temp={temp}",
        f" -> Fault: {label}, Confidence: {prob*100:.2f}%",
        "-" * 50,
    )
    for line in report:
        print(line)

test_custom(0.1, 0.2, 0.3, 88, 90)
```
```python

```

In [None]:
import os, random, joblib, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from scipy.stats import skew, kurtosis
from scipy.fft import rfft, rfftfreq

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from xgboost import XGBClassifier
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
import tensorflow as tf


SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)
MODEL_DIR = "/content/models_nocnn"
os.makedirs(MODEL_DIR, exist_ok=True)

DATA_PATH = "/content/Rotating_equipment_fault_data.csv"
print("Loading:", DATA_PATH)
df = pd.read_csv(DATA_PATH)
print("Shape:", df.shape)
#display(df.head()) # Commented out as display() is for interactive environments

# Cell 4: Column detection & cleaning
target_col = "Fault_Type"
sensor_cols = ['Vibration_X','Vibration_Y','Vibration_Z','Acoustic_Level','Temperature']  # from your CSV sample

# drop NA rows in required columns
df = df.dropna(subset=sensor_cols + [target_col]).reset_index(drop=True)
print("After dropna:", df.shape)

# label encode target
le = LabelEncoder()
y = le.fit_transform(df[target_col].values)
X_raw = df[sensor_cols].astype(float).values
n_classes = len(le.classes_)
print("Detected classes:", le.classes_)

# Cell 5: Create non-overlapping windows
TIMESTEPS = 20  # tune if needed

def make_non_overlapping(X, y, timesteps):
    """Slice a sample stream into consecutive, non-overlapping windows.

    Parameters
    ----------
    X : array-like
        Samples, indexable/sliceable along the first axis (one row per sample).
    y : array-like
        Per-sample labels aligned with ``X``.
    timesteps : int
        Number of consecutive samples per window; must be >= 1.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The stacked windows and one label per window. Each window takes the
        label of its LAST sample. Trailing samples that do not fill a whole
        window are dropped.

    Raises
    ------
    ValueError
        If ``timesteps`` is smaller than 1 (a step of 0 or less would never
        advance through ``X``).
    """
    if timesteps < 1:
        raise ValueError(f"timesteps must be >= 1, got {timesteps}")
    # Start offset of every complete window; a partial tail window is skipped.
    starts = range(0, len(X) - timesteps + 1, timesteps)
    Xw = [X[s:s + timesteps] for s in starts]
    yw = [y[s + timesteps - 1] for s in starts]
    return np.array(Xw), np.array(yw)

Xw_all, yw_all = make_non_overlapping(X_raw, y, TIMESTEPS)
print("Windows shape (non-overlap):", Xw_all.shape, yw_all.shape)

# Cell 6: Time-ordered split (train = first 80%, test = last 20%)
n = len(Xw_all)
split_idx = int(0.8 * n)
Xw_train, Xw_test = Xw_all[:split_idx], Xw_all[split_idx:]
yw_train, yw_test = yw_all[:split_idx], yw_all[split_idx:]
print("Train windows:", Xw_train.shape, "Test windows:", Xw_test.shape)

# Cell 7: Feature engineering functions
def time_feats(window):
    """Compute per-channel time-domain statistics for one window.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        Flat vector with 9 values per channel, in channel order: mean,
        population std, RMS, peak-to-peak, IQR, skewness, excess kurtosis,
        max|x| / mean|x| ("crest-ish") and mean first difference.

    Notes
    -----
    Skewness and kurtosis are undefined for a constant channel (SciPy may
    return NaN there) and the mean first difference is undefined for a
    one-sample window. Those entries are reported as 0.0 so that flat
    windows -- e.g. the tiled single-row windows built by the inference
    helpers -- never feed NaN into the scaler or the models.
    """
    feats = []
    for ch in range(window.shape[1]):
        arr = window[:, ch]
        p2p = np.max(arr) - np.min(arr)
        # An exactly-zero range means every sample in the channel is identical.
        is_flat = p2p == 0
        feats += [
            arr.mean(),
            arr.std(ddof=0),
            np.sqrt(np.mean(arr**2)),    # RMS
            p2p,                         # P2P
            np.percentile(arr,75)-np.percentile(arr,25),  # IQR
            0.0 if is_flat else skew(arr),
            0.0 if is_flat else kurtosis(arr),
            (np.max(np.abs(arr)) / (np.mean(np.abs(arr))+1e-9)),  # crest-ish
            np.mean(np.diff(arr)) if arr.size > 1 else 0.0,
        ]
    return np.array(feats)

def freq_feats(window, n_bands=6):
    """Compute per-channel relative spectral band energies for one window.

    The one-sided FFT magnitude spectrum of each channel (DC bin included) is
    split into ``n_bands`` contiguous groups of bins; each feature is the
    share of that channel's total spectral energy inside one group.

    Parameters
    ----------
    window : np.ndarray
        2-D array indexed as ``window[sample, channel]``.
    n_bands : int, default 6
        Number of bands per channel.

    Returns
    -------
    np.ndarray
        Flat vector with ``n_bands`` values per channel, in channel order.
    """
    feats = []
    t = window.shape[0]
    # Integer bin edges over the t//2+1 one-sided FFT bins. If n_bands exceeds
    # the bin count, some bands are empty and contribute 0.
    fft_idx_edges = np.linspace(0, t//2+1, n_bands+1, dtype=int)
    for ch in range(window.shape[1]):
        power = np.abs(rfft(window[:, ch]))**2
        energy = power.sum() + 1e-9   # epsilon avoids 0/0 on an all-zero channel
        for lo, hi in zip(fft_idx_edges[:-1], fft_idx_edges[1:]):
            feats.append(power[lo:hi].sum()/energy)
    return np.array(feats)

def build_feats(Xw):
    """Build the engineered feature matrix for a batch of windows.

    Parameters
    ----------
    Xw : iterable of np.ndarray
        Windows, each indexed as ``window[sample, channel]``.

    Returns
    -------
    np.ndarray
        One row per window: the ``time_feats`` vector followed by the
        6-band ``freq_feats`` vector.

    Raises
    ------
    ValueError
        If ``Xw`` is empty (``np.vstack`` cannot stack zero rows).
    """
    rows = [np.concatenate([time_feats(w), freq_feats(w, n_bands=6)]) for w in Xw]
    return np.vstack(rows)

# Cell 8: Build features & scale (fit scaler on train only)
Xf_train = build_feats(Xw_train)
Xf_test  = build_feats(Xw_test)
print("Engineered feature shapes:", Xf_train.shape, Xf_test.shape)

scaler_feats = StandardScaler().fit(Xf_train)
Xf_train_s = scaler_feats.transform(Xf_train)
Xf_test_s  = scaler_feats.transform(Xf_test)
joblib.dump(scaler_feats, os.path.join(MODEL_DIR, "scaler_features.pkl"))
print("Saved scaler to", os.path.join(MODEL_DIR, "scaler_features.pkl"))

# Cell 9: XGBoost training on engineered features
# (Using XGBoost with default parameters for demonstration)
# 'multi:softprob' makes the booster emit per-class probabilities, which the
# soft-vote ensemble and the inference helpers read through predict_proba();
# predict() still returns the arg-max class index.
xgb = XGBClassifier(objective='multi:softprob', num_class=n_classes, random_state=SEED)
xgb.fit(Xf_train_s, yw_train)
xgb_pred = xgb.predict(Xf_test_s)
print("XGBoost Test accuracy:", accuracy_score(yw_test, xgb_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, xgb_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
xgb.save_model(os.path.join(MODEL_DIR, "xgb_feats.json")) # Use json format for compatibility
print("Saved XGBoost model to", os.path.join(MODEL_DIR, "xgb_feats.json"))

# Cell 10: MLP training on engineered features
n_feats = Xf_train_s.shape[1]
def build_mlp(n_feats, n_classes):
    """Create and compile the dense softmax classifier for engineered features.

    Architecture: Input(n_feats) -> Dense(256, relu) -> Dropout(0.3)
    -> Dense(128, relu) -> Dropout(0.25) -> Dense(n_classes, softmax).
    The model is compiled with the Adam optimizer, categorical cross-entropy
    loss (so targets must be one-hot encoded) and an accuracy metric.

    Parameters
    ----------
    n_feats : int
        Length of one input feature vector.
    n_classes : int
        Number of output classes.

    Returns
    -------
    The compiled Keras model.
    """
    features_in = layers.Input(shape=(n_feats,))
    hidden = features_in
    # (units, dropout rate) of each hidden stage, in order.
    for units, drop_rate in ((256, 0.3), (128, 0.25)):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)
    class_probs = layers.Dense(n_classes, activation='softmax')(hidden)
    net = models.Model(inputs=features_in, outputs=class_probs)
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# One-hot targets, as required by the 'categorical_crossentropy' loss.
y_train_cat = to_categorical(yw_train, num_classes=n_classes)
y_test_cat  = to_categorical(yw_test,  num_classes=n_classes)

mlp = build_mlp(n_feats, n_classes)
# Stop after 6 epochs without val_loss improvement and restore the best weights.
es = callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
mlp.fit(Xf_train_s, y_train_cat, validation_split=0.15, epochs=60, batch_size=128, callbacks=[es], verbose=2)
mlp_pred = np.argmax(mlp.predict(Xf_test_s), axis=1)
print("MLP Test accuracy:", accuracy_score(yw_test, mlp_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, mlp_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
mlp.save(os.path.join(MODEL_DIR, "mlp_feats.h5"))

# Cell 11: Soft-vote ensemble (XGB + MLP)
xgb_proba = xgb.predict_proba(Xf_test_s)
mlp_proba = mlp.predict(Xf_test_s)
# Unweighted mean of the two models' class-probability matrices.
ens_proba = (xgb_proba + mlp_proba) / 2.0
ens_pred = np.argmax(ens_proba, axis=1)
print("Ensemble (XGB+MLP) Test accuracy:", accuracy_score(yw_test, ens_pred))
# Pin the label set: with a time-ordered split the test tail may not contain
# every class, and classification_report raises ValueError when the classes it
# infers do not match target_names in number.
print(classification_report(yw_test, ens_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))

# Save ensemble artifacts
joblib.dump(le, os.path.join(MODEL_DIR, "label_encoder.pkl"))
print("Saved label encoder:", os.path.join(MODEL_DIR, "label_encoder.pkl"))

# Cell 12: Inference helper (use saved models)
def time_feats_single(window):
    """Return the time-domain features of one window (delegates to `time_feats`)."""
    single_window_feats = time_feats(window)
    return single_window_feats

def freq_feats_single(window):
    """Return the 6-band spectral features of one window (delegates to `freq_feats`)."""
    band_count = 6
    return freq_feats(window, n_bands=band_count)

def predict_from_raw_row_no_cnn(raw_row):
    """Predict a fault label from a single sensor reading with the XGB+MLP ensemble.

    Args:
        raw_row: 1D sequence of sensor values in the order of `sensor_cols`.

    Returns:
        (label, probability): the decoded class with the highest averaged
        probability and that probability as a float.
    """
    # Repeat the single reading TIMESTEPS times to form an artificial window
    w = np.tile(np.array(raw_row, dtype=float).reshape(1,-1), (TIMESTEPS,1))
    feats = np.concatenate([time_feats_single(w), freq_feats_single(w)])
    # The tiled window is constant, so variance-normalised statistics
    # (skew / kurtosis) can come back NaN; zero them so they do not poison
    # the scaler output and the MLP probabilities.
    feats = np.nan_to_num(feats, nan=0.0, posinf=0.0, neginf=0.0)
    feats_s = scaler_feats.transform(feats.reshape(1,-1))
    p_xgb = xgb.predict_proba(feats_s)[0]
    p_mlp = mlp.predict(feats_s)[0]
    p_avg = (p_xgb + p_mlp)/2.0  # soft vote: plain average of both models
    idx = int(np.argmax(p_avg))
    return le.inverse_transform([idx])[0], float(p_avg[idx])

import numpy as np

def predict_from_raw_row_safe(raw_row):
    """Ensemble prediction for one reading, with non-finite features zeroed.

    Args:
        raw_row: [Vibration_X, Vibration_Y, Vibration_Z, Acoustic_Level, Temperature]

    Returns:
        (label, prob): decoded class with the highest averaged probability
        and that probability as a float.
    """
    # Artificial window: the single reading repeated TIMESTEPS times
    reading = np.array(raw_row, dtype=float).reshape(1, -1)
    window = np.tile(reading, (TIMESTEPS, 1))

    # Time + frequency features, with NaN / inf replaced by 0 before scaling
    combined = np.concatenate([time_feats(window), freq_feats(window, n_bands=6)])
    cleaned = np.nan_to_num(combined, nan=0.0, posinf=0.0, neginf=0.0)

    # Scale, query both models, and average their class probabilities
    scaled = scaler_feats.transform(cleaned.reshape(1, -1))
    xgb_probs = xgb.predict_proba(scaled)[0]
    mlp_probs = mlp.predict(scaled)[0]
    mean_probs = (xgb_probs + mlp_probs) / 2.0

    best = int(np.argmax(mean_probs))
    return le.inverse_transform([best])[0], float(mean_probs[best])

# Cell 13: Quick sample prediction + list saved artifacts
# Use row 100 when it exists; clamp to the last row so a dataset with fewer
# than 101 rows does not fail with a KeyError on the hard-coded label.
sample_pos = min(100, len(df) - 1)
sample_row = df[sensor_cols].iloc[sample_pos].to_numpy(dtype=float)
plabel, pprob = predict_from_raw_row_no_cnn(sample_row)
print("Sample prediction:", plabel, pprob)
print("Saved files:", os.listdir(MODEL_DIR))

# Cell 14: Optional - XGBoost feature importance (requires matplotlib)
#importances = xgb.feature_importances_
#idx = np.argsort(importances)[::-1][:30]
#plt.figure(figsize=(8,6))
#plt.barh(range(len(idx)), importances[idx][::-1])
#plt.yticks(range(len(idx)), [f"feat_{i}" for i in idx[::-1]])
#plt.title("Top feature importances (XGBoost)")
#plt.show()

def test_custom(vx, vy, vz, acoustic, temp):
    """Print the ensemble's fault prediction for one hand-entered reading.

    The five arguments are passed to `predict_from_raw_row_safe` in the order
    (vx, vy, vz, acoustic, temp); nothing is returned.
    """
    label, prob = predict_from_raw_row_safe([vx, vy, vz, acoustic, temp])
    report_lines = (
        f"Input: Vx={vx}, Vy={vy}, Vz={vz}, Acoustic={acoustic}, Temp={temp}",
        f" -> Fault: {label}, Confidence: {prob*100:.2f}%",
        "-" * 50,
    )
    for line in report_lines:
        print(line)

test_custom(0.1, 0.2, 0.3, 88, 90)

Loading: /content/Rotating_equipment_fault_data.csv


FileNotFoundError: [Errno 2] No such file or directory: '/content/Rotating_equipment_fault_data.csv'

In [None]:

import os, random, joblib, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from scipy.stats import skew, kurtosis
from scipy.fft import rfft, rfftfreq

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from xgboost import XGBClassifier
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
import tensorflow as tf


SEED = 42
# Seed Python's, NumPy's and TensorFlow's RNGs with the same value
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# Directory that receives every saved artifact (scaler, models, label encoder)
MODEL_DIR = "/content/models_nocnn"
os.makedirs(MODEL_DIR, exist_ok=True)


In [None]:

DATA_PATH = "/content/Rotating_equipment_fault_data.csv"
print("Loading:", DATA_PATH)
# Fail early with an actionable message: every later cell depends on `df`,
# and a missing upload otherwise surfaces as a bare pandas FileNotFoundError.
if not os.path.isfile(DATA_PATH):
    raise FileNotFoundError(
        f"Dataset not found at {DATA_PATH!r}. Upload "
        "Rotating_equipment_fault_data.csv to the runtime (or edit DATA_PATH) "
        "and re-run this cell."
    )
df = pd.read_csv(DATA_PATH)
print("Shape:", df.shape)
display(df.head())


In [None]:
# Cell 4: Column detection & cleaning
target_col = "Fault_Type"
sensor_cols = [
    'Vibration_X',
    'Vibration_Y',
    'Vibration_Z',
    'Acoustic_Level',
    'Temperature',
]  # from your CSV sample

# Keep only rows where every sensor reading and the target are present
required_cols = sensor_cols + [target_col]
df = df.dropna(subset=required_cols).reset_index(drop=True)
print("After dropna:", df.shape)

# Integer-encode the target; pull the sensor readings out as a float matrix
le = LabelEncoder()
y = le.fit_transform(df[target_col].values)
n_classes = len(le.classes_)
X_raw = df[sensor_cols].astype(float).values
print("Detected classes:", le.classes_)


In [None]:
# Cell 5: Create non-overlapping windows
# Number of consecutive rows grouped into one window by make_non_overlapping;
# the inference helpers tile a single reading to this same length.
TIMESTEPS = 20  # tune if needed

def make_non_overlapping(X, y, timesteps):
    """Cut `X` into consecutive, non-overlapping windows of `timesteps` rows.

    Each window is labelled with the `y` value of its last row. Trailing rows
    that do not fill a complete window are dropped.

    Args:
        X: indexable sequence of rows (e.g. a 2D array of sensor readings).
        y: per-row labels, indexable with the same positions as `X`.
        timesteps: window length in rows; must be >= 1.

    Returns:
        (Xw, yw): NumPy arrays holding the stacked windows and one label per
        window.

    Raises:
        ValueError: if `timesteps` is smaller than 1. A value of 0 would never
            advance the window start (infinite loop) and a negative value
            walks backwards through the data.
    """
    if timesteps < 1:
        raise ValueError(f"timesteps must be a positive integer, got {timesteps!r}")
    # Window start positions: 0, timesteps, 2*timesteps, ... while a full window fits
    starts = range(0, len(X) - timesteps + 1, timesteps)
    Xw = [X[s:s + timesteps] for s in starts]
    yw = [y[s + timesteps - 1] for s in starts]
    return np.array(Xw), np.array(yw)

# Window the full dataset once; the train/test split is taken from these arrays
windows = make_non_overlapping(X_raw, y, TIMESTEPS)
Xw_all, yw_all = windows
print("Windows shape (non-overlap):", Xw_all.shape, yw_all.shape)


In [None]:
# Cell 6: Time-ordered split (train = first 80%, test = last 20%)
n = len(Xw_all)
split_idx = int(0.8 * n)
# No shuffling: everything before split_idx trains, everything after tests
train_part, test_part = slice(None, split_idx), slice(split_idx, None)
Xw_train, yw_train = Xw_all[train_part], yw_all[train_part]
Xw_test, yw_test = Xw_all[test_part], yw_all[test_part]
print("Train windows:", Xw_train.shape, "Test windows:", Xw_test.shape)


In [None]:
# Cell 7: Feature engineering functions
def time_feats(window):
    """Compute nine time-domain statistics for every channel of a window.

    Args:
        window: 2D array; rows are time steps, columns are channels.

    Returns:
        1D float array of length 9 * n_channels. Per channel, in order: mean,
        population std, RMS, peak-to-peak, inter-quartile range, skewness,
        kurtosis, max|x| / mean|x| (crest-like factor) and mean first
        difference. Non-finite entries are replaced by 0.0.
    """
    feats = []
    for ch in range(window.shape[1]):
        arr = window[:, ch]
        feats += [
            arr.mean(),
            arr.std(ddof=0),
            np.sqrt(np.mean(arr**2)),    # RMS
            np.max(arr) - np.min(arr),   # P2P
            np.percentile(arr,75)-np.percentile(arr,25),  # IQR
            skew(arr),
            kurtosis(arr),
            (np.max(np.abs(arr)) / (np.mean(np.abs(arr))+1e-9)),  # crest-ish; epsilon avoids 0/0
            np.mean(np.diff(arr))
        ]
    # A constant channel (zero variance) can make skew/kurtosis NaN, and a
    # one-row window makes the diff mean NaN. Zero those so downstream
    # scaling and the MLP never see non-finite inputs.
    return np.nan_to_num(np.array(feats, dtype=float), nan=0.0, posinf=0.0, neginf=0.0)

def freq_feats(window, n_bands=6):
    """Relative spectral energy per frequency band for every channel.

    Args:
        window: 2D array; rows are time steps, columns are channels.
        n_bands: number of contiguous bands the rFFT bins are divided into.

    Returns:
        1D array of length n_bands * n_channels, channel-major. Each entry is
        the band's summed squared rFFT magnitude divided by the channel's
        total (plus 1e-9 to avoid division by zero).
    """
    n_samples = window.shape[0]
    # Integer bin edges spanning the n_samples//2 + 1 rFFT bins
    edges = np.linspace(0, n_samples//2+1, n_bands+1, dtype=int)
    band_limits = list(zip(edges[:-1], edges[1:]))
    ratios = []
    for ch in range(window.shape[1]):
        power = np.abs(rfft(window[:, ch]))**2
        total = power.sum() + 1e-9
        ratios.extend(power[lo:hi].sum()/total for lo, hi in band_limits)
    return np.array(ratios)

def build_feats(Xw):
    """Stack the engineered feature vector of every window into a 2D matrix.

    Each row is `time_feats(w)` followed by `freq_feats(w, n_bands=6)` for the
    corresponding window `w` in `Xw`.
    """
    rows = [
        np.concatenate([time_feats(win), freq_feats(win, n_bands=6)])
        for win in Xw
    ]
    return np.vstack(rows)


In [None]:
# Cell 8: Build features & scale (fit scaler on train only)
Xf_train, Xf_test = (build_feats(part) for part in (Xw_train, Xw_test))
print("Engineered feature shapes:", Xf_train.shape, Xf_test.shape)

# Statistics come from the training windows only; the test set is just transformed
scaler_feats = StandardScaler()
scaler_feats.fit(Xf_train)
Xf_train_s = scaler_feats.transform(Xf_train)
Xf_test_s = scaler_feats.transform(Xf_test)

scaler_path = os.path.join(MODEL_DIR, "scaler_features.pkl")
joblib.dump(scaler_feats, scaler_path)
print("Saved scaler to", scaler_path)


In [None]:
# Cell 9: XGBoost training on engineered features
# (Using XGBoost with default parameters for demonstration)
xgb = XGBClassifier(objective='multi:softmax', num_class=n_classes, random_state=SEED)
xgb.fit(Xf_train_s, yw_train)
xgb_pred = xgb.predict(Xf_test_s)
print("XGBoost Test accuracy:", accuracy_score(yw_test, xgb_pred))
# Pin the label set to every encoded class: without `labels`, the report
# raises ValueError whenever the test split / predictions miss a class and
# the inferred label count no longer matches len(target_names).
print(classification_report(yw_test, xgb_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
xgb_model_path = os.path.join(MODEL_DIR, "xgb_feats.json")
xgb.save_model(xgb_model_path) # Use json format for compatibility
print("Saved XGBoost model to", xgb_model_path)

In [None]:
# Cell 10: MLP training on engineered features
n_feats = Xf_train_s.shape[1]
def build_mlp(n_feats, n_classes):
    """Build and compile a two-hidden-layer softmax classifier.

    Args:
        n_feats: width of the input feature vector.
        n_classes: number of output units (one per class).

    Returns:
        A compiled Keras functional model: Dense(256)+Dropout(0.3),
        Dense(128)+Dropout(0.25), then a softmax output layer; trained with
        Adam on categorical cross-entropy, reporting accuracy.
    """
    hidden_spec = ((256, 0.3), (128, 0.25))  # (units, dropout rate) per hidden layer
    inputs = layers.Input(shape=(n_feats,))
    hidden = inputs
    for units, drop_rate in hidden_spec:
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)
    probs = layers.Dense(n_classes, activation='softmax')(hidden)
    net = models.Model(inputs=inputs, outputs=probs)
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# One-hot targets for the categorical cross-entropy loss
y_train_cat = to_categorical(yw_train, num_classes=n_classes)
y_test_cat  = to_categorical(yw_test,  num_classes=n_classes)

mlp = build_mlp(n_feats, n_classes)
# Stop once validation loss stalls for 6 epochs and roll back to the best weights
es = callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
mlp.fit(Xf_train_s, y_train_cat, validation_split=0.15, epochs=60, batch_size=128, callbacks=[es], verbose=2)
mlp_pred = np.argmax(mlp.predict(Xf_test_s), axis=1)
print("MLP Test accuracy:", accuracy_score(yw_test, mlp_pred))
# Pin the label set to every encoded class: without `labels`, the report
# raises ValueError whenever the test split / predictions miss a class and
# the inferred label count no longer matches len(target_names).
print(classification_report(yw_test, mlp_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))
mlp.save(os.path.join(MODEL_DIR, "mlp_feats.h5"))


In [None]:
# Cell 11: Soft-vote ensemble (XGB + MLP)
# Average the two models' class-probability matrices and take the arg-max
xgb_proba = xgb.predict_proba(Xf_test_s)
mlp_proba = mlp.predict(Xf_test_s)
ens_proba = (xgb_proba + mlp_proba) / 2.0
ens_pred = np.argmax(ens_proba, axis=1)
print("Ensemble (XGB+MLP) Test accuracy:", accuracy_score(yw_test, ens_pred))
# Pin the label set to every encoded class: without `labels`, the report
# raises ValueError whenever the test split / predictions miss a class and
# the inferred label count no longer matches len(target_names).
print(classification_report(yw_test, ens_pred, labels=np.arange(n_classes),
                            target_names=le.classes_, zero_division=0))

# Save ensemble artifacts
label_encoder_path = os.path.join(MODEL_DIR, "label_encoder.pkl")
joblib.dump(le, label_encoder_path)
print("Saved label encoder:", label_encoder_path)


In [None]:
# Cell 12: Inference helper (uses the in-memory scaler, models and label encoder from the cells above, not the saved files)
def time_feats_single(window):
    """Return the time-domain features of one window (delegates to `time_feats`)."""
    single_window_feats = time_feats(window)
    return single_window_feats

def freq_feats_single(window):
    """Return the 6-band spectral features of one window (delegates to `freq_feats`)."""
    band_count = 6
    return freq_feats(window, n_bands=band_count)

def predict_from_raw_row_no_cnn(raw_row):
    """Predict a fault label from a single sensor reading with the XGB+MLP ensemble.

    Args:
        raw_row: 1D sequence of sensor values in the order of `sensor_cols`.

    Returns:
        (label, probability): the decoded class with the highest averaged
        probability and that probability as a float.
    """
    # Repeat the single reading TIMESTEPS times to form an artificial window
    w = np.tile(np.array(raw_row, dtype=float).reshape(1,-1), (TIMESTEPS,1))
    feats = np.concatenate([time_feats_single(w), freq_feats_single(w)])
    # The tiled window is constant, so variance-normalised statistics
    # (skew / kurtosis) can come back NaN; zero them so they do not poison
    # the scaler output and the MLP probabilities.
    feats = np.nan_to_num(feats, nan=0.0, posinf=0.0, neginf=0.0)
    feats_s = scaler_feats.transform(feats.reshape(1,-1))
    p_xgb = xgb.predict_proba(feats_s)[0]
    p_mlp = mlp.predict(feats_s)[0]
    p_avg = (p_xgb + p_mlp)/2.0  # soft vote: plain average of both models
    idx = int(np.argmax(p_avg))
    return le.inverse_transform([idx])[0], float(p_avg[idx])


In [None]:
import numpy as np

def predict_from_raw_row_safe(raw_row):
    """Ensemble prediction for one reading, with non-finite features zeroed.

    Args:
        raw_row: [Vibration_X, Vibration_Y, Vibration_Z, Acoustic_Level, Temperature]

    Returns:
        (label, prob): decoded class with the highest averaged probability
        and that probability as a float.
    """
    # Artificial window: the single reading repeated TIMESTEPS times
    reading = np.array(raw_row, dtype=float).reshape(1, -1)
    window = np.tile(reading, (TIMESTEPS, 1))

    # Time + frequency features, with NaN / inf replaced by 0 before scaling
    combined = np.concatenate([time_feats(window), freq_feats(window, n_bands=6)])
    cleaned = np.nan_to_num(combined, nan=0.0, posinf=0.0, neginf=0.0)

    # Scale, query both models, and average their class probabilities
    scaled = scaler_feats.transform(cleaned.reshape(1, -1))
    xgb_probs = xgb.predict_proba(scaled)[0]
    mlp_probs = mlp.predict(scaled)[0]
    mean_probs = (xgb_probs + mlp_probs) / 2.0

    best = int(np.argmax(mean_probs))
    return le.inverse_transform([best])[0], float(mean_probs[best])


In [None]:
# Cell 13: Quick sample prediction + list saved artifacts
# Use row 100 when it exists; clamp to the last row so a dataset with fewer
# than 101 rows does not fail with a KeyError on the hard-coded label.
sample_pos = min(100, len(df) - 1)
sample_row = df[sensor_cols].iloc[sample_pos].to_numpy(dtype=float)
plabel, pprob = predict_from_raw_row_no_cnn(sample_row)
print("Sample prediction:", plabel, pprob)
print("Saved files:", os.listdir(MODEL_DIR))


In [None]:
# Cell 14: Optional - XGBoost feature importance (requires matplotlib)
importances = xgb.feature_importances_
idx = np.argsort(importances)[::-1][:30]  # up to 30 features, most important first
# Bars are drawn in reversed order of `idx`; tick labels follow the same order
bar_positions = range(len(idx))
plotted_order = idx[::-1]
plt.figure(figsize=(8,6))
plt.barh(bar_positions, importances[plotted_order])
plt.yticks(bar_positions, [f"feat_{feat_no}" for feat_no in plotted_order])
plt.title("Top feature importances (XGBoost)")
plt.show()


NameError: name 'xgb' is not defined

In [None]:
def test_custom(vx, vy, vz, acoustic, temp):
    """Print the ensemble's fault prediction for one hand-entered reading.

    The five arguments are passed to `predict_from_raw_row_safe` in the order
    (vx, vy, vz, acoustic, temp); nothing is returned.
    """
    label, prob = predict_from_raw_row_safe([vx, vy, vz, acoustic, temp])
    report_lines = (
        f"Input: Vx={vx}, Vy={vy}, Vz={vz}, Acoustic={acoustic}, Temp={temp}",
        f" -> Fault: {label}, Confidence: {prob*100:.2f}%",
        "-" * 50,
    )
    for line in report_lines:
        print(line)

test_custom(0.1, 0.2, 0.3, 88, 90)


NameError: name 'predict_from_raw_row_safe' is not defined