In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

# 1. Load data
# NOTE(review): the file name ends in ".csv" but it is read with read_excel —
# confirm the file really is an Excel workbook containing a "Sheet1" sheet.
df = pd.read_excel("train&test_data.csv", sheet_name="Sheet1")

# 2. Compute magnitude: Euclidean norm of the three axis readings
squared_sum = df['x_axis']**2 + df['y_axis']**2 + df['z_axis']**2
df['magnitude'] = np.sqrt(squared_sum)

# 3. Rolling statistics of the magnitude.
# One window object is shared by both statistics. With min_periods=1 the mean
# is defined from the first row on, while the sample std of a single value is
# NaN, so the first row is removed by the dropna() below.
window_size = 20
magnitude_window = df['magnitude'].rolling(window=window_size, min_periods=1)
df['rolling_mean'] = magnitude_window.mean()
df['rolling_std'] = magnitude_window.std()

# 4. Drop NaNs (rows with a NaN in any column) and renumber the index from 0
df_cleaned = (
    df
    .dropna()
    .reset_index(drop=True)
)

# 5. Features and labels
feature_columns = ['x_axis', 'y_axis', 'z_axis', 'magnitude', 'rolling_mean', 'rolling_std']
features = df_cleaned[feature_columns]
labels = df_cleaned['label']

# 6. Train-Validation-Test Split (60/20/20)
# Step one holds out 20% of all rows as the final test set.
X_temp, X_test, y_temp, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)
# Step two takes 25% of the remaining 80% (= 20% overall) for validation,
# leaving 60% of the rows for training.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.25,
    stratify=y_temp,
    random_state=42,
)

# 7. Feature Scaling
# The scaler learns its statistics from the training split only; validation
# and test rows are transformed with those same statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# 8. Apply SMOTE on train set only
# sampling_strategy=0.5 only partially balances the classes: the minority
# class is oversampled up to half the size of the majority class.
smote = SMOTE(sampling_strategy=0.5, random_state=42)
X_train_sm, y_train_sm = smote.fit_resample(X_train_scaled, y_train)

# 9. Model with limited complexity
# Shallow trees and a minimum leaf size keep the forest from memorising the
# small (and partly synthetic) training set.
model = RandomForestClassifier(
    n_estimators=50,
    max_depth=5,
    min_samples_leaf=10,
    random_state=42
)
model.fit(X_train_sm, y_train_sm)

# 10. Cross-validation (on training data)
# Scaling and SMOTE are refit inside every fold by running them as pipeline
# steps on the *raw* training split. Cross-validating the already-resampled
# X_train_sm would put synthetic samples interpolated from validation-fold
# points into the training folds and inflate the score.
# Fresh, identically configured estimators are used so the fitted `scaler`,
# `smote` and `model` objects above are left untouched.
cv_pipeline = Pipeline([
    ('scaler', StandardScaler(**scaler.get_params())),
    ('smote', SMOTE(**smote.get_params())),
    ('rf', RandomForestClassifier(**model.get_params())),
])
cv_scores = cross_val_score(cv_pipeline, X_train, y_train, cv=5, scoring='f1_macro')
# NOTE(review): with very few minority samples per fold SMOTE can fail to fit;
# cross_val_score then reports NaN for that fold — check the printed scores.
print("\nCross-Validation F1 (macro) per fold:", np.round(cv_scores, 3))
print(f"Cross-Validation F1 (macro) mean: {cv_scores.mean():.3f} (std {cv_scores.std():.3f})")

# 11. Evaluate on validation set
y_val_pred = model.predict(X_val_scaled)
print("\n Validation Set Classification Report:\n", classification_report(y_val, y_val_pred))

# 12. Final Test Evaluation
y_test_pred = model.predict(X_test_scaled)
print("\nTest Set Classification Report:\n", classification_report(y_test, y_test_pred))

# 13. Confusion Matrix
# Assign first, then print: `print(cm = ...)` passes `cm` as a keyword argument
# to print() and raises TypeError, which aborted the rest of the cell.
# labels=model.classes_ pins the row/column order (and matrix size) to the
# same classes used as display labels, even if a class is absent from y_test.
cm = confusion_matrix(y_test, y_test_pred, labels=model.classes_)
print(cm)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
# Render the matrix; constructing the display object alone draws nothing.
disp.plot()
plt.show()


# 14. Predict full dataset
def _describe_prediction(predicted_label):
    """Return the report text for one predicted class (1 means fault)."""
    return '⚠️ Fault Detected' if predicted_label == 1 else '✅ Normal'


# Every cleaned row is scaled with the scaler fitted on the training split
# and then scored by the trained model.
full_dataset_scaled = scaler.transform(features)
df_cleaned['prediction'] = model.predict(full_dataset_scaled)
df_cleaned['fault_detected'] = df_cleaned['prediction'].map(_describe_prediction)

# 15. Frequency Analysis
n = len(df_cleaned)
T = 1.0  # Adjust if actual sampling time is known
half_length = n // 2
yf = fft(df_cleaned['magnitude'].values)
xf = fftfreq(n, T)[:half_length]
# Bin 0 is the DC component, so the search starts at bin 1; the +1 maps the
# argmax position back onto the index into xf.
positive_amplitudes = np.abs(yf[1:half_length])
dominant_freq = xf[1 + np.argmax(positive_amplitudes)]


# 16. Vibration Plot
# Each trace is (column in df_cleaned, legend label, line colour); they are
# drawn in this order on a single set of axes.
fig, ax = plt.subplots(figsize=(14, 6))
traces = [
    ('magnitude', 'Vibration Magnitude', 'blue'),
    ('x_axis', 'X-Axis', 'black'),
    ('y_axis', 'Y-Axis', 'green'),
    ('z_axis', 'Z-Axis', 'red'),
]
for column, legend_label, line_color in traces:
    ax.plot(df_cleaned.index, df_cleaned[column], label=legend_label, color=line_color)

ax.set_title(f'Machine Vibration Monitoring (Dominant Frequency ≈ {dominant_freq:.2f} Hz)')
ax.set_xlabel('Time Index')
ax.set_ylabel('Magnitude')
ax.legend()
ax.grid(True)
fig.tight_layout()
# Write the image to disk before showing the figure.
fig.savefig("final_machine_vibration_plot.png")
plt.show()

# 17. Save output
# Writes the cleaned frame, including the prediction and fault_detected
# columns, to an Excel workbook without the index column.
report_path = "Final_Machine_Fault_Report.xlsx"
df_cleaned.to_excel(report_path, index=False)



 Validation Set Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.92      0.94        64
           1       0.29      0.50      0.36         4

    accuracy                           0.90        68
   macro avg       0.63      0.71      0.65        68
weighted avg       0.93      0.90      0.91        68


Test Set Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.97      0.98        64
           1       0.67      1.00      0.80         4

    accuracy                           0.97        68
   macro avg       0.83      0.98      0.89        68
weighted avg       0.98      0.97      0.97        68



TypeError: 'cm' is an invalid keyword argument for print()

In [None]:
import joblib

# Save model and scaler
# Both are persisted because new data has to be scaled with the fitted scaler
# before it is passed to the model.
model_path = 'rf_vibration_model.pkl'
scaler_path = 'scaler_vibration.pkl'
joblib.dump(model, model_path)
joblib.dump(scaler, scaler_path)

['scaler_vibration.pkl']