In [4]:
import os
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split

# Project root. Set the PREDICTIVE_MAINTENANCE_ROOT environment variable to
# point at your own copy of the project; when it is unset or empty, the
# original author's local Windows path is used as the fallback.
PROJECT_ROOT = Path(
    os.environ.get("PREDICTIVE_MAINTENANCE_ROOT")
    or r"C:\Users\Arushi Sharma\Downloads\predictive_maintenance"
)

# Subfolders: input data and saved model artifacts.
DATA_DIR = PROJECT_ROOT / "data"
MODELS_DIR = PROJECT_ROOT / "models"

# Create the models folder if it is missing. `parents` is left at its default
# (False), so a PROJECT_ROOT that does not exist raises FileNotFoundError here
# instead of silently creating a stray directory tree.
MODELS_DIR.mkdir(exist_ok=True)


In [6]:
# Tunable settings for this cell, named so they can be changed in one place.
RUL_FAILURE_THRESHOLD = 20  # rows with RUL strictly below this get label 1
TEST_FRACTION = 0.2         # share of rows held out for evaluation
RANDOM_SEED = 42            # shared by the split and the forest
MODEL_FILENAME = "rf_binary_classifier.pkl"

# Load preprocessed data
df = pd.read_csv(DATA_DIR / "processed_sensor_data.csv")

# Binary target: 1 if RUL < RUL_FAILURE_THRESHOLD else 0
df["label"] = (df["RUL"] < RUL_FAILURE_THRESHOLD).astype(int)

# Features are every column except the identifiers and the target-derived ones.
X = df.drop(columns=["unit", "cycle", "RUL", "label"])
y = df["label"]

# Train/test split (row-wise, random).
# NOTE(review): if "unit" identifies a single machine, rows from the same unit
# can land in both train and test, which may inflate the scores below. Consider
# a group-aware split (e.g. GroupShuffleSplit on "unit") -- confirm against the
# dataset before changing, since it will alter the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=RANDOM_SEED
)

# Train Random Forest
rf_clf = RandomForestClassifier(
    n_estimators=100, max_depth=10, random_state=RANDOM_SEED
)
rf_clf.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = rf_clf.predict(X_test)
# Second predict_proba column; this is the score for label 1 when both labels
# occur in the training rows.
y_score = rf_clf.predict_proba(X_test)[:, 1]
print("📊 Classification Report:\n", classification_report(y_test, y_pred))
print("🔵 ROC-AUC Score:", roc_auc_score(y_test, y_score))

# Save model; the confirmation message reuses the path so it cannot drift
# from the file actually written.
model_path = MODELS_DIR / MODEL_FILENAME
joblib.dump(rf_clf, model_path)
print(f"✅ Saved: {model_path.name}")


📊 Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99      3615
           1       0.94      0.86      0.90       432

    accuracy                           0.98      4047
   macro avg       0.96      0.93      0.94      4047
weighted avg       0.98      0.98      0.98      4047

🔵 ROC-AUC Score: 0.9964121971210491
✅ Saved: rf_binary_classifier.pkl
