In [None]:
import pandas as pd
import os
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score, make_scorer, classification_report

from functions import (clean_impossible_values, remove_target_contradictions, clean_col_names, tune_and_evaluate_stage1_models, 
                       binary_metrics, evaluate_thresholds, evaluate_thresholds_r, evaluate_metrics, train_stage2_models)


In [None]:
df = pd.read_csv('data/predictive_maintenance.csv')
df = clean_impossible_values(df, drop=True)

No obvious impossible values in Air temperature [K].
No obvious impossible values in Process temperature [K].
No obvious impossible values in Rotational speed [rpm].
No obvious impossible values in Torque [Nm].
No obvious impossible values in Tool wear [min].
No rows dropped or flagged (no impossible values found).


In [None]:
df = remove_target_contradictions(df)
df['Temp_diff'] = df['Process temperature [K]'] - df['Air temperature [K]']
df['Torque_per_rpm'] = df['Torque [Nm]'] / (df['Rotational speed [rpm]'])  
df['Wear_per_rpm'] = df['Tool wear [min]'] / (df['Rotational speed [rpm]'])

df.to_csv('data/eval_data.csv')

le1 = LabelEncoder()
df['Type'] = le1.fit_transform(df['Type'])


os.makedirs("models", exist_ok=True)
# Save encoder to the folder
joblib.dump(le1, "models/type_encoder.pkl")

feature_columns = [
    'Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]',
    'Torque [Nm]', 'Tool wear [min]', 'Temp_diff', 'Torque_per_rpm', 'Wear_per_rpm',
    'Type'
]


Contradictions before cleaning: 27 rows.
Contradictions after cleaning: 0 rows.
Removed 27 contradictory rows.



In [24]:
X = df[feature_columns]
y = df['Target']

X = clean_col_names(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
print("Train/val shapes:", X_train.shape, X_test.shape)

Train/val shapes: (7978, 9) (1995, 9)


In [37]:
f1_macro_scorer = make_scorer(f1_score, average='macro')
results = tune_and_evaluate_stage1_models(X_train, y_train, X_test, y_test, f1_macro_scorer)

best_rf = results["RandomForest"]["best_estimator"]
best_lgbm = results["LightGBM"]["best_estimator"]
best_xgb = results["XGBoost"]["best_estimator"]

Fitting 3 folds for each of 100 candidates, totalling 300 fits

--- Random Forest Results ---
Best Params: {'n_estimators': 250, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': None, 'criterion': 'entropy'}
CV F1: 0.8796
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1929
           1       0.90      0.80      0.85        66

    accuracy                           0.99      1995
   macro avg       0.95      0.90      0.92      1995
weighted avg       0.99      0.99      0.99      1995

AUC-PR: 0.8651

[LightGBM] [Info] Number of positive: 264, number of negative: 7714
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000997 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1517
[LightGBM] [Info] Number of data points in the train set: 7978, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pa

In [None]:
voting = VotingClassifier(estimators=[('lgbm', best_lgbm),('xgb', best_xgb),('rf', best_rf)], 
                          voting='soft', n_jobs=-1)
voting.fit(X_train, y_train)
y_pred_voting = voting.predict(X_test)
y_proba_voting = voting.predict_proba(X_test)[:, 1]
metrics_voting = binary_metrics(y_test, y_pred_voting, y_proba_voting)

print("Voting Classifier Metrics:")
print(metrics_voting)


Voting Classifier Metrics:
{'accuracy': 0.9914786967418546, 'precision': 0.8888888888888888, 'recall': 0.8484848484848485, 'f1': 0.8682170542635659, 'roc_auc': 0.9908572505773127, 'mcc': 0.8640660696795754, 'confusion_matrix': array([[1922,    7],
       [  10,   56]], dtype=int64), 'AUC_PR': 0.9159300876441053}


In [None]:
print(classification_report(y_test, y_pred_voting, digits=4))

              precision    recall  f1-score   support

           0     0.9948    0.9964    0.9956      1929
           1     0.8889    0.8485    0.8682        66

    accuracy                         0.9915      1995
   macro avg     0.9419    0.9224    0.9319      1995
weighted avg     0.9913    0.9915    0.9914      1995



In [None]:
best_threshold, results_f1 = evaluate_thresholds(y_test, y_proba_voting)

y_pred_thresh = (y_proba_voting >= best_threshold).astype(int)

metrics_voting_tuned = binary_metrics(y_test, y_pred_thresh, y_proba_voting)
metrics_voting_tuned
print("=== Classification Report: Threshold-Tuned Voting Classifier ===")
print(classification_report(y_test, y_pred_thresh))

Best Threshold by F1: 0.515
At that threshold — Precision: 0.903, Recall: 0.848, F1: 0.875
=== Classification Report: Threshold-Tuned Voting Classifier ===
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1929
           1       0.90      0.85      0.88        66

    accuracy                           0.99      1995
   macro avg       0.95      0.92      0.94      1995
weighted avg       0.99      0.99      0.99      1995



In [None]:
print("=== Classification Report: Threshold-Tuned Voting Classifier ===")
print(classification_report(y_test, y_pred_thresh))

=== Classification Report: Threshold-Tuned Voting Classifier ===
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1929
           1       0.90      0.85      0.88        66

    accuracy                           0.99      1995
   macro avg       0.95      0.92      0.94      1995
weighted avg       0.99      0.99      0.99      1995



In [None]:
thresholds_info = evaluate_thresholds_r(y_test, y_proba_voting, min_precision=0.9)
best_threshold_recall = thresholds_info["best_threshold_recall"]
y_pred_thresh_r = (y_proba_voting >= best_threshold_recall).astype(int)
metrics_voting_tuned_r = binary_metrics(y_test, y_pred_thresh_r, y_proba_voting)
metrics_voting_tuned_r


Best Threshold for Recall (Precision ≥ 0.9): 0.515
Precision: 0.903, Recall: 0.848, F1: 0.875

Best Threshold by F1: 0.515
At that threshold — Precision: 0.903, Recall: 0.848, F1: 0.875


{'accuracy': 0.9919799498746867,
 'precision': 0.9032258064516129,
 'recall': 0.8484848484848485,
 'f1': 0.875,
 'roc_auc': 0.9908572505773127,
 'mcc': 0.8713154535644742,
 'confusion_matrix': array([[1923,    6],
        [  10,   56]], dtype=int64),
 'AUC_PR': 0.9159300876441053}

In [None]:
# Save the first voting ensemble
joblib.dump(voting, "models/voting_stage1_model.pkl")

# Save both thresholds together
thresholds_data = {
    "best_threshold_f1": float(best_threshold),
    "best_threshold_recall": float(thresholds_info["best_threshold_recall"]),
    "results_f1": results_f1,
    "results_recall": thresholds_info["results"]
}

joblib.dump(thresholds_data, "models/voting_stage1_thresholds.pkl")

print("✅ Voting1 thresholds saved successfully.")


✅ Voting1 thresholds saved successfully.


# STAGE 2 

In [None]:
# Prepare dataset for Stage 2
df_failures = df[df['Target']==1].copy()
print("Total failure samples:", len(df_failures))
df_failures['Failure Type'].value_counts()

# Split failures for training stage2
X2 = df_failures[feature_columns]
y2 = df_failures['Failure Type']
X2 = clean_col_names(X2)

le2 = LabelEncoder()
y2 = le2.fit_transform(y2)

print("\nFailure types and their new encoded labels:")
for i, class_name in enumerate(le2.classes_):
    print(f"- {class_name}: {i}")

joblib.dump(le2, "models/failure_type_encoder.pkl")

X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, stratify=y2, random_state=42)
print("\nShape of Stage 2 training data:", X2_train.shape)
print("Shape of Stage 2 testing data:", X2_test.shape)

Total failure samples: 330

Failure types and their new encoded labels:
- Heat Dissipation Failure: 0
- Overstrain Failure: 1
- Power Failure: 2
- Tool Wear Failure: 3

Shape of Stage 2 training data: (264, 9)
Shape of Stage 2 testing data: (66, 9)


In [35]:
for i, class_name in enumerate(le1.classes_):
    print(f"- {class_name}: {i}")

df_failures

- H: 0
- L: 1
- M: 2


Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type,Temp_diff,Torque_per_rpm,Wear_per_rpm
50,51,L47230,1,298.9,309.1,2861,4.6,143,1,Power Failure,10.2,0.001608,0.049983
69,70,L47249,1,298.9,309.0,1410,65.7,191,1,Power Failure,10.1,0.046596,0.135461
77,78,L47257,1,298.8,308.9,1455,41.3,208,1,Tool Wear Failure,10.1,0.028385,0.142955
160,161,L47340,1,298.4,308.2,1282,60.7,216,1,Overstrain Failure,9.8,0.047348,0.168487
161,162,L47341,1,298.3,308.1,1412,52.3,218,1,Overstrain Failure,9.8,0.037040,0.154391
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9758,9759,L56938,1,298.6,309.8,2271,16.2,218,1,Tool Wear Failure,11.2,0.007133,0.095993
9764,9765,L56944,1,298.5,309.5,1294,66.7,12,1,Power Failure,11.0,0.051546,0.009274
9822,9823,L57002,1,298.5,309.4,1360,60.9,187,1,Overstrain Failure,10.9,0.044779,0.137500
9830,9831,L57010,1,298.3,309.3,1337,56.1,206,1,Overstrain Failure,11.0,0.041960,0.154076


In [None]:
smote = SMOTE(random_state=42)

print("\nClass distribution before SMOTE:")
print(pd.Series(y2_train).value_counts())

X_train_smote, y_train_smote = smote.fit_resample(X2_train, y2_train)

print("\nClass distribution after SMOTE:")
print(pd.Series(y_train_smote).value_counts())
print("\nShape of resampled training data:", X_train_smote.shape)


Class distribution before SMOTE:
0    90
2    76
1    62
3    36
Name: count, dtype: int64

Class distribution after SMOTE:
3    90
1    90
0    90
2    90
Name: count, dtype: int64

Shape of resampled training data: (360, 9)


In [1]:
stage2_results = train_stage2_models(X_train_smote, y_train_smote, X2_test, y2_test, le2)


NameError: name 'train_stage2_models' is not defined

In [None]:
# Extract the best models
rf2_best = stage2_results["rf"]["model"]
xgb2_best = stage2_results["xgb"]["model"]
lgb2_best = stage2_results["lgbm"]["model"]

# Include them in the VotingClassifier
voting2 = VotingClassifier(
    estimators=[('lgbm', lgb2_best), ('xgb', xgb2_best), ('rf', rf2_best)],
    voting='soft',
    n_jobs=-1
)

# Fit and evaluate
voting2.fit(X_train_smote, y_train_smote)
y_pred_voting2 = voting2.predict(X2_test)
y_proba_voting2 = voting2.predict_proba(X2_test)
metrics_voting2 = evaluate_metrics(y2_test, y_pred_voting2, y_proba_voting2)

print("\nVoting Classifier Metrics:")
for k, v in metrics_voting2.items():
    print(f"{k}: {v}")


Voting Classifier Metrics:
Accuracy: 0.9090909090909091
Precision: 0.9077020202020202
Recall: 0.9090909090909091
F1: 0.9071908758403272
MCC: 0.8753385807049933
ROC_AUC: 0.9843723913264787
Confusion_Matrix: [[22  0  0  0]
 [ 1 13  1  1]
 [ 1  0 18  0]
 [ 0  2  0  7]]
AUC_PR: 0.9275081266866959


In [None]:
report = classification_report(y2_test, y_pred_voting2, output_dict=True)
report_df = pd.DataFrame(report).transpose()

# keep only class rows (drop avg/total)
class_report_df = report_df.iloc[:-3, :][['precision', 'recall', 'f1-score', 'support']].reset_index()
class_report_df.rename(columns={'index': 'Class'}, inplace=True)

# print table
print("\n=== Per-Class F1 + Support Summary ===")
print(class_report_df.round(3))


=== Per-Class F1 + Support Summary ===
  Class  precision  recall  f1-score  support
0     0      0.917   1.000     0.957     22.0
1     1      0.867   0.812     0.839     16.0
2     2      0.947   0.947     0.947     19.0
3     3      0.875   0.778     0.824      9.0


In [None]:
# Save the second voting ensemble
joblib.dump(voting2, "models/voting_stage2_model.pkl")

print("✅ Both voting ensemble models saved successfully.")


✅ Both voting ensemble models saved successfully.
