In [2]:
import pandas as pd
from pycaret.classification import *

In [5]:
# Step 1: Read the balanced predictive-maintenance CSV into a DataFrame
df = pd.read_csv("Dataset/3 Balanced_Predictive_Maintenance.csv")

# Columns kept out of the feature set.
# NOTE(review): presumably row/product identifiers plus per-failure-mode flags
# that would leak the target -- confirm against the dataset documentation.
excluded_columns = ['UDI', 'Product ID', 'Type', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF']

# Step 2: Configure the PyCaret experiment (PyCaret v3.x signature)
clf_setup = setup(
    data=df,
    target='Machine failure',
    ignore_features=excluded_columns,
    normalize=True,   # scale the numeric features
    session_id=123,   # fixed session id so repeated runs use the same seed
    verbose=False,    # suppress the setup summary; still accepted in v3
)

TypeError: setup() got an unexpected keyword argument 'silent'

In [3]:
# Step 3: Rank the candidate models by F1 and keep the five best
top_models = compare_models(
    sort='F1',
    n_select=5,
)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9982,1.0,1.0,0.9963,0.9982,0.9963,0.9963,0.182
rf,Random Forest Classifier,0.9947,1.0,1.0,0.9895,0.9947,0.9894,0.9894,0.325
dt,Decision Tree Classifier,0.9928,0.9928,1.0,0.9859,0.9929,0.9857,0.9858,0.027
lightgbm,Light Gradient Boosting Machine,0.9917,0.9995,1.0,0.9838,0.9918,0.9834,0.9836,0.128
knn,K Neighbors Classifier,0.974,0.9905,1.0,0.9506,0.9747,0.9479,0.9493,0.074
gbc,Gradient Boosting Classifier,0.9575,0.9895,0.9789,0.9388,0.9584,0.915,0.9158,0.349
ada,Ada Boost Classifier,0.9246,0.9738,0.9301,0.9201,0.925,0.8492,0.8493,0.131
qda,Quadratic Discriminant Analysis,0.839,0.9279,0.7832,0.8817,0.8294,0.6779,0.6824,0.019
nb,Naive Bayes,0.8242,0.904,0.7962,0.8436,0.8192,0.6485,0.6496,0.019
ridge,Ridge Classifier,0.8213,0.9023,0.8178,0.8238,0.8207,0.6426,0.6428,0.021


In [4]:
# Step 4: Tune every shortlisted model, optimising each one for F1
tuned_models = [
    tune_model(candidate, optimize='F1')
    for candidate in top_models
]

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9982,1.0,1.0,0.9963,0.9982,0.9963,0.9963,0.195
rf,Random Forest Classifier,0.9947,1.0,1.0,0.9895,0.9947,0.9894,0.9894,0.325
dt,Decision Tree Classifier,0.9928,0.9928,1.0,0.9859,0.9929,0.9857,0.9858,0.027
lightgbm,Light Gradient Boosting Machine,0.9917,0.9995,1.0,0.9838,0.9918,0.9834,0.9836,0.133
knn,K Neighbors Classifier,0.974,0.9905,1.0,0.9506,0.9747,0.9479,0.9493,0.072
gbc,Gradient Boosting Classifier,0.9575,0.9895,0.9789,0.9388,0.9584,0.915,0.9158,0.368
ada,Ada Boost Classifier,0.9246,0.9738,0.9301,0.9201,0.925,0.8492,0.8493,0.138
qda,Quadratic Discriminant Analysis,0.839,0.9279,0.7832,0.8817,0.8294,0.6779,0.6824,0.018
nb,Naive Bayes,0.8242,0.904,0.7962,0.8436,0.8192,0.6485,0.6496,0.024
ridge,Ridge Classifier,0.8213,0.9023,0.8178,0.8238,0.8207,0.6426,0.6428,0.019


In [5]:
# Step 5: Compare only the tuned models
# compare_models() has no `models` argument (passing it raises TypeError).
# The candidate pool is supplied through `include`, which accepts
# already-trained estimator objects as well as model ID strings.
best_tuned_model = compare_models(include=tuned_models, sort='F1')

In [6]:
# Step 6: Finalize the winner of the tuned-model comparison
final_model = finalize_model(estimator=best_tuned_model)

In [8]:
# Step 7: Evaluate the selected model on the hold-out split
# finalize_model() refits the estimator on the entire dataset, hold-out
# included, so evaluating `final_model` would score it on rows it was trained
# on and overstate performance. Evaluate the pre-finalize estimator instead so
# the hold-out plots and metrics stay out-of-sample.
evaluate_model(best_tuned_model)

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['Air temperature [K]',
                                              'Process temperature [K]',
                                              'Rotational speed [rpm]',
                                              'Torque [Nm]', 'Tool wear [min]'],
                                     transformer=SimpleImputer(add_indicator=False,
                                                               copy=True,
                                                               fill_value=None,
                                                               keep_empty_features=False,
                                                               missing_values=nan,
                                                               strategy='mean'))),
                 ('categori...
                  RandomForestClassifier(bootstrap

In [None]:
# Step 8: Persist the finalized model under the name 'best_machine_failure_model'
save_model(
    model=final_model,
    model_name='best_machine_failure_model',
)