In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
    roc_auc_score
)


In [9]:
#Load Dataset
data = pd.read_csv("predictive_maintenance.csv")
print(data.head())

   UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
0    1     M14860    M                298.1                    308.6   
1    2     L47181    L                298.2                    308.7   
2    3     L47182    L                298.1                    308.5   
3    4     L47183    L                298.2                    308.6   
4    5     L47184    L                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target Failure Type  
0                    1551         42.8                0       0   No Failure  
1                    1408         46.3                3       0   No Failure  
2                    1498         49.4                5       0   No Failure  
3                    1433         39.5                7       0   No Failure  
4                    1408         40.0                9       0   No Failure  


In [45]:
# Clean column names for XGBoost compatibility
data.columns = (
    data.columns
    .str.replace('[', '', regex=False)
    .str.replace(']', '', regex=False)
    .str.replace(' ', '_')
)
print(data.head())

   Type  Air_temperature_K  Process_temperature_K  Rotational_speed_rpm  \
0     2              298.1                  308.6                  1551   
1     1              298.2                  308.7                  1408   
2     1              298.1                  308.5                  1498   
3     1              298.2                  308.6                  1433   
4     1              298.2                  308.7                  1408   

   Torque_Nm  Tool_wear_min  Target Failure_Type  
0       42.8              0       0   No Failure  
1       46.3              3       0   No Failure  
2       49.4              5       0   No Failure  
3       39.5              7       0   No Failure  
4       40.0              9       0   No Failure  


In [49]:
# data preprocessing

# Drop ID-like columns
for col in ['UDI', 'Product ID']:
    if col in data.columns:
        data.drop(col, axis=1, inplace=True)

# Encode categorical feature
label_encoder = LabelEncoder()
data['Type'] = label_encoder.fit_transform(data['Type'])

# Features and target
X = data.drop(['Target', 'Failure Type'], axis=1)
y = data['Target']

In [21]:
#Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [53]:
#Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [55]:
#COMMON EVALUATION FUNCTION
def evaluate_model(y_test, y_pred, y_prob=None):
    print("Accuracy :", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall   :", recall_score(y_test, y_pred))
    print("F1 Score :", f1_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    
    if y_prob is not None:
        print("ROC-AUC  :", roc_auc_score(y_test, y_prob))


In [27]:
#Logistic Regression
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression()
lr_model.fit(X_train_scaled, y_train)

y_pred_lr = lr_model.predict(X_test_scaled)
y_prob_lr = lr_model.predict_proba(X_test_scaled)[:, 1]

print("\nLOGISTIC REGRESSION RESULTS")
evaluate_model(y_test, y_pred_lr, y_prob_lr)


LOGISTIC REGRESSION RESULTS
Accuracy : 0.973
Precision: 0.64
Recall   : 0.26229508196721313
F1 Score : 0.37209302325581395
Confusion Matrix:
 [[1930    9]
 [  45   16]]
ROC-AUC  : 0.8930325755205911


In [57]:
#Support Vector Machine (SVM)
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', probability=True)
svm_model.fit(X_train_scaled, y_train)

y_pred_svm = svm_model.predict(X_test_scaled)
y_prob_svm = svm_model.predict_proba(X_test_scaled)[:, 1]

print("\nSVM RESULTS")
evaluate_model(y_test, y_pred_svm, y_prob_svm)


SVM RESULTS
Accuracy : 0.977
Precision: 0.8571428571428571
Recall   : 0.29508196721311475
F1 Score : 0.43902439024390244
Confusion Matrix:
 [[1936    3]
 [  43   18]]
ROC-AUC  : 0.9523330430592075


In [35]:
#Random Forest
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)
y_prob_rf = rf_model.predict_proba(X_test)[:, 1]

print("\nRANDOM FOREST RESULTS")
evaluate_model(y_test, y_pred_rf, y_prob_rf)


RANDOM FOREST RESULTS
Accuracy : 0.984
Precision: 0.8372093023255814
Recall   : 0.5901639344262295
F1 Score : 0.6923076923076923
Confusion Matrix:
 [[1932    7]
 [  25   36]]
ROC-AUC  : 0.9537956864701257


In [39]:
#XGBoost
!pip install xgboost



In [61]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
    eval_metric='logloss'
)
xgb_model.fit(X_train.values, y_train.values)

y_pred_xgb = xgb_model.predict(X_test)
y_prob_xgb = xgb_model.predict_proba(X_test)[:, 1]

print("\nXGBOOST RESULTS")
evaluate_model(y_test, y_pred_xgb, y_prob_xgb)


XGBOOST RESULTS
Accuracy : 0.9845
Precision: 0.8125
Recall   : 0.639344262295082
F1 Score : 0.7155963302752295
Confusion Matrix:
 [[1930    9]
 [  22   39]]
ROC-AUC  : 0.9787451703176386
