In [16]:
# Machine Learning Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,classification_report

import pandas as pd
import numpy as np

In [17]:
# Suppress warnings: install a catch-all "ignore" filter for this kernel session.
# NOTE(review): a blanket filter also silences any diagnostics raised by the
# model cells in this notebook (e.g. convergence or undefined-metric warnings,
# if any occur) — consider narrowing it to specific categories.
import warnings
warnings.simplefilter('ignore')

In [18]:
# Load the pre-split feature matrices and label frames from the Kaggle input
# directory. Files are read in the order X_train, X_test, y_train, y_test and
# each one is returned by pandas as a DataFrame.
DATA_DIR = "/kaggle/input/maintenance-dataset"
X_train, X_test, y_train, y_test = (
    pd.read_csv(DATA_DIR + "/" + split_name + ".csv")
    for split_name in ("X_train", "X_test", "y_train", "y_test")
)

In [19]:
# Logistic Regression
# The label CSVs are loaded as DataFrames, but scikit-learn estimators and
# metrics expect 1-D label arrays. Flatten them here, the same way the
# Random Forest cell does with `.values.ravel()`.
y_train_flat = y_train.values.ravel()
y_test_flat = y_test.values.ravel()

# Fit on the training split and predict the held-out test split.
logreg = LogisticRegression()
logreg.fit(X_train, y_train_flat)
y_pred_lr = logreg.predict(X_test)

# Accuracy in percent, rounded to two decimals, on the training data and on
# the test data. accuracy_score takes its arguments as (y_true, y_pred).
log_train = round(logreg.score(X_train, y_train_flat) * 100, 2)
log_accuracy = round(accuracy_score(y_test_flat, y_pred_lr) * 100, 2)


print("Training Accuracy    :",log_train ,"%")
print("Model Accuracy Score :",log_accuracy ,"%")
print("\033[1m--------------------------------------------------------\033[0m")
# zero_division=0 makes the report show 0.0 for a class that is never
# predicted instead of relying on a (suppressed) warning for that case.
print("Logistic Regression Classification_Report: \n",classification_report(y_test_flat,y_pred_lr,zero_division=0))
print("\033[1m--------------------------------------------------------\033[0m")

Training Accuracy    : 96.51 %
Model Accuracy Score : 96.85 %
[1m--------------------------------------------------------[0m
Logistic Regression Classification_Report: 
               precision    recall  f1-score   support

           0       0.97      1.00      0.99      1933
           1       0.75      0.14      0.23        22
           2       0.00      0.00      0.00         7
           3       0.70      0.50      0.58        14
           4       0.00      0.00      0.00         5
           5       0.00      0.00      0.00        19

    accuracy                           0.97      2000
   macro avg       0.40      0.27      0.30      2000
weighted avg       0.95      0.97      0.96      2000

[1m--------------------------------------------------------[0m


In [20]:
# Support Vector Machines
# The label CSVs are loaded as DataFrames, but scikit-learn estimators and
# metrics expect 1-D label arrays. Flatten them here, the same way the
# Random Forest cell does with `.values.ravel()`.
y_train_flat = y_train.values.ravel()
y_test_flat = y_test.values.ravel()

# Fit on the training split and predict the held-out test split.
svc = SVC()
svc.fit(X_train, y_train_flat)
y_pred_svc = svc.predict(X_test)

# Accuracy in percent, rounded to two decimals, on the training data and on
# the test data. accuracy_score takes its arguments as (y_true, y_pred).
svc_train = round(svc.score(X_train, y_train_flat) * 100, 2)
svc_accuracy = round(accuracy_score(y_test_flat, y_pred_svc) * 100, 2)

print("Training Accuracy    :",svc_train ,"%")
print("Model Accuracy Score :",svc_accuracy ,"%")
print("\033[1m--------------------------------------------------------\033[0m")
# zero_division=0 makes the report show 0.0 for a class that is never
# predicted instead of relying on a (suppressed) warning for that case.
print("SVC Classification_Report: \n",classification_report(y_test_flat,y_pred_svc,zero_division=0))
print("\033[1m--------------------------------------------------------\033[0m")


Training Accuracy    : 96.54 %
Model Accuracy Score : 96.7 %
[1m--------------------------------------------------------[0m
SVC Classification_Report: 
               precision    recall  f1-score   support

           0       0.97      1.00      0.98      1933
           1       1.00      0.05      0.09        22
           2       0.00      0.00      0.00         7
           3       0.00      0.00      0.00        14
           4       0.00      0.00      0.00         5
           5       0.00      0.00      0.00        19

    accuracy                           0.97      2000
   macro avg       0.33      0.17      0.18      2000
weighted avg       0.95      0.97      0.95      2000

[1m--------------------------------------------------------[0m


In [21]:
# Random Forest
# Flatten the single-column label DataFrames to the 1-D arrays that
# scikit-learn estimators and metrics expect.
y_train_flat = y_train.values.ravel()
y_test_flat = y_test.values.ravel()

# A fixed random_state makes the forest, and therefore the scores printed
# below, reproducible across Restart & Run All.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train_flat)
y_pred_rf = random_forest.predict(X_test)

# Accuracy in percent, rounded to two decimals, on the training data and on
# the test data. accuracy_score takes its arguments as (y_true, y_pred).
random_forest_train = round(random_forest.score(X_train, y_train_flat) * 100, 2)
random_forest_accuracy = round(accuracy_score(y_test_flat, y_pred_rf) * 100, 2)

print("Training Accuracy    :",random_forest_train ,"%")
print("Model Accuracy Score :",random_forest_accuracy ,"%")
print("\033[1m--------------------------------------------------------\033[0m")
# zero_division=0 makes the report show 0.0 for a class that is never
# predicted instead of relying on a (suppressed) warning for that case.
print("Random Forest Classification_Report: \n",classification_report(y_test_flat,y_pred_rf,zero_division=0))
print("\033[1m--------------------------------------------------------\033[0m")

Training Accuracy    : 100.0 %
Model Accuracy Score : 98.6 %
[1m--------------------------------------------------------[0m
Random Forest Classification_Report: 
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      1933
           1       0.91      0.91      0.91        22
           2       0.00      0.00      0.00         7
           3       0.86      0.43      0.57        14
           4       0.00      0.00      0.00         5
           5       0.94      0.84      0.89        19

    accuracy                           0.99      2000
   macro avg       0.62      0.53      0.56      2000
weighted avg       0.98      0.99      0.98      2000

[1m--------------------------------------------------------[0m


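**At first glance, there seems to be only about a 2-percentage-point difference in test accuracy between random forest and logistic regression (98.6% vs. 96.85%). However, random forest also has much higher precision and recall when predicting the type of error: for class 1, for example, it reaches 0.91 precision and 0.91 recall, compared with 0.75 and 0.14 for logistic regression.**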

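This makes a massive difference when the model is handed to the hypothetical client, because the client can be more confident that the model predicts not only the occurrence of an error but also its type. Note, however, that none of the three models correctly identifies any test sample of classes 2 and 4 (7 and 5 test samples, respectively), so predictions for these rare error types remain unreliable.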