In [1]:
import pandas as pd
from metrics import model_experiment

In [2]:
# Data import: read the machine-failure CSV into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
DATA_PATH = "../data/machine failure.csv"
df = pd.read_csv(DATA_PATH)
# Bare last expression -> rich preview of the loaded frame.
df

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,M24855,M,298.8,308.4,1604,29.5,14,0,0,0,0,0,0
9996,9997,H39410,H,298.9,308.4,1632,31.8,17,0,0,0,0,0,0
9997,9998,M24857,M,299.0,308.6,1645,33.4,22,0,0,0,0,0,0
9998,9999,H39412,H,299.0,308.7,1408,48.5,25,0,0,0,0,0,0


In [3]:
# Columns fed to the models as inputs. The identifier columns (UDI, Product ID),
# the Type column and the individual failure-mode flags (TWF, HDF, PWF, OSF, RNF)
# shown in the data preview are deliberately not part of this list.
features = [
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
    'Air temperature [K]',
    'Process temperature [K]',
]

# Label column the classifiers are trained to predict.
target = 'Machine failure'


In [4]:
# Split the loaded frame into model inputs (X: the selected feature columns)
# and the label to predict (y: the single target column).
X, y = df[features], df[target]

In [5]:
from sklearn.ensemble import RandomForestClassifier

# Constructor arguments for the random forest: 100 trees and a fixed seed.
rf_args = dict(n_estimators=100, random_state=42)

# Run a single experiment with an explicitly configured RandomForestClassifier.
# model_experiment comes from the local `metrics` module; its return value is
# unpacked into two names (presumably the fitted estimator and its evaluation
# metrics -- confirm against metrics.py).
experiment_output = model_experiment(X, y, RandomForestClassifier, rf_args)
model, metrics = experiment_output

Beginning Experiment for Model: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1939
           1       0.84      0.61      0.70        61

    accuracy                           0.98      2000
   macro avg       0.91      0.80      0.85      2000
weighted avg       0.98      0.98      0.98      2000


ROC-AUC Score: 0.8014736343729656

Confusion Matrix:
[[1932    7]
 [  24   37]]
--------------------------------------------------



In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler

# Estimator classes (not instances) to benchmark on the raw, unscaled features.
# Only the class is handed to model_experiment; no constructor arguments are
# passed from this cell.
# NOTE(review): unlike the RandomForest cell above, no random_state is supplied
# here, so the tree/ensemble results may vary between runs -- confirm how
# model_experiment seeds its estimators.
models = [
    LogisticRegression,
    KNeighborsClassifier,
    # RadiusNeighborsClassifier,  # disabled in the original notebook; reason not recorded
    SVC,
    GaussianNB,
    BernoulliNB,
    DecisionTreeClassifier,
    RandomForestClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
]

# Whatever model_experiment returns for each run, keyed by estimator class
# name, so runs can be compared programmatically instead of only by reading
# the printed reports.
baseline_results = {}

# The loop variable is `model_cls`, not `model`: iterating with `model` would
# rebind the fitted estimator that the RandomForest experiment cell stored in
# `model`, leaving a bare class behind after this cell runs.
for model_cls in models:
    baseline_results[model_cls.__name__] = model_experiment(X, y, model_cls)

Beginning Experiment for Model: <class 'sklearn.linear_model._logistic.LogisticRegression'>
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1939
           1       0.62      0.26      0.37        61

    accuracy                           0.97      2000
   macro avg       0.80      0.63      0.68      2000
weighted avg       0.97      0.97      0.97      2000


ROC-AUC Score: 0.6285688921955715

Confusion Matrix:
[[1929   10]
 [  45   16]]
--------------------------------------------------

Beginning Experiment for Model: <class 'sklearn.neighbors._classification.KNeighborsClassifier'>
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      1939
           1       0.57      0.13      0.21        61

    accuracy                           0.97      2000
   macro avg       0.77      0.56      0.60      2000
weighted avg       0.96      0.97

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1939
           1       0.80      0.59      0.68        61

    accuracy                           0.98      2000
   macro avg       0.89      0.79      0.84      2000
weighted avg       0.98      0.98      0.98      2000


ROC-AUC Score: 0.7927611833038831

Confusion Matrix:
[[1930    9]
 [  25   36]]
--------------------------------------------------

Beginning Experiment for Model: <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1939
           1       0.67      0.49      0.57        61

    accuracy                           0.98      2000
   macro avg       0.83      0.74      0.78      2000
weighted avg       0.97      0.98      0.98      2000


ROC-AUC Score: 0.7420336661622097

Confusion Matrix:
[[1924   15]
 [  3

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler

# Each estimator class is paired explicitly with the scaler class it is run
# with (None = no scaling). Keeping the pair on one line replaces two
# hand-aligned parallel lists, where a missing or misplaced entry would shift
# every later pairing and zip() would silently drop the unmatched tail.
experiments = [
    (LogisticRegression, MinMaxScaler),
    (KNeighborsClassifier, MinMaxScaler),
    # (RadiusNeighborsClassifier, MinMaxScaler),  # disabled in the original notebook
    (SVC, MinMaxScaler),
    (GaussianNB, MinMaxScaler),
    (BernoulliNB, MinMaxScaler),
    (DecisionTreeClassifier, None),
    (RandomForestClassifier, None),
    (AdaBoostClassifier, None),
    (GradientBoostingClassifier, None),
]

# Flat views of the pairs, kept so any other cell that still reads `models`
# or `scalers` keeps working.
models = [model_cls for model_cls, _ in experiments]
scalers = [scaler_cls for _, scaler_cls in experiments]

# Whatever model_experiment returns for each run, keyed by estimator class
# name, so the scaled runs can be compared programmatically.
scaled_results = {}

# `model_cls` / `scaler_cls` rather than `model` / `scaler`: the loop must not
# rebind the fitted estimator that the RandomForest experiment cell stored in
# `model`.
for model_cls, scaler_cls in experiments:
    scaled_results[model_cls.__name__] = model_experiment(
        X, y, model_cls, scaling=scaler_cls
    )

Beginning Experiment for Model: <class 'sklearn.linear_model._logistic.LogisticRegression'>
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      1939
           1       1.00      0.02      0.03        61

    accuracy                           0.97      2000
   macro avg       0.98      0.51      0.51      2000
weighted avg       0.97      0.97      0.96      2000


ROC-AUC Score: 0.5081967213114754

Confusion Matrix:
[[1939    0]
 [  60    1]]
--------------------------------------------------

Beginning Experiment for Model: <class 'sklearn.neighbors._classification.KNeighborsClassifier'>
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      1939
           1       0.73      0.26      0.39        61

    accuracy                           0.97      2000
   macro avg       0.85      0.63      0.69      2000
weighted avg       0.97      0.97

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler

# Each estimator class is paired explicitly with the scaler class it is run
# with (None = no scaling). In the original parallel lists the commented-out
# placeholder sat at a different index in `scalers` than in `models`; binding
# each pair on one line removes that alignment hazard, and with it the risk of
# zip() silently dropping an unmatched tail.
experiments = [
    (LogisticRegression, MinMaxScaler),
    (KNeighborsClassifier, MinMaxScaler),
    # (RadiusNeighborsClassifier, MinMaxScaler),  # disabled in the original notebook
    (SVC, MinMaxScaler),
    (GaussianNB, MinMaxScaler),
    (BernoulliNB, MinMaxScaler),
    (DecisionTreeClassifier, None),
    (RandomForestClassifier, None),
    (AdaBoostClassifier, None),
    (GradientBoostingClassifier, None),
]

# Flat views of the pairs, kept so any other cell that still reads `models`
# or `scalers` keeps working.
models = [model_cls for model_cls, _ in experiments]
scalers = [scaler_cls for _, scaler_cls in experiments]

# Whatever model_experiment returns for each run, keyed by estimator class
# name, so the resampled runs can be compared programmatically.
resampled_results = {}

# Same benchmark as the scaled run, with the resampling options switched on.
# NOTE(review): oversample=0.7 / undersample=1 are passed straight through to
# model_experiment; they look like sampling ratios -- confirm their exact
# meaning in metrics.py.
for model_cls, scaler_cls in experiments:
    resampled_results[model_cls.__name__] = model_experiment(
        X, y, model_cls, scaling=scaler_cls, oversample=0.7, undersample=1
    )

Beginning Experiment for Model: <class 'sklearn.linear_model._logistic.LogisticRegression'>
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.82      0.90      1939
           1       0.13      0.84      0.22        61

    accuracy                           0.82      2000
   macro avg       0.56      0.83      0.56      2000
weighted avg       0.97      0.82      0.88      2000


ROC-AUC Score: 0.828553673940429

Confusion Matrix:
[[1592  347]
 [  10   51]]
--------------------------------------------------

Beginning Experiment for Model: <class 'sklearn.neighbors._classification.KNeighborsClassifier'>
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.91      0.95      1939
           1       0.23      0.82      0.35        61

    accuracy                           0.91      2000
   macro avg       0.61      0.87      0.65      2000
weighted avg       0.97      0.91 