In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pickle import dump
from pickle import load

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, classification_report, roc_auc_score, roc_curve

In [2]:
# Read the maintenance dataset, using the CSV's first column as the row index.
DATA_PATH = 'Maintenance_final.csv'
data = pd.read_csv(DATA_PATH, index_col=0)
data

Unnamed: 0_level_0,TWF,HDF,PWF,OSF,Machine failure
UDI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0
5,0,0,0,0,0
...,...,...,...,...,...
9996,0,0,0,0,0
9997,0,0,0,0,0
9998,0,0,0,0,0
9999,0,0,0,0,0


In [3]:
# Select features and target by column name rather than by position, so the
# split does not silently break if the column order of the CSV changes.
TARGET = 'Machine failure'
x = data.drop(columns=[TARGET])

# Keep the target as a 1-D Series: a single-column DataFrame has shape
# (n_samples, 1), which makes scikit-learn emit a DataConversionWarning on
# every fit() call.
y = data[TARGET]

## Logistic Regression

In [4]:
from sklearn.linear_model import LogisticRegression

# Logistic regression baseline; max_iter is raised to 1000 for the solver.
model = LogisticRegression(max_iter=1000)
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
model.fit(x, np.ravel(y))

  y = column_or_1d(y, warn=True)


In [5]:
# In-sample evaluation: the model is scored on the same x / y it was fitted on,
# so these numbers describe training fit rather than generalization.
y_pred = model.predict(x)

log_acc = accuracy_score(y, y_pred)

log_f1 = f1_score(y, y_pred)

log_prec = precision_score(y, y_pred)

# ROC-AUC is a ranking metric, so it is computed from the predicted probability
# of the positive class (second column of predict_proba for 0/1 labels) instead
# of the already-thresholded labels.
log_roc = roc_auc_score(y, model.predict_proba(x)[:, 1])

In [6]:
# Pickle the fitted logistic-regression model; the context manager guarantees
# the file handle is flushed and closed even if pickling raises.
with open('Logistic_Regression_model.sav', 'wb') as model_file:
    dump(model, model_file)

## Decision Tree Model
This model builds a tree whose branches correspond to successive splits of the data. Each branch ends in a leaf node, which is an end point of the tree. A split occurs at a node based on a condition that the model learns from the data.

This model is normally used for classification purposes. But it can also be used for regression.

In [7]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree; random_state pins the tie-breaking between
# equally good splits so reruns produce the same tree.
dec = DecisionTreeClassifier(max_depth=10, random_state=42)
# np.ravel hands the estimator a 1-D target regardless of how y was selected.
dec.fit(x, np.ravel(y))

In [8]:
# In-sample evaluation: the tree is scored on the same x / y it was fitted on.
y_pred = dec.predict(x)

dec_acc = accuracy_score(y, y_pred)

dec_f1 = f1_score(y, y_pred)

dec_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability (second column of
# predict_proba for 0/1 labels) rather than thresholded labels.
dec_roc = roc_auc_score(y, dec.predict_proba(x)[:, 1])

In [9]:
# Pickle the fitted decision tree; the context manager closes the file handle
# even if pickling raises.
with open('Decision_Tree.sav', 'wb') as model_file:
    dump(dec, model_file)

## KNN Classifier

In [10]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over the 8 closest training samples.
knn = KNeighborsClassifier(n_neighbors=8)
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
knn.fit(x, np.ravel(y))

  return self._fit(X, y)


In [11]:
# In-sample evaluation: the classifier is scored on the same x / y it was
# fitted on.
y_pred = knn.predict(x)

knn_acc = accuracy_score(y, y_pred)

knn_f1 = f1_score(y, y_pred)

knn_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability (second column of
# predict_proba for 0/1 labels) rather than thresholded labels.
knn_roc = roc_auc_score(y, knn.predict_proba(x)[:, 1])

In [12]:
# Pickle the fitted KNN classifier; the context manager closes the file handle
# even if pickling raises.
with open('KNN.sav', 'wb') as model_file:
    dump(knn, model_file)

## Random Forest Classifier

In [13]:
from sklearn.ensemble import RandomForestClassifier

# Depth-limited random forest; random_state fixes the bootstrap samples and
# feature sub-sampling so reruns produce the same forest.
rnd = RandomForestClassifier(max_depth=10, random_state=42)
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
rnd.fit(x, np.ravel(y))

  rnd.fit(x, y)


In [14]:
# In-sample evaluation: the forest is scored on the same x / y it was fitted on.
y_pred = rnd.predict(x)

rnd_acc = accuracy_score(y, y_pred)

rnd_f1 = f1_score(y, y_pred)

rnd_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability (second column of
# predict_proba for 0/1 labels) rather than thresholded labels.
rnd_roc = roc_auc_score(y, rnd.predict_proba(x)[:, 1])

In [15]:
# Pickle the fitted random forest; the context manager closes the file handle
# even if pickling raises.
with open('Random_Forest.sav', 'wb') as model_file:
    dump(rnd, model_file)

## Bagging Classifier

In [16]:
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of 10 depth-limited random forests. random_state on the
# ensemble seeds the bootstrap draws and is propagated to each base estimator,
# so reruns are reproducible.
bag = BaggingClassifier(
    estimator=RandomForestClassifier(max_depth=10),
    n_estimators=10,
    random_state=42,
)
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
bag.fit(x, np.ravel(y))

  y = column_or_1d(y, warn=True)


In [17]:
# In-sample evaluation: the ensemble is scored on the same x / y it was fitted
# on.
y_pred = bag.predict(x)

bag_acc = accuracy_score(y, y_pred)

bag_f1 = f1_score(y, y_pred)

bag_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability (second column of
# predict_proba for 0/1 labels) rather than thresholded labels.
bag_roc = roc_auc_score(y, bag.predict_proba(x)[:, 1])

In [18]:
# Pickle the fitted bagging ensemble; the context manager closes the file
# handle even if pickling raises.
with open('Bagging.sav', 'wb') as model_file:
    dump(bag, model_file)

## AdaBoost
This is another ensemble technique that assigns weights to the training samples depending on the errors. In each iteration, higher weights are assigned to the incorrectly predicted samples and lower weights to the correctly predicted ones, so the next estimator focuses on the hard cases. This helps it converge much faster.

In [19]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over depth-limited random forests. random_state on the booster is
# propagated to each base estimator, so reruns are reproducible.
ada = AdaBoostClassifier(
    estimator=RandomForestClassifier(max_depth=10),
    random_state=42,
)
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
ada.fit(x, np.ravel(y))

  y = column_or_1d(y, warn=True)


In [20]:
# In-sample evaluation: the booster is scored on the same x / y it was fitted
# on.
y_pred = ada.predict(x)

ada_acc = accuracy_score(y, y_pred)

ada_f1 = f1_score(y, y_pred)

ada_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability (second column of
# predict_proba for 0/1 labels) rather than thresholded labels.
ada_roc = roc_auc_score(y, ada.predict_proba(x)[:, 1])

In [21]:
# Pickle the fitted AdaBoost model; the context manager closes the file handle
# even if pickling raises.
with open('AdaBoost.sav', 'wb') as model_file:
    dump(ada, model_file)

## Gradient Boosting Classifier
This is an ensemble technique that adds estimators one at a time, using gradient descent on the loss function so that each new estimator corrects the errors of the ones before it.

In [22]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees with a shrinkage of 0.1; random_state makes the
# randomized parts of tree construction repeatable across reruns.
grad = GradientBoostingClassifier(learning_rate=0.1, random_state=42)
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
grad.fit(x, np.ravel(y))

  y = column_or_1d(y, warn=True)


In [23]:
# In-sample evaluation: the booster is scored on the same x / y it was fitted
# on.
y_pred = grad.predict(x)

grad_acc = accuracy_score(y, y_pred)

grad_f1 = f1_score(y, y_pred)

grad_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability (second column of
# predict_proba for 0/1 labels) rather than thresholded labels.
grad_roc = roc_auc_score(y, grad.predict_proba(x)[:, 1])

In [24]:
# Pickle the fitted gradient-boosting model; the context manager closes the
# file handle even if pickling raises.
with open('Gradient_Boost.sav', 'wb') as model_file:
    dump(grad, model_file)

## SVC
This model uses the support vector concept. Commonly used kernels are linear, polynomial, and RBF; the linear and polynomial kernels are tried below.

### Linear Kernel

In [25]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel.
svcl = SVC(kernel="linear")
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
svcl.fit(x, np.ravel(y))

  y = column_or_1d(y, warn=True)


In [26]:
# In-sample evaluation: the classifier is scored on the same x / y it was
# fitted on.
y_pred = svcl.predict(x)

svcl_acc = accuracy_score(y, y_pred)

svcl_f1 = f1_score(y, y_pred)

svcl_prec = precision_score(y, y_pred)

# SVC is fitted without probability estimates, so ROC-AUC is scored with the
# signed distance to the separating hyperplane instead of thresholded labels.
svcl_roc = roc_auc_score(y, svcl.decision_function(x))

In [27]:
# Pickle the fitted linear-kernel SVC; the context manager closes the file
# handle even if pickling raises.
with open('SVC_Linear.sav', 'wb') as model_file:
    dump(svcl, model_file)

### Polynomial Kernel

In [28]:
# Support vector classifier with a polynomial kernel.
svcp = SVC(kernel="poly")
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
svcp.fit(x, np.ravel(y))

  y = column_or_1d(y, warn=True)


In [29]:
# In-sample evaluation: the classifier is scored on the same x / y it was
# fitted on.
y_pred = svcp.predict(x)

svcp_acc = accuracy_score(y, y_pred)

svcp_f1 = f1_score(y, y_pred)

svcp_prec = precision_score(y, y_pred)

# SVC is fitted without probability estimates, so ROC-AUC is scored with the
# continuous decision_function output instead of thresholded labels.
svcp_roc = roc_auc_score(y, svcp.decision_function(x))

In [30]:
# Pickle the fitted polynomial-kernel SVC; the context manager closes the file
# handle even if pickling raises.
with open('SVC_Poly.sav', 'wb') as model_file:
    dump(svcp, model_file)

## Stacking
In this technique, different types of estimators, or even ensembles, are stacked on top of each other, and a final estimator combines their predictions to produce the result.

Here a logistic regression, a random forest classifier, and a gradient boosting classifier are stacked together.

In [31]:
from sklearn.ensemble import StackingClassifier

# Base learners for the stack. The two tree ensembles are stochastic, so they
# are seeded to make the stacked model reproducible across reruns.
estimators = [
    ('log', LogisticRegression(max_iter=500)),
    ('rnd', RandomForestClassifier(max_depth=10, random_state=42)),
    ('grad', GradientBoostingClassifier(learning_rate=0.1, random_state=42)),
]
stack = StackingClassifier(estimators=estimators)
# np.ravel hands the estimator a 1-D target, which avoids the
# DataConversionWarning raised for column-vector targets.
stack.fit(x, np.ravel(y))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [32]:
# In-sample evaluation: the stacked model is scored on the same x / y it was
# fitted on.
y_pred = stack.predict(x)

stack_acc = accuracy_score(y, y_pred)

stack_f1 = f1_score(y, y_pred)

stack_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability (second column of
# predict_proba for 0/1 labels) rather than thresholded labels.
stack_roc = roc_auc_score(y, stack.predict_proba(x)[:, 1])

In [33]:
# Pickle the fitted stacking classifier; the context manager closes the file
# handle even if pickling raises.
with open('Stacking.sav', 'wb') as model_file:
    dump(stack, model_file)

## Artificial Neural Networks

In [35]:
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.optimizers import Adam
from keras.layers import Dropout

# Defining the model

def create_model(input_dim=4, learning_rate=0.1):
    """Build and compile the feed-forward binary classifier.

    Architecture: Dense(16, tanh) -> Dropout(0.1) -> Dense(8, tanh)
    -> Dropout(0.1) -> Dense(1, sigmoid), compiled with binary cross-entropy
    loss, the Adam optimizer and accuracy as the tracked metric.

    Parameters
    ----------
    input_dim : int, default 4
        Number of input features fed to the first layer.
    learning_rate : float, default 0.1
        Step size passed to the Adam optimizer.

    Returns
    -------
    A compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(16, input_dim=input_dim, kernel_initializer='uniform', activation='tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(8, kernel_initializer='uniform', activation='tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    # `lr` is a deprecated alias that triggers a warning on construction;
    # `learning_rate` is the supported keyword.
    adam = Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

# Wrap the Keras network in the scikit-learn style classifier interface and
# train it for 10 epochs with mini-batches of 512 samples.

ann = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 512,epochs = 10)

ann.fit(x, y)

# In-sample evaluation: the network is scored on the same x / y it was fitted
# on.
y_pred = ann.predict(x)

ann_acc = accuracy_score(y, y_pred)

ann_f1 = f1_score(y, y_pred)

ann_prec = precision_score(y, y_pred)

# Score ROC-AUC with the positive-class probability rather than thresholded
# labels. NOTE(review): assumes the wrapper's predict_proba returns the
# two-column [P(class 0), P(class 1)] layout for a single sigmoid output -
# confirm for the installed Keras version.
ann_roc = roc_auc_score(y, ann.predict_proba(x)[:, 1])

y_pred

  ann = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 512,epochs = 10)
  super().__init__(name, **kwargs)




array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]], dtype=int64)

In [36]:
# Save the Keras model held in the wrapper's `model` attribute (not the
# scikit-learn style wrapper itself) to 'ANN.h5'.
# NOTE(review): the .h5 suffix presumably selects Keras' HDF5 format - confirm
# for the installed Keras version.
ann.model.save('ANN.h5')

In [37]:
# One row per model: (display name, accuracy, F1, precision, ROC-AUC).
score_rows = [
    ('Logistic Regression', log_acc, log_f1, log_prec, log_roc),
    ('Decision Tree', dec_acc, dec_f1, dec_prec, dec_roc),
    ('KNN', knn_acc, knn_f1, knn_prec, knn_roc),
    ('Random Forest', rnd_acc, rnd_f1, rnd_prec, rnd_roc),
    ('Bagging', bag_acc, bag_f1, bag_prec, bag_roc),
    ('AdaBoost', ada_acc, ada_f1, ada_prec, ada_roc),
    ('Gradient Boost', grad_acc, grad_f1, grad_prec, grad_roc),
    ('SVC Linear Kernel', svcl_acc, svcl_f1, svcl_prec, svcl_roc),
    ('SVC Polynomial Kernel', svcp_acc, svcp_f1, svcp_prec, svcp_roc),
    ('Stacking', stack_acc, stack_f1, stack_prec, stack_roc),
    ('Neural Networks', ann_acc, ann_f1, ann_prec, ann_roc),
]
column_names = ['Model Name', 'Accuracy', 'F1 Score', 'Precision', 'ROC-AUC Score']

# Transpose the rows into the column-name -> list-of-values mapping that the
# comparison table is built from.
results = {
    column: list(values)
    for column, values in zip(column_names, zip(*score_rows))
}

results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Model Name,Accuracy,F1 Score,Precision,ROC-AUC Score
0,Logistic Regression,0.9991,0.986547,1.0,0.986726
1,Decision Tree,0.9991,0.986547,1.0,0.986726
2,KNN,0.9991,0.986547,1.0,0.986726
3,Random Forest,0.9991,0.986547,1.0,0.986726
4,Bagging,0.9991,0.986547,1.0,0.986726
5,AdaBoost,0.9991,0.986547,1.0,0.986726
6,Gradient Boost,0.9991,0.986547,1.0,0.986726
7,SVC Linear Kernel,0.9991,0.986547,1.0,0.986726
8,SVC Polynomial Kernel,0.9991,0.986547,1.0,0.986726
9,Stacking,0.9991,0.986547,1.0,0.986726


In [38]:
# Show the comparison table ordered from highest to lowest accuracy.
results_df.sort_values(by='Accuracy', ascending=False)

Unnamed: 0,Model Name,Accuracy,F1 Score,Precision,ROC-AUC Score
0,Logistic Regression,0.9991,0.986547,1.0,0.986726
1,Decision Tree,0.9991,0.986547,1.0,0.986726
2,KNN,0.9991,0.986547,1.0,0.986726
3,Random Forest,0.9991,0.986547,1.0,0.986726
4,Bagging,0.9991,0.986547,1.0,0.986726
5,AdaBoost,0.9991,0.986547,1.0,0.986726
6,Gradient Boost,0.9991,0.986547,1.0,0.986726
7,SVC Linear Kernel,0.9991,0.986547,1.0,0.986726
8,SVC Polynomial Kernel,0.9991,0.986547,1.0,0.986726
9,Stacking,0.9991,0.986547,1.0,0.986726
