In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data = pd.read_csv('./predictive_maintenance.csv')

In [3]:
data.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [4]:
data.shape

(10000, 10)

In [5]:
# check for missing values
data.isnull().sum()

UDI                        0
Product ID                 0
Type                       0
Air temperature [K]        0
Process temperature [K]    0
Rotational speed [rpm]     0
Torque [Nm]                0
Tool wear [min]            0
Target                     0
Failure Type               0
dtype: int64

In [12]:
X = data.iloc[:,3:7]

In [13]:
X

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm]
0,298.1,308.6,1551,42.8
1,298.2,308.7,1408,46.3
2,298.1,308.5,1498,49.4
3,298.2,308.6,1433,39.5
4,298.2,308.7,1408,40.0
...,...,...,...,...
9995,298.8,308.4,1604,29.5
9996,298.9,308.4,1632,31.8
9997,299.0,308.6,1645,33.4
9998,299.0,308.7,1408,48.5


In [14]:
y = data['Target']

In [15]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Target, dtype: int64

In [16]:
y.value_counts()

Target
0    9661
1     339
Name: count, dtype: int64

In [17]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [19]:
X_train.shape

(8000, 4)

In [20]:
y_train.value_counts()

Target
0    7722
1     278
Name: count, dtype: int64

In [21]:
y_test.value_counts()

Target
0    1939
1      61
Name: count, dtype: int64

In [23]:
# Standardization

In [24]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [25]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [26]:
X_train_scaled

array([[-0.85406604, -0.60958879,  0.42763418, -0.89269644],
       [-0.90401361, -1.08052803, -0.83494457,  1.38218727],
       [-0.90401361, -1.48419023, -0.05967692, -0.89269644],
       ...,
       [ 1.39357475,  1.54327629, -0.30887009,  0.72076734],
       [-1.95291265, -2.08968354,  0.01231222, -0.74237372],
       [ 0.09493785,  0.26501265,  1.49085839, -1.42383669]])

In [27]:
# fit a machine learning model
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [28]:
lr.fit(X_train_scaled,y_train)


In [29]:
predictions = lr.predict(X_test_scaled)

In [30]:
predictions

array([0, 0, 0, ..., 0, 0, 0])

In [32]:
y_test

6252    0
4684    1
1731    0
4742    0
4521    0
       ..
6412    0
8285    0
7853    0
1095    1
6929    0
Name: Target, Length: 2000, dtype: int64

In [33]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [34]:
accuracy = accuracy_score(y_test,predictions)

In [35]:
accuracy

0.973

In [37]:
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      1939
           1       0.65      0.25      0.36        61

    accuracy                           0.97      2000
   macro avg       0.81      0.62      0.67      2000
weighted avg       0.97      0.97      0.97      2000



In [38]:
print(confusion_matrix(y_test,predictions))

[[1931    8]
 [  46   15]]


In [44]:
sample = lr.predict(scaler.transform([[298.9,309.1,2861,4.6]]))



In [45]:
sample

array([1])

In [43]:
data[data['Target']==1]

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
50,51,L47230,L,298.9,309.1,2861,4.6,143,1,Power Failure
69,70,L47249,L,298.9,309.0,1410,65.7,191,1,Power Failure
77,78,L47257,L,298.8,308.9,1455,41.3,208,1,Tool Wear Failure
160,161,L47340,L,298.4,308.2,1282,60.7,216,1,Overstrain Failure
161,162,L47341,L,298.3,308.1,1412,52.3,218,1,Overstrain Failure
...,...,...,...,...,...,...,...,...,...,...
9758,9759,L56938,L,298.6,309.8,2271,16.2,218,1,Tool Wear Failure
9764,9765,L56944,L,298.5,309.5,1294,66.7,12,1,Power Failure
9822,9823,L57002,L,298.5,309.4,1360,60.9,187,1,Overstrain Failure
9830,9831,L57010,L,298.3,309.3,1337,56.1,206,1,Overstrain Failure


In [42]:
# saving the model and the scaler
import pickle
pickle.dump(scaler,open('scaler.h5','wb'))
pickle.dump(lr,open('model.h5','wb'))