In [None]:
# Artificial Neural Network MULTI class Classification


# #Machine Predictive Maintenance Classification Dataset
# Since real predictive maintenance datasets are generally difficult to obtain and in particular difficult to publish, we present and provide a synthetic dataset that reflects real predictive maintenance encountered in the industry to the best of our knowledge.

# The dataset consists of 10 000 data points stored as rows with 14 features in columns

# UID: unique identifier ranging from 1 to 10000
# productID: consisting of a letter L, M, or H for low (50% of all products), medium (30%), and high (20%) as product quality variants and a variant-specific serial number
# air temperature [K]: generated using a random walk process later normalized to a standard deviation of 2 K around 300 K
# process temperature [K]: generated using a random walk process normalized to a standard deviation of 1 K, added to the air temperature plus 10 K.
# rotational speed [rpm]: calculated from powepower of 2860 W, overlaid with a normally distributed noise
# torque [Nm]: torque values are normally distributed around 40 Nm with an σ = 10 Nm and no negative values.
# tool wear [min]: The quality variants H/M/L add 5/3/2 minutes of tool wear to the used tool in the process. and a
# 'machine failure' label that indicates, whether the machine has failed in this particular data point for any of the following failure modes are true.
# Important : There are two Targets - Do not make the mistake of using one of them as feature, as it will lead to leakage.
# Target : Failure or Not
# Failure Type : Type of Failure

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

In [4]:
from google.colab import files

In [5]:
uploaded = files.upload()

Saving predictive_maintenance.csv to predictive_maintenance.csv


In [6]:
dataset = pd.read_csv("predictive_maintenance.csv")

In [7]:
dataset.head()


Unnamed: 0,UDI,Product_ID,Type,Air_temperature,Process_temperature,Rotational_speed,Torque,Tool_wear,Target,Failure_Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [8]:
pd.set_option('display.max_columns', None)


In [9]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   UDI                  10000 non-null  int64  
 1   Product_ID           10000 non-null  object 
 2   Type                 10000 non-null  object 
 3   Air_temperature      10000 non-null  float64
 4   Process_temperature  10000 non-null  float64
 5   Rotational_speed     10000 non-null  int64  
 6   Torque               10000 non-null  float64
 7   Tool_wear            10000 non-null  int64  
 8   Target               10000 non-null  int64  
 9   Failure_Type         10000 non-null  object 
dtypes: float64(3), int64(4), object(3)
memory usage: 781.4+ KB


In [10]:
dataset=dataset.drop(["UDI",
              "Product_ID","Target"],axis=1)


In [11]:
dataset.Failure_Type.value_counts()

Unnamed: 0_level_0,count
Failure_Type,Unnamed: 1_level_1
No Failure,9652
Heat Dissipation Failure,112
Power Failure,95
Overstrain Failure,78
Tool Wear Failure,45
Random Failures,18


In [12]:
from sklearn.utils import shuffle

In [13]:
No_failure_sample = dataset[dataset['Failure_Type']=='No Failure'].sample(n=100, random_state=42)


In [14]:
No_failure_sample1=No_failure_sample

In [15]:
No_failure_sample1.shape

(100, 7)

In [16]:
other_classes = dataset[dataset['Failure_Type'] != 'No Failure']


In [17]:
other_classes.shape

(348, 7)

In [18]:
dataset_balanced = pd.concat([No_failure_sample1, other_classes])


In [19]:
dataset_balanced.shape

(448, 7)

In [20]:
dataset_balanced1 = shuffle(dataset_balanced, random_state=42).reset_index(drop=True)


In [21]:
dataset_balanced1.shape

(448, 7)

In [22]:

dataset_balanced2=pd.get_dummies(data=dataset_balanced1,
columns=['Type'],
drop_first=True)

In [23]:
dataset_balanced2.head()

Unnamed: 0,Air_temperature,Process_temperature,Rotational_speed,Torque,Tool_wear,Failure_Type,Type_L,Type_M
0,302.4,310.2,1351,45.1,168,Heat Dissipation Failure,True,False
1,303.0,311.3,1335,53.6,164,Heat Dissipation Failure,False,True
2,297.9,309.8,1336,71.6,31,Power Failure,True,False
3,302.7,312.4,1477,46.3,251,Overstrain Failure,True,False
4,297.8,307.4,1902,24.3,129,No Failure,False,True


In [24]:
#missing value checking
dataset_balanced2.isnull().sum()

Unnamed: 0,0
Air_temperature,0
Process_temperature,0
Rotational_speed,0
Torque,0
Tool_wear,0
Failure_Type,0
Type_L,0
Type_M,0


In [25]:
dataset_balanced2.shape

(448, 8)

In [68]:
dataset_balanced2['Failure_Type'].value_counts()

Unnamed: 0_level_0,count
Failure_Type,Unnamed: 1_level_1
0,112
1,100
3,95
2,78
5,45
4,18


In [26]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
encoder.fit(dataset_balanced2["Failure_Type"])



In [27]:
dataset_balanced2["Failure_Type"] = encoder.transform(dataset_balanced2["Failure_Type"])


In [28]:
dataset_balanced2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 448 entries, 0 to 447
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Air_temperature      448 non-null    float64
 1   Process_temperature  448 non-null    float64
 2   Rotational_speed     448 non-null    int64  
 3   Torque               448 non-null    float64
 4   Tool_wear            448 non-null    int64  
 5   Failure_Type         448 non-null    int64  
 6   Type_L               448 non-null    bool   
 7   Type_M               448 non-null    bool   
dtypes: bool(2), float64(3), int64(3)
memory usage: 22.0 KB


In [29]:
dataset_balanced2["Failure_Type"].unique()

array([0, 3, 2, 1, 5, 4])

In [30]:
X = dataset_balanced2.drop("Failure_Type",axis=1)


In [31]:
y = dataset_balanced2.loc[:, "Failure_Type"]

In [32]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


In [33]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()



In [34]:
x_train = sc.fit_transform(x_train)



In [35]:
x_test = sc.transform(x_test)

In [36]:
import numpy as np

np.unique(y_train)

array([0, 1, 2, 3, 4, 5])

In [39]:
x_train.shape[1]

7

In [40]:
pd.DataFrame(x_train)

Unnamed: 0,0,1,2,3,4,5,6
0,-0.666206,-0.821397,0.135212,-0.452870,-0.234565,-1.408309,-0.588103
1,0.957880,0.181977,-0.170917,-0.440061,1.047163,0.710072,-0.588103
2,1.626621,2.260393,0.037020,-0.343999,-0.445839,-1.408309,1.700384
3,-0.188534,-0.248041,-0.289326,0.277203,0.962653,-1.408309,-0.588103
4,0.098070,0.325316,-0.269110,0.623027,-1.699397,0.710072,-0.588103
...,...,...,...,...,...,...,...
353,1.101181,0.683663,-0.416398,-0.010984,-0.431754,0.710072,-0.588103
354,0.002535,0.253646,0.192973,-0.593761,-1.220510,-1.408309,1.700384
355,-0.092999,1.113681,-0.396182,0.648643,0.765464,0.710072,-0.588103
356,-0.761741,-0.033032,-0.543471,0.450115,1.328861,0.710072,-0.588103


In [41]:
import tensorflow as tf

In [65]:
classifier = Sequential()

In [45]:
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense

from tensorflow.keras import backend as K

In [47]:
classifier = Sequential()

In [48]:
classifier.add(Dense(units=4,
kernel_initializer='uniform', activation='relu',
input_dim=7)) ####following a principle ind+dep /2

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [49]:
classifier.add(Dense(units=4,
kernel_initializer='uniform', activation='relu'))####2nd hidden layer

In [50]:
classifier.add(Dense(units=6,
kernel_initializer='uniform', activation='softmax'))#output layer

In [51]:
classifier.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])  ##sparse since label encoded

In [61]:
classifier.fit(x_train, y_train, batch_size=10,
epochs=120, validation_split=0.1)

Epoch 1/120
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8832 - loss: 0.3143 - val_accuracy: 0.8056 - val_loss: 0.5861
Epoch 2/120
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9092 - loss: 0.2754 - val_accuracy: 0.8333 - val_loss: 0.5703
Epoch 3/120
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8948 - loss: 0.3309 - val_accuracy: 0.8056 - val_loss: 0.5731
Epoch 4/120
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8723 - loss: 0.3305 - val_accuracy: 0.8333 - val_loss: 0.5569
Epoch 5/120
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8594 - loss: 0.3460 - val_accuracy: 0.8333 - val_loss: 0.5576
Epoch 6/120
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9081 - loss: 0.2878 - val_accuracy: 0.8333 - val_loss: 0.5686
Epoch 7/120
[1m33/33[0m [32m━━━

<keras.src.callbacks.history.History at 0x7f8f64285410>

In [63]:


from sklearn.metrics import confusion_matrix
y_pred = classifier.predict(x_test)
y_test_class = y_test
y_pred_class = np.argmax(y_pred, axis=1)
confusion_matrix(y_test_class, y_pred_class)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


array([[17,  0,  0,  0,  0,  0],
       [ 0, 23,  0,  0,  0,  2],
       [ 1,  0, 16,  1,  0,  0],
       [ 1,  0,  0, 17,  0,  0],
       [ 0,  3,  0,  0,  1,  1],
       [ 0,  0,  1,  0,  0,  6]])

In [64]:

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print(confusion_matrix(y_test_class, y_pred_class))
print(classification_report(y_test_class, y_pred_class))
print(accuracy_score(y_test_class, y_pred_class))


[[17  0  0  0  0  0]
 [ 0 23  0  0  0  2]
 [ 1  0 16  1  0  0]
 [ 1  0  0 17  0  0]
 [ 0  3  0  0  1  1]
 [ 0  0  1  0  0  6]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        17
           1       0.88      0.92      0.90        25
           2       0.94      0.89      0.91        18
           3       0.94      0.94      0.94        18
           4       1.00      0.20      0.33         5
           5       0.67      0.86      0.75         7

    accuracy                           0.89        90
   macro avg       0.89      0.80      0.80        90
weighted avg       0.90      0.89      0.88        90

0.8888888888888888
