In [2]:
import numpy as np
from sklearn.utils import shuffle
import pandas as pd
from sklearn import preprocessing

In [3]:
raw_data = pd.read_csv('predictive_maintenance.csv')
raw_data

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure
...,...,...,...,...,...,...,...,...,...,...
9995,9996,M24855,M,298.8,308.4,1604,29.5,14,0,No Failure
9996,9997,H39410,H,298.9,308.4,1632,31.8,17,0,No Failure
9997,9998,M24857,M,299.0,308.6,1645,33.4,22,0,No Failure
9998,9999,H39412,H,299.0,308.7,1408,48.5,25,0,No Failure


In [None]:
data = raw_data.copy()
data = shuffle(data, random_state=20)

Failure Type
0    9652
2     112
3      95
1      78
4      45
5      18
Name: count, dtype: int64

In [33]:
data_mapping = data.copy()
data_mapping['Failure Type'] = data['Failure Type'].map({'No Failure':0, 'Overstrain Failure':1, 'Heat Dissipation Failure':2, 'Power Failure':3, 'Tool Wear Failure':4, 'Random Failures':5})
data_mapping = data_mapping.drop(['UDI', 'Product ID', 'Target'], axis=1)
data_mapping = pd.get_dummies(data_mapping, columns=['Type'])
data_mapping = data_mapping.reset_index(drop=True)
data_mapping

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Failure Type,Type_H,Type_L,Type_M
0,298.1,307.8,1606,33.0,130,0,False,True,False
1,297.9,307.3,1502,36.7,12,0,False,True,False
2,299.4,309.0,1381,44.9,88,0,False,False,True
3,298.3,308.4,1408,43.1,177,0,False,True,False
4,300.2,309.6,1556,35.8,36,0,False,False,True
...,...,...,...,...,...,...,...,...,...
9995,302.5,311.5,1700,29.5,133,0,False,True,False
9996,298.9,310.0,1470,45.9,109,0,False,True,False
9997,300.8,310.7,1461,42.8,173,0,False,True,False
9998,299.7,310.6,1675,31.3,129,0,False,True,False


In [38]:
column_to_scale = ['Air temperature [K]', 'Process temperature [K]',
       'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

scale_array = preprocessing.scale(data_mapping[column_to_scale])
data_mapping_scaled = pd.DataFrame(scale_array, columns=column_to_scale)
data_mapping_scaled_input = pd.concat([data_mapping[['Type_H', 'Type_L', 'Type_M']] , data_mapping_scaled], axis=1)
data_mapping_scaled_input

Unnamed: 0,Type_H,Type_L,Type_M,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
0,False,True,False,-0.952389,-1.486567,0.374976,-0.700903,0.346405
1,False,True,False,-1.052382,-1.823571,-0.205138,-0.329732,-1.507456
2,False,False,True,-0.302441,-0.677756,-0.880078,0.492865,-0.313444
3,False,True,False,-0.852397,-1.082162,-0.729472,0.312295,1.084807
4,False,False,True,0.097527,-0.273351,0.096075,-0.420017,-1.130399
...,...,...,...,...,...,...,...,...
9995,False,True,False,1.247436,1.007266,0.899310,-1.052012,0.393537
9996,False,True,False,-0.552421,-0.003747,-0.383634,0.593181,0.016481
9997,False,True,False,0.397503,0.468059,-0.433837,0.282200,1.021964
9998,False,True,False,-0.152453,0.400658,0.759859,-0.871442,0.330694


In [43]:
data_target2 = data_mapping['Failure Type']
data_target2 = pd.DataFrame(data_target2, columns=['Failure Type'])
data_target2['Failure Type'].unique()

array([0, 1, 2, 3, 4, 5], dtype=int64)

In [45]:
n = data_mapping.shape[0]
train_size = int(0.8*n)
validation_size = int(0.1*n)
test_size = n - train_size - validation_size

train_input = data_mapping_scaled_input[:train_size]
validation_input = data_mapping_scaled_input[train_size:validation_size + train_size]
test_input = data_mapping_scaled_input[validation_size + train_size:]

train_target = data_target2[:train_size]
validation_target = data_target2[train_size:validation_size + train_size]
test_target = data_target2[validation_size + train_size:]


In [48]:
train_target

Unnamed: 0,Failure Type
0,0
1,0
2,0
3,0
4,0
...,...
7995,0
7996,0
7997,0
7998,0


In [46]:
np.savez('predictive_train2.npz', inputs=train_input, targets=train_target)
np.savez('predictive_validation2.npz', inputs=validation_input, targets=validation_target)
np.savez('predictive_test2.npz', inputs=test_input, targets=test_target)