In [92]:
import pandas as pd

In [93]:
raw_data = pd.read_csv('predictive_maintenance.csv')
raw_data

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure
...,...,...,...,...,...,...,...,...,...,...
9995,9996,M24855,M,298.8,308.4,1604,29.5,14,0,No Failure
9996,9997,H39410,H,298.9,308.4,1632,31.8,17,0,No Failure
9997,9998,M24857,M,299.0,308.6,1645,33.4,22,0,No Failure
9998,9999,H39412,H,299.0,308.7,1408,48.5,25,0,No Failure


In [94]:
from sklearn.utils import shuffle
data = raw_data.copy()
data = shuffle(data, random_state=45)
data = data.reset_index(drop=True)
data.columns.values

array(['UDI', 'Product ID', 'Type', 'Air temperature [K]',
       'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]',
       'Tool wear [min]', 'Target', 'Failure Type'], dtype=object)

In [95]:
from sklearn.preprocessing import StandardScaler
column_to_scal = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']
scaler = StandardScaler()
scaled = scaler.fit_transform(data[column_to_scal])
scaled = pd.DataFrame(scaled, columns = column_to_scal)
data_scled = pd.concat([data[['Type']], scaled, data[['Target', 'Failure Type']].reset_index(drop=True)], axis=1)
data_scled

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,M,0.897464,0.198455,-0.723893,1.255271,-1.287506,0,No Failure
1,L,0.697480,0.535460,-0.835454,1.275335,0.267851,0,No Failure
2,L,0.197519,0.872464,0.559051,-0.540397,-1.318927,0,No Failure
3,L,-1.202370,-0.340752,2.578293,-1.954861,1.760366,0,No Failure
4,L,0.147523,0.131054,-0.947014,0.884100,-1.130399,0,No Failure
...,...,...,...,...,...,...,...,...
9995,L,-1.252366,-0.947360,0.458646,-1.112202,-1.695984,0,No Failure
9996,M,-0.802401,-1.216963,-0.394790,0.382516,-1.664562,0,No Failure
9997,H,0.397503,0.602860,-0.634645,0.402580,-1.177531,0,No Failure
9998,M,0.747476,0.400658,-0.511929,0.191915,0.503512,0,No Failure


In [96]:
data_scled_dummies = pd.get_dummies(data_scled, columns=['Type'])
data_scled_dummies = data_scled_dummies[['Type_H', 'Type_L', 'Type_M', 'Air temperature [K]', 'Process temperature [K]',
       'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]',
       'Target', 'Failure Type']]
data_scled_dummies

Unnamed: 0,Type_H,Type_L,Type_M,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,False,False,True,0.897464,0.198455,-0.723893,1.255271,-1.287506,0,No Failure
1,False,True,False,0.697480,0.535460,-0.835454,1.275335,0.267851,0,No Failure
2,False,True,False,0.197519,0.872464,0.559051,-0.540397,-1.318927,0,No Failure
3,False,True,False,-1.202370,-0.340752,2.578293,-1.954861,1.760366,0,No Failure
4,False,True,False,0.147523,0.131054,-0.947014,0.884100,-1.130399,0,No Failure
...,...,...,...,...,...,...,...,...,...,...
9995,False,True,False,-1.252366,-0.947360,0.458646,-1.112202,-1.695984,0,No Failure
9996,False,False,True,-0.802401,-1.216963,-0.394790,0.382516,-1.664562,0,No Failure
9997,True,False,False,0.397503,0.602860,-0.634645,0.402580,-1.177531,0,No Failure
9998,False,False,True,0.747476,0.400658,-0.511929,0.191915,0.503512,0,No Failure


In [97]:

data_scled_dummies['Failure Type'] = data_scled_dummies['Failure Type'].map(
    {'No Failure':0,
     'Power Failure':1,
     'Heat Dissipation Failure':2, 
     'Overstrain Failure':3,
     'Tool Wear Failure':4,
     'Random Failures':5})
data_scled_dummies['Failure Type'].unique()

array([0, 1, 2, 3, 4, 5], dtype=int64)

In [98]:
data_scled_dummies.columns.unique()

Index(['Type_H', 'Type_L', 'Type_M', 'Air temperature [K]',
       'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]',
       'Tool wear [min]', 'Target', 'Failure Type'],
      dtype='object')

In [99]:
data_input = data_scled_dummies[['Type_H', 'Type_L', 'Type_M', 'Air temperature [K]',
       'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]',
       'Tool wear [min]']]

data_target1 = data_scled_dummies['Target']
data_target2 = data_scled_dummies['Failure Type']

In [100]:
n = len(data_scled_dummies)
train_size = int(0.8*n)
validation_size = int(0.1*n)
test_size = n - train_size - validation_size

train_input = data_input[:train_size]
validation_input = data_input[train_size: train_size + validation_size]
test_input = data_input[train_size + validation_size:]

train_target1 = data_target1[:train_size]
validation_target1 = data_target1[train_size: train_size + validation_size]
test_target1 = data_target1[train_size + validation_size:]

train_target2 = data_target2[:train_size]
validation_target2 = data_target2[train_size: train_size + validation_size]
test_target2 = data_target2[train_size + validation_size:]

In [101]:
mask = data_target2 != 0

train_input_b = train_input[mask[:train_size]]
train_target_b = train_target2[mask[:train_size]]

validation_input_b = validation_input[mask[train_size:train_size + validation_size]]
validation_target_b = validation_target2[mask[train_size:train_size + validation_size]]

test_input_b = test_input[mask[train_size + validation_size:]]
test_target_b = test_target2[mask[train_size + validation_size:]]

In [102]:
train_input_b

Unnamed: 0,Type_H,Type_L,Type_M,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
31,False,False,True,0.747476,0.265856,6.137069,-2.807553,-1.366059
78,False,True,False,1.797392,1.209468,-1.766983,2.689799,0.629197
84,False,True,False,1.347428,0.333257,-1.538284,2.790116,1.399020
121,False,True,False,1.197440,0.198455,-0.919124,1.124860,0.864857
144,False,True,False,1.647404,1.074667,-1.326319,0.512928,1.744655
...,...,...,...,...,...,...,...,...
7781,False,True,False,-1.252366,-1.149563,5.718718,-2.727300,0.299273
7791,False,True,False,-0.202449,0.602860,-1.727937,2.529293,1.587549
7955,False,True,False,-0.102457,-0.475554,-1.337476,2.519261,1.634681
7957,False,True,False,-0.502425,0.265856,-0.969326,0.914195,1.854630


In [103]:
import numpy as np

np.savez('train1.npz', inputs = train_input, targets = train_target1)
np.savez('validation1.npz', inputs = validation_input, targets = validation_target1)
np.savez('test1.npz', inputs = test_input, targets = test_target1)

np.savez('train2.npz', inputs = train_input_b, targets = train_target_b)
np.savez('validation2.npz', inputs = validation_input_b, targets = validation_target_b)
np.savez('test2.npz', inputs = test_input_b, targets = test_target_b)

In [118]:
data_target_fail = data_scled_dummies['Failure Type']
data_target_fail = data_target_fail[train_size + validation_size:]
data_target_fail = pd.DataFrame(data_target_fail, columns=['Failure Type'])

data_target_fail['Failure Type'] = data_target_fail['Failure Type'].map({
     0:'No Failure',
     1:'Power Failure',
     2:'Heat Dissipation Failure', 
     3:'Overstrain Failure',
     4:'Tool Wear Failure',
     5:'Random Failures'
})

data_target_fail.to_csv('true_traget_failure.csv', index=False)