In [None]:
import pandas as pd
from numpy import NaN

In [None]:
# Read the FD001 training file: space-delimited text with no header row,
# so pandas assigns integer column labels.

fd_train = pd.read_csv(
    '../datasets/train_FD001.txt',
    sep=" ",
    header=None,
)

In [None]:
# Read the FD001 test file: space-delimited text with no header row,
# so pandas assigns integer column labels.

fd_test = pd.read_csv(
    '../datasets/test_FD001.txt',
    sep=" ",
    header=None,
)

In [None]:
# Give the columns readable names: two identifier columns, three setting
# columns, and every remaining column numbered s_1, s_2, ... in file order.

names_col = ['unit_number', 'time_cycles', 'setting_1', 'setting_2', 'setting_3']

# Whatever is left after the five fixed columns gets a numbered "s_" name.
n_numbered_cols = len(fd_train.columns) - 5
names_col.extend('s_' + str(k) for k in range(1, n_numbered_cols + 1))

# The same list is applied to both frames, so the test file is expected to
# have exactly as many columns as the train file.
fd_train.columns = names_col
fd_test.columns = names_col

In [None]:
# Drop the three setting columns from both datasets; this notebook treats
# them as redundant information for the model.

setting_cols = ['setting_1', 'setting_2', 'setting_3']

for frame in (fd_train, fd_test):
    # In-place so that later cells keep working on the same objects.
    frame.drop(columns=setting_cols, inplace=True)

In [None]:
# Find and remove constant features.
# The decision is based on the training data only; the same columns are then
# removed from the test data so both frames keep identical layouts.

# One row of summary statistics per column from 's_1' onwards.
fd_descr = fd_train.loc[:, 's_1':].describe().transpose()

no_spread = fd_descr['std'] < 1e-10   # standard deviation is effectively zero
no_values = fd_descr['count'] == 0    # column has no non-null entries (std is NaN there)

# Column names to discard, in their original order.
zer_std = fd_descr.loc[no_spread | no_values].index.tolist()

fd_train.drop(columns=zer_std, inplace=True)
fd_test.drop(columns=zer_std, inplace=True)

In [None]:
# Create the Remaining Useful Life (RUL) column for the train dataset.
#
# For each row, RUL = (largest time_cycles value of that unit) - (row's
# time_cycles), capped at RUL_CAP.
#
# The whole column is computed in one vectorised assignment. The previous
# row-by-row `fd_train['RUL'].loc[i] = ...` was a chained assignment: it
# raises SettingWithCopyWarning and, with pandas Copy-on-Write enabled,
# does not modify fd_train at all.

RUL_CAP = 125  # upper bound applied to the training target

# Per-unit maximum cycle table. It is not used by the RUL computation below;
# it is kept so the names this cell defines remain available.
fd_train_cycles = fd_train[["unit_number", "time_cycles"]]
max_cycles = fd_train_cycles.groupby('unit_number').max()
max_cycles['unit_number'] = max_cycles.index

# Largest time_cycles of each unit, broadcast back to every row of that unit.
# The string 'max' replaces the builtin `max`, which pandas deprecates as a
# transform argument.
last_cycle = fd_train.groupby('unit_number')['time_cycles'].transform('max')

# For integer cycle counts, clipping at RUL_CAP is identical to the original
# rule "125 if remaining > 124 else remaining".
fd_train['RUL'] = (last_cycle - fd_train['time_cycles']).clip(upper=RUL_CAP)

In [None]:
# Create the Remaining Useful Life (RUL) column for the test dataset.
#
# RUL_FD001.txt is parsed as one integer per line. Each unit's value is used
# as the RUL of that unit's final row, and every earlier row of the unit is
# one higher than the row after it.

with open('../datasets/RUL_FD001.txt', 'r') as rul_file:
    true_ruls = [int(line.strip()) for line in rul_file]

ruls = []

for unit_id in fd_test['unit_number'].unique():
    # Number of test rows recorded for this unit.
    n_rows = int((fd_test['unit_number'] == unit_id).sum())
    # Unit numbers are 1-based; true_ruls is a 0-based list.
    final_rul = true_ruls[unit_id - 1]
    # Count down from final_rul + n_rows - 1 to final_rul inclusive.
    ruls.extend(range(final_rul + n_rows - 1, final_rul - 1, -1))

# Values are assigned by row position, so this relies on each unit's rows
# being contiguous and ordered by cycle in fd_test.
fd_test['RUL'] = ruls

In [None]:
# Write the cleaned train and test frames to CSV, with a header row and
# without the DataFrame index.

fd_train.to_csv(
    r'../datasets/clean_train_data.csv',
    index=False,
    header=True,
)
fd_test.to_csv(
    r'../datasets/clean_test_data.csv',
    index=False,
    header=True,
)