## Import Libraries

In [83]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings 
import os
warnings.filterwarnings('ignore')
from sklearn.preprocessing import MinMaxScaler

In [84]:
CWD = os.getcwd()
#Importing both test and train datasets

train_df = pd.read_csv(os.path.join(CWD,"train_FD001.txt"), sep=' ', header=None)
test_df = pd.read_csv(os.path.join(CWD,"test_FD001.txt"), sep=' ', header=None)
truth_df = pd.read_csv(os.path.join(CWD,"RUL_FD001.txt"),sep=' ', header=None)
truth_df.drop(truth_df.columns[[1]], axis=1, inplace=True)

# truth_df.head()

train_df.dropna(axis=1, how='all',inplace=True)
test_df.dropna(axis=1, how='all',inplace=True)

column_names = ["unit", "cycle", "setting1","setting2", "setting3"]
for i in range(1,22):
    column_names.append(f's{i}')

train_df.columns = column_names
test_df.columns = column_names

train_df.shape

(20631, 26)

In [85]:
test_df.shape

(13096, 26)

## Scale DataFrames with MinMax Scaler

In [86]:
to_exclude = ['unit','cycle']
cols_normalize = [col for col in column_names if col not in to_exclude]

def scaleDF(df_in):
    df = df_in.copy()
    scaler = MinMaxScaler(feature_range = (-1,1))
    norm_df = pd.DataFrame(scaler.fit_transform(df[cols_normalize]), 
                    columns=cols_normalize,index=df.index)
    join_df = df[df.columns.difference(cols_normalize)].join(norm_df)
    df = join_df.reindex(columns = df.columns)
    return df

In [87]:
train_df = scaleDF(train_df)
test_df = scaleDF(test_df)

In [88]:
train_df.shape

(20631, 26)

In [89]:
test_df.shape

(13096, 26)

## Add RUL Column to the training set

In [90]:
# Data Labeling - generate column RUL
rul = pd.DataFrame(train_df.groupby('unit')['cycle'].max()).reset_index()
rul.columns = ['unit', 'max']
train_df = train_df.merge(rul, on=['unit'], how='left')
train_df['RUL'] = train_df['max'] - train_df['cycle']
train_df.drop('max', axis=1, inplace=True)
train_df.head()

Unnamed: 0,unit,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s13,s14,s15,s16,s17,s18,s19,s20,s21,RUL
0,1,1,-0.08046,-0.666667,-1.0,-1.0,-0.63253,-0.186396,-0.380486,-1.0,...,-0.588235,-0.600784,-0.272028,-1.0,-0.333333,-1.0,-1.0,0.426357,0.449323,191
1,1,2,0.218391,-0.5,-1.0,-1.0,-0.433735,-0.093961,-0.294733,-1.0,...,-0.441176,-0.674373,-0.177376,-1.0,-0.333333,-1.0,-1.0,0.333333,0.462027,190
2,1,3,-0.494253,0.5,-1.0,-1.0,-0.313253,-0.260955,-0.258947,-1.0,...,-0.558824,-0.656414,-0.28511,-1.0,-0.666667,-1.0,-1.0,0.255814,0.242751,189
3,1,4,0.08046,0.0,-1.0,-1.0,-0.313253,-0.487683,-0.33761,-1.0,...,-0.411765,-0.650222,-0.666795,-1.0,-0.333333,-1.0,-1.0,0.147287,0.324772,188
4,1,5,-0.218391,-0.333333,-1.0,-1.0,-0.301205,-0.485066,-0.190749,-1.0,...,-0.529412,-0.650532,-0.195845,-1.0,-0.166667,-1.0,-1.0,0.178295,0.409003,187


## Add RUL Column to the test set

In [91]:
# generate column max for test data
rul = pd.DataFrame(test_df.groupby('unit')['cycle'].max()).reset_index()
rul.columns = ['unit', 'max']

truth_df.columns = ['more']
truth_df['unit'] = truth_df.index + 1
truth_df['max'] = rul['max'] + truth_df['more']
truth_df.drop('more', axis=1, inplace=True)

# generate RUL for test data
test_df = test_df.merge(truth_df, on=['unit'], how='left')
test_df['RUL'] = test_df['max'] - test_df['cycle']
test_df.drop('max', axis=1, inplace=True)
test_df.head()

Unnamed: 0,unit,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s13,s14,s15,s16,s17,s18,s19,s20,s21,RUL
0,1,1,0.3125,0.384615,-1.0,-1.0,0.192429,-0.156063,-0.435573,-1.0,...,-0.348837,-0.695481,-0.305849,-1.0,-0.25,-1.0,-1.0,0.0,0.240198,142
1,1,2,-0.3125,-0.538462,-1.0,-1.0,-0.634069,0.00805,-0.54952,-1.0,...,-0.209302,-0.444186,-0.544583,-1.0,0.0,-1.0,-1.0,0.290909,0.291437,141
2,1,3,0.0625,0.076923,-1.0,-1.0,-0.160883,-0.070371,-0.307739,-1.0,...,-0.348837,-0.614217,0.067114,-1.0,0.0,-1.0,-1.0,0.4,0.362208,140
3,1,4,0.55,-0.076923,-1.0,-1.0,-0.173502,-0.216827,-0.100265,-1.0,...,-0.255814,-0.564208,-0.435283,-1.0,-0.5,-1.0,-1.0,0.254545,0.240764,139
4,1,5,0.2,-0.076923,-1.0,-1.0,-0.129338,-0.057388,-0.284051,-1.0,...,-0.348837,-0.624219,-0.325983,-1.0,-0.75,-1.0,-1.0,0.236364,0.352017,138


In [93]:
train_df.to_csv(os.path.join(CWD,"train_data.csv"),index=False)
test_df.to_csv(os.path.join(CWD,"test_data.csv"),index=False)