In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 

In [2]:
# Load the training frame from 'train_df.csv'.
# NOTE(review): the last cell of this notebook writes train_df back to this
# same path, so the file read here may already be the processed version.
train_df = pd.read_csv('train_df.csv')

In [3]:
# Only the last expression of a cell is rendered, so a bare `train_df.head()`
# followed by `train_df.shape` never showed the preview. Print the shape
# (a plain tuple) and leave the preview as the final expression so both appear.
print(train_df.shape)
train_df.head()

(20631, 20)

In [6]:
# Column labels applied to the headerless test file (28 labels in total).
Cols = (
    # unit identifier and cycle counter
    ['Unit', 'Cycle']
    # the three setting columns
    + ['Setting1', 'Setting2', 'Setting3']
    # measurement columns, numbered 1-21 in their labels
    + [
        'Fan °R1',
        'LPC °R2',
        'HPC °R3',
        'LPT °R4',
        'Fan Psia5',
        'Bypass-duct psia6',
        'HPC psia7',
        'Fan rpm8',
        'Core rpm9',
        'Engin P ratio10',
        'Hpc psia11',
        'Ratio of fuel12',
        'Corrected fan rpm13',
        'Corrected core rpm14',
        'Bypass ratio15',
        'Burner fuel-air ratio16',
        'BE17',
        'Demanded fan speed18',
        'Demanded corrected fan speed19',
        'HPT coolant bleed lbm/s20',
        'LPT coolant bleed lbm/s21',
    ]
    # two trailing columns (dropped later via col_to_drop)
    + ['s22', 's23']
)

In [7]:
# Read the space-delimited test file; it has no header row, so the column
# labels are assigned from Cols in a later cell.
test_df = pd.read_csv(
    'PM_test.txt',
    sep=' ',
    header=None,
)

In [8]:
# Label the headerless test frame with the names defined in Cols.
test_df.columns = Cols

# Only the last expression of a cell is rendered, so a bare `test_df.head()`
# followed by `test_df.shape` never showed the preview. Print the shape
# (a plain tuple) and leave the preview as the final expression so both appear.
print(test_df.shape)
test_df.head()

(13096, 28)

In [7]:
# Load the truth file (no header row) and turn the row position into a column.
df_truth = pd.read_csv('PM_truth.txt', header=None).reset_index()
df_truth.columns = ['Unit', 'True max cycle']

# Row positions start at 0; shift them by one to form the 'Unit' values.
df_truth['Unit'] += 1
df_truth

Unnamed: 0,Unit,True max cycle
0,1,112
1,2,98
2,3,69
3,4,82
4,5,91
...,...,...
95,96,137
96,97,82
97,98,59
98,99,117


In [8]:
# Highest 'Cycle' value recorded for each 'Unit' in the test frame.
test_rul = test_df.groupby('Unit')['Cycle'].max().reset_index()
test_rul.columns = ['Unit', 'Max']

# Attach the truth value of every unit and add it to that unit's highest
# recorded cycle. The per-row 'Cycle' is subtracted from this sum in the next cell.
# NOTE(review): because 'True max cycle' is *added* to the last recorded cycle,
# it presumably holds the cycles remaining after the last record rather than a
# maximum cycle - confirm against the data description.
test_rul = test_rul.merge(df_truth, on='Unit')
test_rul['RUL'] = test_rul['True max cycle'] + test_rul['Max']

# Broadcast the per-unit values onto every row of the test frame.
test_df = test_df.merge(test_rul, on=['Unit'], how='left')
test_df

Unnamed: 0,Unit,Cycle,Setting1,Setting2,Setting3,Fan °R1,LPC °R2,HPC °R3,LPT °R4,Fan Psia5,...,BE17,Demanded fan speed18,Demanded corrected fan speed19,HPT coolant bleed lbm/s20,LPT coolant bleed lbm/s21,s22,s23,Max,True max cycle,RUL
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,392,2388,100.0,38.86,23.3735,,,31,112,143
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,393,2388,100.0,39.02,23.3916,,,31,112,143
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,393,2388,100.0,39.08,23.4166,,,31,112,143
3,1,4,0.0042,0.0000,100.0,518.67,642.44,1584.12,1406.42,14.62,...,391,2388,100.0,39.00,23.3737,,,31,112,143
4,1,5,0.0014,0.0000,100.0,518.67,642.51,1587.19,1401.92,14.62,...,390,2388,100.0,38.99,23.4130,,,31,112,143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,0.0049,0.0000,100.0,518.67,643.24,1599.45,1415.79,14.62,...,394,2388,100.0,38.65,23.1974,,,198,20,218
13092,100,195,-0.0011,-0.0001,100.0,518.67,643.22,1595.69,1422.05,14.62,...,395,2388,100.0,38.57,23.2771,,,198,20,218
13093,100,196,-0.0006,-0.0003,100.0,518.67,643.44,1593.15,1406.82,14.62,...,395,2388,100.0,38.62,23.2051,,,198,20,218
13094,100,197,-0.0038,0.0001,100.0,518.67,643.26,1594.99,1419.36,14.62,...,395,2388,100.0,38.66,23.2699,,,198,20,218


In [9]:
# Turn the per-unit total stored in 'RUL' into a per-row value by subtracting
# the row's 'Cycle', then remove the helper columns from the merge.
#
# The result is built in one expression and only then bound to test_df. If the
# cell is executed a second time, the drop raises KeyError (the helper columns
# are already gone) *before* test_df is rebound, so 'Cycle' can no longer be
# subtracted twice by accident.
test_df = (
    test_df
    .assign(RUL=test_df['RUL'] - test_df['Cycle'])
    .drop(columns=['Max', 'True max cycle'])
)
test_df

Unnamed: 0,Unit,Cycle,Setting1,Setting2,Setting3,Fan °R1,LPC °R2,HPC °R3,LPT °R4,Fan Psia5,...,Bypass ratio15,Burner fuel-air ratio16,BE17,Demanded fan speed18,Demanded corrected fan speed19,HPT coolant bleed lbm/s20,LPT coolant bleed lbm/s21,s22,s23,RUL
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,8.4052,0.03,392,2388,100.0,38.86,23.3735,,,142
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,8.3803,0.03,393,2388,100.0,39.02,23.3916,,,141
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,8.4441,0.03,393,2388,100.0,39.08,23.4166,,,140
3,1,4,0.0042,0.0000,100.0,518.67,642.44,1584.12,1406.42,14.62,...,8.3917,0.03,391,2388,100.0,39.00,23.3737,,,139
4,1,5,0.0014,0.0000,100.0,518.67,642.51,1587.19,1401.92,14.62,...,8.4031,0.03,390,2388,100.0,38.99,23.4130,,,138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,0.0049,0.0000,100.0,518.67,643.24,1599.45,1415.79,14.62,...,8.4715,0.03,394,2388,100.0,38.65,23.1974,,,24
13092,100,195,-0.0011,-0.0001,100.0,518.67,643.22,1595.69,1422.05,14.62,...,8.4512,0.03,395,2388,100.0,38.57,23.2771,,,23
13093,100,196,-0.0006,-0.0003,100.0,518.67,643.44,1593.15,1406.82,14.62,...,8.4569,0.03,395,2388,100.0,38.62,23.2051,,,22
13094,100,197,-0.0038,0.0001,100.0,518.67,643.26,1594.99,1419.36,14.62,...,8.4711,0.03,395,2388,100.0,38.66,23.2699,,,21


In [10]:
# Disabled cell (kept for reference): add on the true RUL
# test_df = test_df.merge(df_truth, on = ['Unit'], how = 'left', suffixes=('', '_y'))
# test_df['True RUL'] = test_df['True max cycle'] - test_df['Cycle']
# test_df.drop(['True max cycle'], axis = 1, inplace = True)
# test_df

In [11]:
# Columns removed from the test frame before it is saved.
col_to_drop = [
    'Setting3',
    'Fan °R1',
    'Fan Psia5',
    'Engin P ratio10',
    'Burner fuel-air ratio16',
    'Demanded fan speed18',
    'Demanded corrected fan speed19',
    # the two trailing columns of the test file
    's22',
    's23',
]

In [12]:
# train_df.drop(col_to_drop, axis = 1, inplace = True)

In [13]:
# Remove the columns listed in col_to_drop from the test frame, in place.
test_df.drop(columns=col_to_drop, inplace=True)

In [14]:
# Show the test frame after the columns in col_to_drop were removed.
test_df

Unnamed: 0,Unit,Cycle,Setting1,Setting2,LPC °R2,HPC °R3,LPT °R4,Bypass-duct psia6,HPC psia7,Fan rpm8,Core rpm9,Hpc psia11,Ratio of fuel12,Corrected fan rpm13,Corrected core rpm14,Bypass ratio15,BE17,HPT coolant bleed lbm/s20,LPT coolant bleed lbm/s21,RUL
0,1,1,0.0023,0.0003,643.02,1585.29,1398.21,21.61,553.90,2388.04,9050.17,47.20,521.72,2388.03,8125.55,8.4052,392,38.86,23.3735,142
1,1,2,-0.0027,-0.0003,641.71,1588.45,1395.42,21.61,554.85,2388.01,9054.42,47.50,522.16,2388.06,8139.62,8.3803,393,39.02,23.3916,141
2,1,3,0.0003,0.0001,642.46,1586.94,1401.34,21.61,554.11,2388.05,9056.96,47.50,521.97,2388.03,8130.10,8.4441,393,39.08,23.4166,140
3,1,4,0.0042,0.0000,642.44,1584.12,1406.42,21.61,554.07,2388.03,9045.29,47.28,521.38,2388.05,8132.90,8.3917,391,39.00,23.3737,139
4,1,5,0.0014,0.0000,642.51,1587.19,1401.92,21.61,554.16,2388.01,9044.55,47.31,522.15,2388.03,8129.54,8.4031,390,38.99,23.4130,138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,0.0049,0.0000,643.24,1599.45,1415.79,21.61,553.41,2388.02,9142.37,47.69,520.69,2388.00,8213.28,8.4715,394,38.65,23.1974,24
13092,100,195,-0.0011,-0.0001,643.22,1595.69,1422.05,21.61,553.22,2388.05,9140.68,47.60,521.05,2388.09,8210.85,8.4512,395,38.57,23.2771,23
13093,100,196,-0.0006,-0.0003,643.44,1593.15,1406.82,21.61,553.04,2388.11,9146.81,47.57,521.18,2388.04,8217.24,8.4569,395,38.62,23.2051,22
13094,100,197,-0.0038,0.0001,643.26,1594.99,1419.36,21.61,553.37,2388.07,9148.85,47.61,521.33,2388.08,8220.48,8.4711,395,38.66,23.2699,21


In [15]:
# Show the training frame before the columns in col_to_drop_train are removed.
train_df

Unnamed: 0,Unit,Cycle,Setting1,Setting2,Setting3,Fan °R1,LPC °R2,HPC °R3,LPT °R4,Fan Psia5,...,Corrected fan rpm13,Corrected core rpm14,Bypass ratio15,Burner fuel-air ratio16,BE17,Demanded fan speed18,Demanded corrected fan speed19,HPT coolant bleed lbm/s20,LPT coolant bleed lbm/s21,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190,191
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236,190
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,189
3,1,4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,188
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,-0.0004,-0.0003,100.0,518.67,643.49,1597.98,1428.63,14.62,...,2388.26,8137.60,8.4956,0.03,397,2388,100.0,38.49,22.9735,4
20627,100,197,-0.0016,-0.0005,100.0,518.67,643.54,1604.50,1433.58,14.62,...,2388.22,8136.50,8.5139,0.03,395,2388,100.0,38.30,23.1594,3
20628,100,198,0.0004,0.0000,100.0,518.67,643.42,1602.46,1428.18,14.62,...,2388.24,8141.05,8.5646,0.03,398,2388,100.0,38.44,22.9333,2
20629,100,199,-0.0011,0.0003,100.0,518.67,643.23,1605.26,1426.53,14.62,...,2388.23,8139.29,8.5389,0.03,395,2388,100.0,38.29,23.0640,1


In [16]:
# Columns removed from the training frame before it is saved
# (same labels as col_to_drop, without 's22' and 's23').
col_to_drop_train = [
    'Setting3',
    'Fan °R1',
    'Fan Psia5',
    'Engin P ratio10',
    'Burner fuel-air ratio16',
    'Demanded fan speed18',
    'Demanded corrected fan speed19',
]

In [17]:
# Remove the columns listed in col_to_drop_train from the training frame, in place.
#
# train_df is loaded from 'train_df.csv' and saved back to the same path at the
# end of this notebook, so on any later run the file may already lack these
# columns. errors='ignore' skips labels that are not present instead of raising
# KeyError, which keeps the cell safe under Restart & Run All.
train_df.drop(columns=col_to_drop_train, inplace=True, errors='ignore')

In [18]:
# Persist both processed frames without writing the row index.
# index=False is the documented boolean form; the previous index=None only
# worked because None is falsy.
# NOTE: 'train_df.csv' is also the file this notebook reads train_df from,
# so this call overwrites the notebook's own input.
train_df.to_csv('train_df.csv', index=False)
test_df.to_csv('test_df.csv', index=False)