# **Import**

In [73]:
import warnings

warnings.filterwarnings('ignore')

In [74]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

# !pip install keras-tcn
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tcn import TCN

from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import mean_absolute_error, mean_squared_error

# **Data Load**

In [75]:
cd /content/drive/MyDrive/[Projects]/Kaggle/NASA 터보팬 제트 엔진의 잔존 수명 예측 및 예지보전 방안/Data

/content/drive/MyDrive/[Projects]/Kaggle/NASA 터보팬 제트 엔진의 잔존 수명 예측 및 예지보전 방안/Data


In [76]:
index_names = ['unit_number', 'time_cycles']
setting_names = ['setting_1', 'setting_2', 'setting_3']
sensor_names = [f's_{i + 1}' for i in range(21)]
col_names = index_names + setting_names + sensor_names

train_df = pd.read_csv('./train_FD004.csv')

In [77]:
max_cycle = train_df.groupby('unit_number')['time_cycles'].max().reset_index()
max_cycle.columns = ['unit_number', 'max_cycle']

train_df = train_df.merge(max_cycle, on='unit_number', how='left')

train_df['RUL'] = train_df['max_cycle'] - train_df['time_cycles']

In [78]:
test_df = pd.read_csv('./test_FD004.csv')
rul_df = pd.read_csv('./RUL_FD004.csv')

In [79]:
results = []

using_sensors = [
    's_2', 's_3', 's_4', 's_7', 's_8', 's_9', 's_10', 's_11', 's_12',
    's_13', 's_14', 's_15', 's_17', 's_20', 's_21'
]

In [80]:
train_df['setting_1'] = train_df['setting_1'].round(1)
train_df['setting_2'] = train_df['setting_2'].round(1)
train_df['setting_group'] = train_df[['setting_1','setting_2','setting_3']].astype(str).agg('_'.join, axis=1)
train_df = pd.get_dummies(train_df, columns=['setting_group'])
train_df.head()

Unnamed: 0.1,Unnamed: 0,unit_number,time_cycles,setting_1,setting_2,setting_3,s_1,s_2,s_3,s_4,...,s_21,max_cycle,RUL,setting_group_0.0_0.0_100.0,setting_group_10.0_0.2_100.0,setting_group_10.0_0.3_100.0,setting_group_20.0_0.7_100.0,setting_group_25.0_0.6_60.0,setting_group_35.0_0.8_100.0,setting_group_42.0_0.8_100.0
0,0,1,1,42.0,0.8,100.0,445.0,549.68,1343.43,1112.93,...,6.367,321,320,False,False,False,False,False,False,True
1,1,1,2,20.0,0.7,100.0,491.19,606.07,1477.61,1237.5,...,14.6552,321,319,False,False,False,True,False,False,False
2,2,1,3,42.0,0.8,100.0,445.0,548.95,1343.12,1117.05,...,6.4213,321,318,False,False,False,False,False,False,True
3,3,1,4,42.0,0.8,100.0,445.0,548.7,1341.24,1118.03,...,6.4176,321,317,False,False,False,False,False,False,True
4,4,1,5,25.0,0.6,60.0,462.54,536.1,1255.23,1033.59,...,8.6754,321,316,False,False,False,False,True,False,False


In [81]:
setting_group = [col for col in train_df.columns if col.startswith('setting_group')]
setting_group

['setting_group_0.0_0.0_100.0',
 'setting_group_10.0_0.2_100.0',
 'setting_group_10.0_0.3_100.0',
 'setting_group_20.0_0.7_100.0',
 'setting_group_25.0_0.6_60.0',
 'setting_group_35.0_0.8_100.0',
 'setting_group_42.0_0.8_100.0']

In [82]:
test_df['setting_1'] = test_df['setting_1'].round(1)
test_df['setting_2'] = test_df['setting_2'].round(1)
test_df['setting_group'] = test_df[['setting_1','setting_2','setting_3']].astype(str).agg('_'.join, axis=1)
test_df = pd.get_dummies(test_df, columns=['setting_group'])
test_df.head()

Unnamed: 0.1,Unnamed: 0,unit_number,time_cycles,setting_1,setting_2,setting_3,s_1,s_2,s_3,s_4,...,s_19,s_20,s_21,setting_group_0.0_0.0_100.0,setting_group_10.0_0.2_100.0,setting_group_10.0_0.3_100.0,setting_group_20.0_0.7_100.0,setting_group_25.0_0.6_60.0,setting_group_35.0_0.8_100.0,setting_group_42.0_0.8_100.0
0,0,1,1,20.0,0.7,100.0,491.19,606.67,1481.04,1227.81,...,100.0,24.31,14.7007,False,False,False,True,False,False,False
1,1,1,2,25.0,0.6,60.0,462.54,536.22,1256.17,1031.48,...,84.93,14.36,8.5748,False,False,False,False,True,False,False
2,2,1,3,42.0,0.8,100.0,445.0,549.23,1340.13,1105.88,...,100.0,10.39,6.4365,False,False,False,False,False,False,True
3,3,1,4,42.0,0.8,100.0,445.0,549.19,1339.7,1107.26,...,100.0,10.56,6.2367,False,False,False,False,False,False,True
4,4,1,5,35.0,0.8,100.0,449.44,555.1,1353.04,1117.8,...,100.0,14.85,8.9326,False,False,False,False,False,True,False


## **Raw Data**

In [83]:
sensor_setting_cols = using_sensors + setting_group

def make_train_sequence_data(df, seq_len):
    x, y = [], []

    for unit in df['unit_number'].unique():
        unit_df = df[df['unit_number'] == unit]
        sensor_values = unit_df[sensor_setting_cols].values
        rul_values = unit_df['RUL'].values

        for i in range(len(unit_df) - seq_len + 1):
            x.append(sensor_values[i:i + seq_len])
            y.append(rul_values[i + seq_len - 1])

    return np.array(x), np.array(y)

def make_test_sequence_data(df, seq_len):
    x = []
    unit_list = df['unit_number'].unique()

    for unit in unit_list:
        unit_df = df[df['unit_number'] == unit]
        values = unit_df[sensor_setting_cols].values

        if len(unit_df) >= seq_len:
            x.append(values[-seq_len:])
        else:
            pad = np.repeat(values[-1:], seq_len - len(unit_df), axis=0)
            x.append(np.vstack([pad, values]))

    return np.array(x)

In [84]:
def build_lstm(seq_len, n_features):
    return Sequential([
        LSTM(64, input_shape=(seq_len, n_features)),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1)
    ])

def build_tcn(seq_len, n_features):
    return Sequential([
        TCN(64, input_shape=(seq_len, n_features)),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1)
    ])

In [85]:
raw_results = []

scaler_dict = {
    'None': None,
    'MinMaxScaler': MinMaxScaler(),
    'StandardScaler': StandardScaler(),
    'RobustScaler': RobustScaler()
}

for seq_len in [30, 40, 50]:
    for scaler_name, scaler in scaler_dict.items():
        train_x_sensors = train_df[using_sensors].copy()
        train_x_settings = train_df[setting_group].copy()
        test_x_sensors = test_df[using_sensors].copy()
        test_x_settings = test_df[setting_group].copy()

        if scaler:
            scaler.fit(train_x_sensors)

            train_x_sensors = scaler.transform(train_x_sensors)
            test_x_sensors = scaler.transform(test_x_sensors)

        scaled_train_df = pd.DataFrame(
            np.concatenate([train_x_sensors, train_x_settings], axis=1),
            columns=sensor_setting_cols
        )
        scaled_train_df['unit_number'] = train_df['unit_number'].values
        scaled_train_df['RUL'] = train_df['RUL'].values

        scaled_test_df = pd.DataFrame(
            np.concatenate([test_x_sensors, test_x_settings], axis=1),
            columns=sensor_setting_cols
        )
        scaled_test_df['unit_number'] = test_df['unit_number'].values

        train_x, train_y = make_train_sequence_data(scaled_train_df, seq_len)
        test_x = make_test_sequence_data(scaled_test_df, seq_len)

        model_dict = {
            'LSTM': build_lstm(seq_len, len(sensor_setting_cols)),
            'TCN': build_tcn(seq_len, len(sensor_setting_cols))
        }
        es = EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
            verbose=1
        )
        for model_name, model in model_dict.items():
            model.compile(optimizer='adam', loss='mse', metrics=['mae'])
            model.fit(
                train_x, train_y,
                epochs=500, batch_size=32,
                validation_split=0.2,
                callbacks=[es],
                verbose=0
            )

            pred = model.predict(test_x).ravel()
            true = rul_df['RUL'].copy().values.ravel()

            mae = mean_absolute_error(true, pred)
            rmse = mean_squared_error(true, pred)**0.5

            print(f'Seq: {seq_len}, Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

            raw_results.append({
                'Data': f'Raw - {seq_len}',
                'Model': model_name,
                'Scaler': scaler_name,
                'MAE': mae,
                'RMSE': rmse
            })
results.extend(raw_results)

Epoch 8: early stopping
Restoring model weights from the end of the best epoch: 3.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Seq: 30, Model: LSTM, Scaler: None, MAE: 52.426692962646484, RMSE: 62.401619836372234
Epoch 22: early stopping
Restoring model weights from the end of the best epoch: 17.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 168ms/step
Seq: 30, Model: TCN, Scaler: None, MAE: 52.53707504272461, RMSE: 62.49919335416958
Epoch 20: early stopping
Restoring model weights from the end of the best epoch: 15.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Seq: 30, Model: LSTM, Scaler: MinMaxScaler, MAE: 32.20425033569336, RMSE: 40.16518170367293
Epoch 7: early stopping
Restoring model weights from the end of the best epoch: 2.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 168ms/step
Seq: 30, Model: TCN, Scaler: MinMaxScaler, MAE: 34.374794006347656, RMSE: 42.878950198403736
Epoch 9: early stopp

In [86]:
pd.DataFrame(raw_results).sort_values(by='MAE')

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
22,Raw - 50,LSTM,RobustScaler,24.142551,33.119146
14,Raw - 40,LSTM,RobustScaler,24.755493,33.188186
21,Raw - 50,TCN,StandardScaler,25.279907,34.352029
18,Raw - 50,LSTM,MinMaxScaler,25.457209,33.198853
6,Raw - 30,LSTM,RobustScaler,26.296017,35.020683
20,Raw - 50,LSTM,StandardScaler,26.572498,37.606261
10,Raw - 40,LSTM,MinMaxScaler,27.113085,34.702684
7,Raw - 30,TCN,RobustScaler,28.137636,36.437974
12,Raw - 40,LSTM,StandardScaler,28.471281,38.254514
5,Raw - 30,TCN,StandardScaler,29.112837,37.522501


In [87]:
raw_clipping_results = []

scaler_dict = {
    'None': None,
    'MinMaxScaler': MinMaxScaler(),
    'StandardScaler': StandardScaler(),
    'RobustScaler': RobustScaler()
}

for seq_len in [30, 40, 50]:
    for scaler_name, scaler in scaler_dict.items():
        train_x_sensors = train_df[using_sensors].copy()
        train_x_settings = train_df[setting_group].copy()
        test_x_sensors = test_df[using_sensors].copy()
        test_x_settings = test_df[setting_group].copy()

        if scaler:
            scaler.fit(train_x_sensors)

            train_x_sensors = scaler.transform(train_x_sensors)
            test_x_sensors = scaler.transform(test_x_sensors)

        scaled_train_df = pd.DataFrame(
            np.concatenate([train_x_sensors, train_x_settings], axis=1),
            columns=sensor_setting_cols
        )
        scaled_train_df['unit_number'] = train_df['unit_number'].values

        train_rul_df = train_df.copy()
        train_rul_df.loc[train_rul_df['RUL'] > 125, 'RUL'] = 125
        scaled_train_df['RUL'] = train_rul_df['RUL'].values

        scaled_test_df = pd.DataFrame(
            np.concatenate([test_x_sensors, test_x_settings], axis=1),
            columns=sensor_setting_cols
        )
        scaled_test_df['unit_number'] = test_df['unit_number'].values

        train_x, train_y = make_train_sequence_data(scaled_train_df, seq_len)
        test_x = make_test_sequence_data(scaled_test_df, seq_len)

        model_dict = {
            'LSTM': build_lstm(seq_len, len(sensor_setting_cols)),
            'TCN': build_tcn(seq_len, len(sensor_setting_cols))
        }
        es = EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
            verbose=1
        )
        for model_name, model in model_dict.items():
            model.compile(optimizer='adam', loss='mse', metrics=['mae'])
            model.fit(
                train_x, train_y,
                epochs=500, batch_size=32,
                validation_split=0.2,
                callbacks=[es],
                verbose=0
            )

            pred = model.predict(test_x).ravel()
            true = rul_df['RUL'].copy()
            true.loc[true > 125] = 125
            true = true.values.ravel()

            mae = mean_absolute_error(true, pred)
            rmse = mean_squared_error(true, pred)**0.5

            print(f'Seq: {seq_len}, Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

            raw_clipping_results.append({
                'Data': f'Raw (RUL Clipping) - {seq_len}',
                'Model': model_name,
                'Scaler': scaler_name,
                'MAE': mae,
                'RMSE': rmse
            })
results.extend(raw_clipping_results)

Epoch 7: early stopping
Restoring model weights from the end of the best epoch: 2.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Seq: 30, Model: LSTM, Scaler: None, MAE: 37.79010772705078, RMSE: 44.08772272324661
Epoch 8: early stopping
Restoring model weights from the end of the best epoch: 3.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 169ms/step
Seq: 30, Model: TCN, Scaler: None, MAE: 38.39555740356445, RMSE: 42.994547653507034
Epoch 40: early stopping
Restoring model weights from the end of the best epoch: 35.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Seq: 30, Model: LSTM, Scaler: MinMaxScaler, MAE: 12.947137832641602, RMSE: 17.967538172451885
Epoch 11: early stopping
Restoring model weights from the end of the best epoch: 6.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 167ms/step
Seq: 30, Model: TCN, Scaler: MinMaxScaler, MAE: 20.387001037597656, RMSE: 24.50378264206429
Epoch 16: early stopp

In [88]:
pd.DataFrame(raw_clipping_results).sort_values(by='MAE')

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
20,Raw (RUL Clipping) - 50,LSTM,StandardScaler,11.41221,16.287795
18,Raw (RUL Clipping) - 50,LSTM,MinMaxScaler,12.116726,16.991025
12,Raw (RUL Clipping) - 40,LSTM,StandardScaler,12.289542,17.392301
14,Raw (RUL Clipping) - 40,LSTM,RobustScaler,12.512035,17.375655
22,Raw (RUL Clipping) - 50,LSTM,RobustScaler,12.829434,17.426133
2,Raw (RUL Clipping) - 30,LSTM,MinMaxScaler,12.947138,17.967538
10,Raw (RUL Clipping) - 40,LSTM,MinMaxScaler,13.122982,17.295241
4,Raw (RUL Clipping) - 30,LSTM,StandardScaler,14.217325,18.867741
13,Raw (RUL Clipping) - 40,TCN,StandardScaler,14.45837,18.213253
6,Raw (RUL Clipping) - 30,LSTM,RobustScaler,15.010695,19.890608


## **Cumulative Stat**

In [89]:
cum_stat_train_df = pd.read_csv('./cum_stat_train_FD004.csv')
cum_stat_test_df = pd.read_csv('./cum_stat_test_FD004.csv')

In [90]:
stat_cols = [col for col in cum_stat_train_df.columns if col.startswith('s_')]

In [91]:
stat_setting_cols = stat_cols + setting_group

def make_cum_train_sequence_data(df, seq_len):
    x, y = [], []

    for unit in df['unit_number'].unique():
        unit_df = df[df['unit_number'] == unit]
        sensor_values = unit_df[stat_setting_cols].values
        rul_values = unit_df['RUL'].values

        for i in range(len(unit_df) - seq_len + 1):
            x.append(sensor_values[i:i + seq_len])
            y.append(rul_values[i + seq_len - 1])

    return np.array(x), np.array(y)

def make_cum_test_sequence_data(df, seq_len):
    x = []
    unit_list = df['unit_number'].unique()

    for unit in unit_list:
        unit_df = df[df['unit_number'] == unit]
        values = unit_df[stat_setting_cols].values

        if len(unit_df) >= seq_len:
            x.append(values[-seq_len:])
        else:
            pad = np.repeat(values[-1:], seq_len - len(unit_df), axis=0)
            x.append(np.vstack([pad, values]))

    return np.array(x)

In [92]:
cum_stat_results = []

scaler_dict = {
    'None': None,
    'MinMaxScaler': MinMaxScaler(),
    'StandardScaler': StandardScaler(),
    'RobustScaler': RobustScaler()
}

for seq_len in [30, 40, 50]:
    for scaler_name, scaler in scaler_dict.items():
        train_x_stats = cum_stat_train_df[stat_cols].copy()
        train_x_settings = cum_stat_train_df[setting_group].copy()
        test_x_stats = cum_stat_test_df[stat_cols].copy()
        test_x_settings = cum_stat_test_df[setting_group].copy()

        if scaler:
            scaler.fit(train_x_stats)

            train_x_stats = scaler.transform(train_x_stats)
            test_x_stats = scaler.transform(test_x_stats)

        scaled_train_df = pd.DataFrame(
            np.concatenate([train_x_stats, train_x_settings], axis=1),
            columns=stat_setting_cols
        )
        scaled_train_df['unit_number'] = cum_stat_train_df['unit_number'].values
        scaled_train_df['RUL'] = cum_stat_train_df['RUL'].values

        scaled_test_df = pd.DataFrame(
            np.concatenate([test_x_stats, test_x_settings], axis=1),
            columns=stat_setting_cols
        )
        scaled_test_df['unit_number'] = cum_stat_test_df['unit_number'].values

        train_x, train_y = make_cum_train_sequence_data(scaled_train_df, seq_len)
        test_x = make_cum_test_sequence_data(scaled_test_df, seq_len)

        model_dict = {
            'LSTM': build_lstm(seq_len, len(stat_setting_cols)),
            'TCN': build_tcn(seq_len, len(stat_setting_cols))
        }
        es = EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
            verbose=1
        )
        for model_name, model in model_dict.items():
            model.compile(optimizer='adam', loss='mse', metrics=['mae'])
            model.fit(
                train_x, train_y,
                epochs=500, batch_size=32,
                validation_split=0.2,
                callbacks=[es],
                verbose=0
            )

            pred = model.predict(test_x).ravel()
            true = rul_df['RUL'].copy().values.ravel()

            mae = mean_absolute_error(true, pred)
            rmse = mean_squared_error(true, pred)**0.5

            print(f'Seq: {seq_len}, Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

            cum_stat_results.append({
                'Data': f'Cumulative Stat - {seq_len}',
                'Model': model_name,
                'Scaler': scaler_name,
                'MAE': mae,
                'RMSE': rmse
            })
results.extend(cum_stat_results)

Epoch 7: early stopping
Restoring model weights from the end of the best epoch: 2.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Seq: 30, Model: LSTM, Scaler: None, MAE: 46.45683288574219, RMSE: 54.53930142925031
Epoch 7: early stopping
Restoring model weights from the end of the best epoch: 2.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 166ms/step
Seq: 30, Model: TCN, Scaler: None, MAE: 45.957359313964844, RMSE: 53.685492268192206
Epoch 26: early stopping
Restoring model weights from the end of the best epoch: 21.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Seq: 30, Model: LSTM, Scaler: MinMaxScaler, MAE: 24.86740493774414, RMSE: 31.476788400467612
Epoch 10: early stopping
Restoring model weights from the end of the best epoch: 5.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 167ms/step
Seq: 30, Model: TCN, Scaler: MinMaxScaler, MAE: 26.1522274017334, RMSE: 34.281146735259235
Epoch 15: early stoppi

In [93]:
pd.DataFrame(cum_stat_results).sort_values(by='MAE')

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
18,Cumulative Stat - 50,LSTM,MinMaxScaler,21.046511,29.671321
10,Cumulative Stat - 40,LSTM,MinMaxScaler,21.142378,28.218692
4,Cumulative Stat - 30,LSTM,StandardScaler,23.188921,30.417611
6,Cumulative Stat - 30,LSTM,RobustScaler,24.135073,31.406606
13,Cumulative Stat - 40,TCN,StandardScaler,24.466372,33.811623
12,Cumulative Stat - 40,LSTM,StandardScaler,24.60107,33.431074
20,Cumulative Stat - 50,LSTM,StandardScaler,24.652161,32.590128
2,Cumulative Stat - 30,LSTM,MinMaxScaler,24.867405,31.476788
19,Cumulative Stat - 50,TCN,MinMaxScaler,25.100151,34.221176
14,Cumulative Stat - 40,LSTM,RobustScaler,25.333349,32.821504


In [94]:
cum_stat_rul_clipping_results = []

scaler_dict = {
    'None': None,
    'MinMaxScaler': MinMaxScaler(),
    'StandardScaler': StandardScaler(),
    'RobustScaler': RobustScaler()
}

for seq_len in [30, 40, 50]:
    for scaler_name, scaler in scaler_dict.items():
        train_x_stats = cum_stat_train_df[stat_cols].copy()
        train_x_settings = cum_stat_train_df[setting_group].copy()
        test_x_stats = cum_stat_test_df[stat_cols].copy()
        test_x_settings = cum_stat_test_df[setting_group].copy()

        if scaler:
            scaler.fit(train_x_stats)

            train_x_stats = scaler.transform(train_x_stats)
            test_x_stats = scaler.transform(test_x_stats)

        scaled_train_df = pd.DataFrame(
            np.concatenate([train_x_stats, train_x_settings], axis=1),
            columns=stat_setting_cols
        )
        scaled_train_df['unit_number'] = cum_stat_train_df['unit_number'].values

        train_rul_df = cum_stat_train_df.copy()
        train_rul_df.loc[train_rul_df['RUL'] > 125, 'RUL'] = 125
        scaled_train_df['RUL'] = train_rul_df['RUL'].values

        scaled_test_df = pd.DataFrame(
            np.concatenate([test_x_stats, test_x_settings], axis=1),
            columns=stat_setting_cols
        )
        scaled_test_df['unit_number'] = cum_stat_test_df['unit_number'].values

        train_x, train_y = make_cum_train_sequence_data(scaled_train_df, seq_len)
        test_x = make_cum_test_sequence_data(scaled_test_df, seq_len)

        model_dict = {
            'LSTM': build_lstm(seq_len, len(stat_setting_cols)),
            'TCN': build_tcn(seq_len, len(stat_setting_cols))
        }
        es = EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
            verbose=1
        )
        for model_name, model in model_dict.items():
            model.compile(optimizer='adam', loss='mse', metrics=['mae'])
            model.fit(
                train_x, train_y,
                epochs=500, batch_size=32,
                validation_split=0.2,
                callbacks=[es],
                verbose=0
            )

            pred = model.predict(test_x).ravel()
            true = rul_df['RUL'].copy()
            true.loc[true > 125] = 125
            true = true.values.ravel()

            mae = mean_absolute_error(true, pred)
            rmse = mean_squared_error(true, pred)**0.5

            print(f'Seq: {seq_len}, Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

            cum_stat_rul_clipping_results.append({
                'Data': f'Cumulative Stat (RUL Clipping) - {seq_len}',
                'Model': model_name,
                'Scaler': scaler_name,
                'MAE': mae,
                'RMSE': rmse
            })
results.extend(cum_stat_rul_clipping_results)

Epoch 6: early stopping
Restoring model weights from the end of the best epoch: 1.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Seq: 30, Model: LSTM, Scaler: None, MAE: 37.765987396240234, RMSE: 44.05299376620873
Epoch 10: early stopping
Restoring model weights from the end of the best epoch: 5.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 178ms/step
Seq: 30, Model: TCN, Scaler: None, MAE: 37.493385314941406, RMSE: 44.1600966455259
Epoch 28: early stopping
Restoring model weights from the end of the best epoch: 23.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Seq: 30, Model: LSTM, Scaler: MinMaxScaler, MAE: 15.654744148254395, RMSE: 20.239925185958647
Epoch 8: early stopping
Restoring model weights from the end of the best epoch: 3.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 172ms/step
Seq: 30, Model: TCN, Scaler: MinMaxScaler, MAE: 17.05877113342285, RMSE: 22.02119326650714
Epoch 13: early stoppi

In [95]:
pd.DataFrame(cum_stat_rul_clipping_results).sort_values(by='MAE')

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
18,Cumulative Stat (RUL Clipping) - 50,LSTM,MinMaxScaler,12.252406,17.54315
6,Cumulative Stat (RUL Clipping) - 30,LSTM,RobustScaler,14.478898,19.854821
10,Cumulative Stat (RUL Clipping) - 40,LSTM,MinMaxScaler,14.852691,19.201096
12,Cumulative Stat (RUL Clipping) - 40,LSTM,StandardScaler,15.360208,21.522655
2,Cumulative Stat (RUL Clipping) - 30,LSTM,MinMaxScaler,15.654744,20.239925
4,Cumulative Stat (RUL Clipping) - 30,LSTM,StandardScaler,16.151237,21.890458
14,Cumulative Stat (RUL Clipping) - 40,LSTM,RobustScaler,16.574486,22.041912
22,Cumulative Stat (RUL Clipping) - 50,LSTM,RobustScaler,16.874752,22.63172
3,Cumulative Stat (RUL Clipping) - 30,TCN,MinMaxScaler,17.058771,22.021193
20,Cumulative Stat (RUL Clipping) - 50,LSTM,StandardScaler,17.064957,22.848806


# **Result**

In [96]:
result = pd.DataFrame(results)
result.to_csv('/content/drive/MyDrive/[Projects]/Kaggle/NASA 터보팬 제트 엔진의 잔존 수명 예측 및 예지보전 방안/Results_FD004/Sequence_Regression.csv', index=False)

In [97]:
result = result.sort_values(by='MAE')
result.head()

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
44,Raw (RUL Clipping) - 50,LSTM,StandardScaler,11.41221,16.287795
42,Raw (RUL Clipping) - 50,LSTM,MinMaxScaler,12.116726,16.991025
90,Cumulative Stat (RUL Clipping) - 50,LSTM,MinMaxScaler,12.252406,17.54315
36,Raw (RUL Clipping) - 40,LSTM,StandardScaler,12.289542,17.392301
38,Raw (RUL Clipping) - 40,LSTM,RobustScaler,12.512035,17.375655
