# **Import**

In [1]:
import warnings

# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas chained-assignment and library deprecation warnings.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

from sklearn.linear_model import Ridge, Lasso, ElasticNet

from sklearn.metrics import mean_absolute_error, mean_squared_error

# **Data Load**

In [3]:
# Switch the working directory to the Drive data folder so the relative './*.csv' paths used below resolve.
cd /content/drive/MyDrive/[Projects]/Kaggle/NASA 터보팬 제트 엔진의 잔존 수명 예측 및 예지보전 방안/Data

/content/drive/MyDrive/[Projects]/Kaggle/NASA 터보팬 제트 엔진의 잔존 수명 예측 및 예지보전 방안/Data


In [4]:
# Column-name groups: two identifier columns, three operating-setting columns
# and 21 sensor channels (s_1 ... s_21), concatenated in that order.
index_names = ['unit_number', 'time_cycles']
setting_names = [f'setting_{k}' for k in (1, 2, 3)]
sensor_names = [f's_{k}' for k in range(1, 22)]
col_names = [*index_names, *setting_names, *sensor_names]

# Load the FD004 training table from the current working directory.
# The CSV's own header row supplies the column names (col_names is not passed to read_csv).
train_df = pd.read_csv('./train_FD004.csv')

In [5]:
# Per-unit lookup table: the last recorded cycle of every unit_number.
max_cycle = (
    train_df.groupby('unit_number')['time_cycles']
    .max()
    .rename('max_cycle')
    .reset_index()
)

# Broadcast the per-unit maximum onto every row with transform() rather than merge().
# Plain column assignment is idempotent; re-running a merge would instead produce
# max_cycle_x / max_cycle_y columns and make the RUL line below raise KeyError.
train_df['max_cycle'] = train_df.groupby('unit_number')['time_cycles'].transform('max')

# RUL = number of cycles left until the unit's last recorded cycle.
train_df['RUL'] = train_df['max_cycle'] - train_df['time_cycles']

In [6]:
# Load the test table and the companion RUL file; rul_df['RUL'] is used as the
# ground truth in the evaluation cells below.
test_df, rul_df = (
    pd.read_csv(csv_path) for csv_path in ('./test_FD004.csv', './RUL_FD004.csv')
)

In [15]:
# Accumulator for the metric rows of every experiment in this notebook.
results = []

# Sensor channels fed to the models (s_1, s_5, s_6, s_16, s_18 and s_19 are left out).
using_sensors = [
    f's_{idx}'
    for idx in (2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 21)
]

In [8]:
# Round the first two operating settings to one decimal so that near-identical
# readings collapse onto the same value.
train_df[['setting_1', 'setting_2']] = train_df[['setting_1', 'setting_2']].round(1)

# Build one label per row of the form "<setting_1>_<setting_2>_<setting_3>" ...
train_df['setting_group'] = (
    train_df['setting_1'].astype(str)
    + '_' + train_df['setting_2'].astype(str)
    + '_' + train_df['setting_3'].astype(str)
)

# ... and one-hot encode it; the label column is replaced by setting_group_* indicator columns.
train_df = pd.get_dummies(train_df, columns=['setting_group'], prefix='setting_group', prefix_sep='_')
train_df.head()

Unnamed: 0.1,Unnamed: 0,unit_number,time_cycles,setting_1,setting_2,setting_3,s_1,s_2,s_3,s_4,...,s_21,max_cycle,RUL,setting_group_0.0_0.0_100.0,setting_group_10.0_0.2_100.0,setting_group_10.0_0.3_100.0,setting_group_20.0_0.7_100.0,setting_group_25.0_0.6_60.0,setting_group_35.0_0.8_100.0,setting_group_42.0_0.8_100.0
0,0,1,1,42.0,0.8,100.0,445.0,549.68,1343.43,1112.93,...,6.367,321,320,False,False,False,False,False,False,True
1,1,1,2,20.0,0.7,100.0,491.19,606.07,1477.61,1237.5,...,14.6552,321,319,False,False,False,True,False,False,False
2,2,1,3,42.0,0.8,100.0,445.0,548.95,1343.12,1117.05,...,6.4213,321,318,False,False,False,False,False,False,True
3,3,1,4,42.0,0.8,100.0,445.0,548.7,1341.24,1118.03,...,6.4176,321,317,False,False,False,False,False,False,True
4,4,1,5,25.0,0.6,60.0,462.54,536.1,1255.23,1033.59,...,8.6754,321,316,False,False,False,False,True,False,False


In [9]:
# Names of the one-hot operating-setting columns created in train_df.
setting_group = list(filter(lambda name: name.startswith('setting_group'), train_df.columns))
setting_group

['setting_group_0.0_0.0_100.0',
 'setting_group_10.0_0.2_100.0',
 'setting_group_10.0_0.3_100.0',
 'setting_group_20.0_0.7_100.0',
 'setting_group_25.0_0.6_60.0',
 'setting_group_35.0_0.8_100.0',
 'setting_group_42.0_0.8_100.0']

In [10]:
# Encode the operating settings of the test set exactly as was done for train_df:
# round settings 1 and 2 to one decimal, join the three settings into a label,
# then one-hot encode the label.
test_df['setting_1'] = test_df['setting_1'].round(1)
test_df['setting_2'] = test_df['setting_2'].round(1)
test_df['setting_group'] = test_df[['setting_1','setting_2','setting_3']].astype(str).agg('_'.join, axis=1)
test_df = pd.get_dummies(test_df, columns=['setting_group'])

# get_dummies only creates columns for groups that actually occur in test_df.
# Add any group seen in training but absent here as an all-False column so that
# test_df[setting_group] in the modelling cells cannot raise KeyError.
# This is a no-op when every training group is present.
test_df = test_df.assign(
    **{name: False for name in setting_group if name not in test_df.columns}
)
test_df.head()

Unnamed: 0.1,Unnamed: 0,unit_number,time_cycles,setting_1,setting_2,setting_3,s_1,s_2,s_3,s_4,...,s_19,s_20,s_21,setting_group_0.0_0.0_100.0,setting_group_10.0_0.2_100.0,setting_group_10.0_0.3_100.0,setting_group_20.0_0.7_100.0,setting_group_25.0_0.6_60.0,setting_group_35.0_0.8_100.0,setting_group_42.0_0.8_100.0
0,0,1,1,20.0,0.7,100.0,491.19,606.67,1481.04,1227.81,...,100.0,24.31,14.7007,False,False,False,True,False,False,False
1,1,1,2,25.0,0.6,60.0,462.54,536.22,1256.17,1031.48,...,84.93,14.36,8.5748,False,False,False,False,True,False,False
2,2,1,3,42.0,0.8,100.0,445.0,549.23,1340.13,1105.88,...,100.0,10.39,6.4365,False,False,False,False,False,False,True
3,3,1,4,42.0,0.8,100.0,445.0,549.19,1339.7,1107.26,...,100.0,10.56,6.2367,False,False,False,False,False,False,True
4,4,1,5,35.0,0.8,100.0,449.44,555.1,1353.04,1117.8,...,100.0,14.85,8.9326,False,False,False,False,False,True,False


## **Raw Data**

In [16]:
# Raw sensor features, RUL target left unclipped ("RUL Clipping X").
# Every scaler x model combination is fitted on train_df and scored on the
# last recorded row of each test unit against rul_df['RUL'].
# Note: results is extended at the end, so re-running this cell appends the rows again.
raw_results = []

scaler_dict = {
    'None': None,
    **{cls.__name__: cls() for cls in (MinMaxScaler, StandardScaler, RobustScaler)}
}

model_dict = {cls.__name__: cls() for cls in (Ridge, Lasso, ElasticNet)}

# Evaluation inputs do not depend on the scaler or model, so build them once.
test_last = test_df.groupby('unit_number').tail(1)
true = rul_df['RUL'].copy().values.ravel()

for scaler_name, scaler in scaler_dict.items():
    # The feature matrices depend only on the scaler: prepare them once per scaler.
    train_x_sensors = train_df[using_sensors].copy()
    train_x_settings = train_df[setting_group].copy()
    train_y = train_df['RUL'].copy().values.ravel()
    test_x_sensors = test_last[using_sensors]
    test_x_settings = test_last[setting_group]

    if scaler is not None:
        # Fit on the training sensors only, then apply the same transform to the test rows.
        train_x_sensors = scaler.fit(train_x_sensors).transform(train_x_sensors)
        test_x_sensors = scaler.transform(test_x_sensors)

    # The one-hot setting columns are appended unscaled.
    train_x = np.concatenate([train_x_sensors, train_x_settings], axis=1)
    test_x = np.concatenate([test_x_sensors, test_x_settings], axis=1)

    for model_name, model in model_dict.items():
        model.fit(train_x, train_y)
        pred = model.predict(test_x)

        mae = mean_absolute_error(true, pred)
        rmse = mean_squared_error(true, pred)**0.5

        print(f'Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

        raw_results.append(
            dict(Data='Raw', Model=model_name, Scaler=scaler_name, MAE=mae, RMSE=rmse)
        )

results.extend(raw_results)

Model: Ridge, Scaler: None, MAE: 29.85654882680231, RMSE: 36.51687327144987
Model: Lasso, Scaler: None, MAE: 33.573174642567665, RMSE: 40.16470191650241
Model: ElasticNet, Scaler: None, MAE: 39.7513201502914, RMSE: 46.99553462740278
Model: Ridge, Scaler: MinMaxScaler, MAE: 30.080498945138473, RMSE: 36.794501041873
Model: Lasso, Scaler: MinMaxScaler, MAE: 46.75271860923075, RMSE: 54.90195429215725
Model: ElasticNet, Scaler: MinMaxScaler, MAE: 46.74668578879544, RMSE: 54.88871420084261
Model: Ridge, Scaler: StandardScaler, MAE: 29.904993403237075, RMSE: 36.5763510286562
Model: Lasso, Scaler: StandardScaler, MAE: 42.81798515870014, RMSE: 50.49674307060013
Model: ElasticNet, Scaler: StandardScaler, MAE: 46.5001320080057, RMSE: 54.548370580059235
Model: Ridge, Scaler: RobustScaler, MAE: 29.90448641054201, RMSE: 36.587321088991494
Model: Lasso, Scaler: RobustScaler, MAE: 42.18265384628105, RMSE: 50.045515705389214
Model: ElasticNet, Scaler: RobustScaler, MAE: 46.36221899352154, RMSE: 54.3733

In [17]:
# Raw-feature runs ranked from lowest to highest MAE.
pd.DataFrame(data=raw_results).sort_values('MAE', ascending=True)

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
0,Raw,Ridge,,29.856549,36.516873
9,Raw,Ridge,RobustScaler,29.904486,36.587321
6,Raw,Ridge,StandardScaler,29.904993,36.576351
3,Raw,Ridge,MinMaxScaler,30.080499,36.794501
1,Raw,Lasso,,33.573175,40.164702
2,Raw,ElasticNet,,39.75132,46.995535
10,Raw,Lasso,RobustScaler,42.182654,50.045516
7,Raw,Lasso,StandardScaler,42.817985,50.496743
11,Raw,ElasticNet,RobustScaler,46.362219,54.373344
8,Raw,ElasticNet,StandardScaler,46.500132,54.548371


In [18]:
# Raw sensor features with the RUL target capped at 125 ("RUL Clipping O").
# Every scaler x model combination is fitted on train_df with the capped target
# and scored on the last recorded row of each test unit against the capped rul_df['RUL'].
# Note: results is extended at the end, so re-running this cell appends the rows again.
raw_clipping_results = []

scaler_dict = {
    'None': None,
    'MinMaxScaler': MinMaxScaler(),
    'StandardScaler': StandardScaler(),
    'RobustScaler': RobustScaler()
}

model_dict = {
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet()
}

# Evaluation inputs do not depend on the scaler or model, so build them once.
test_last = test_df.groupby('unit_number').tail(1)
# clip() returns a new Series, so rul_df is left untouched.
true = rul_df['RUL'].clip(upper=125).values.ravel()

for scaler_name, scaler in scaler_dict.items():
    for model_name, model in model_dict.items():
        train_x_sensors = train_df[using_sensors].copy()
        train_x_settings = train_df[setting_group].copy()
        # Cap the target on a NEW Series. The previous code assigned through .loc on
        # the un-copied train_df['RUL'] column, which (depending on the pandas version)
        # writes the cap back into train_df and silently changes every later read of
        # the "unclipped" RUL.
        train_y = train_df['RUL'].clip(upper=125).values.ravel()
        test_x_sensors = test_last[using_sensors]
        test_x_settings = test_last[setting_group]

        if scaler:
            # Fit on the training sensors only, then apply the same transform to the test rows.
            scaler.fit(train_x_sensors)

            train_x_sensors = scaler.transform(train_x_sensors)
            test_x_sensors = scaler.transform(test_x_sensors)

        # The one-hot setting columns are appended unscaled.
        train_x = np.concatenate([train_x_sensors, train_x_settings], axis=1)
        test_x = np.concatenate([test_x_sensors, test_x_settings], axis=1)

        model.fit(train_x, train_y)

        pred = model.predict(test_x)

        mae = mean_absolute_error(true, pred)
        rmse = mean_squared_error(true, pred)**0.5

        print(f'Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

        raw_clipping_results.append({
        'Data': 'Raw (RUL Clipping)',
        'Model': model_name,
        'Scaler': scaler_name,
        'MAE': mae,
        'RMSE': rmse
        })
results.extend(raw_clipping_results)

Model: Ridge, Scaler: None, MAE: 21.922172251453098, RMSE: 26.133020220472602
Model: Lasso, Scaler: None, MAE: 25.189707794248214, RMSE: 30.008693282701387
Model: ElasticNet, Scaler: None, MAE: 31.241757664248958, RMSE: 37.55751018330879
Model: Ridge, Scaler: MinMaxScaler, MAE: 22.076866615366132, RMSE: 26.447054387261485
Model: Lasso, Scaler: MinMaxScaler, MAE: 38.05917022213398, RMSE: 45.56517147846795
Model: ElasticNet, Scaler: MinMaxScaler, MAE: 38.05313740169866, RMSE: 45.556411843707416
Model: Ridge, Scaler: StandardScaler, MAE: 21.985808896686436, RMSE: 26.225336606782633
Model: Lasso, Scaler: StandardScaler, MAE: 34.124436771603364, RMSE: 41.043018159671604
Model: ElasticNet, Scaler: StandardScaler, MAE: 37.806583620908924, RMSE: 45.229088163312646
Model: Ridge, Scaler: RobustScaler, MAE: 21.981309578814077, RMSE: 26.227585723065747
Model: Lasso, Scaler: RobustScaler, MAE: 33.48910545918428, RMSE: 40.62298280278214
Model: ElasticNet, Scaler: RobustScaler, MAE: 37.66867060642475

In [19]:
# Raw-feature runs with the capped target, ranked from lowest to highest MAE.
pd.DataFrame(data=raw_clipping_results).sort_values('MAE', ascending=True)

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
0,Raw (RUL Clipping),Ridge,,21.922172,26.13302
9,Raw (RUL Clipping),Ridge,RobustScaler,21.98131,26.227586
6,Raw (RUL Clipping),Ridge,StandardScaler,21.985809,26.225337
3,Raw (RUL Clipping),Ridge,MinMaxScaler,22.076867,26.447054
1,Raw (RUL Clipping),Lasso,,25.189708,30.008693
2,Raw (RUL Clipping),ElasticNet,,31.241758,37.55751
10,Raw (RUL Clipping),Lasso,RobustScaler,33.489105,40.622983
7,Raw (RUL Clipping),Lasso,StandardScaler,34.124437,41.043018
11,Raw (RUL Clipping),ElasticNet,RobustScaler,37.668671,45.028562
8,Raw (RUL Clipping),ElasticNet,StandardScaler,37.806584,45.229088


## **Cumulative Stat**

In [20]:
# Load the cumulative-statistic train and test tables from the working directory.
cum_stat_train_df, cum_stat_test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('./cum_stat_train_FD004.csv', './cum_stat_test_FD004.csv')
)

In [21]:
# Every column whose name starts with 's_' is treated as a sensor-derived feature.
stat_cols = list(filter(lambda name: name.startswith('s_'), cum_stat_train_df.columns))

In [23]:
# Cumulative-statistic features, RUL target left unclipped ("RUL Clipping X").
# Every scaler x model combination is fitted on cum_stat_train_df and scored on the
# last recorded row of each test unit against rul_df['RUL'].
# NOTE(review): setting_group was derived from train_df; this assumes the cum_stat
# tables carry the same one-hot columns.
# Note: results is extended at the end, so re-running this cell appends the rows again.
cum_stat_results = []

scaler_dict = {
    'None': None,
    **{cls.__name__: cls() for cls in (MinMaxScaler, StandardScaler, RobustScaler)}
}

model_dict = {cls.__name__: cls() for cls in (Ridge, Lasso, ElasticNet)}

# Evaluation inputs do not depend on the scaler or model, so build them once.
cum_stat_test_last = cum_stat_test_df.groupby('unit_number').tail(1)
true = rul_df['RUL'].copy().values.ravel()

for scaler_name, scaler in scaler_dict.items():
    # The feature matrices depend only on the scaler: prepare them once per scaler.
    train_x_sensors = cum_stat_train_df[stat_cols].copy()
    train_x_settings = cum_stat_train_df[setting_group].copy()
    train_y = cum_stat_train_df['RUL'].copy().values.ravel()
    test_x_sensors = cum_stat_test_last[stat_cols]
    test_x_settings = cum_stat_test_last[setting_group]

    if scaler is not None:
        # Fit on the training features only, then apply the same transform to the test rows.
        train_x_sensors = scaler.fit(train_x_sensors).transform(train_x_sensors)
        test_x_sensors = scaler.transform(test_x_sensors)

    # The one-hot setting columns are appended unscaled.
    train_x = np.concatenate([train_x_sensors, train_x_settings], axis=1)
    test_x = np.concatenate([test_x_sensors, test_x_settings], axis=1)

    for model_name, model in model_dict.items():
        model.fit(train_x, train_y)
        pred = model.predict(test_x)

        mae = mean_absolute_error(true, pred)
        rmse = mean_squared_error(true, pred)**0.5

        print(f'Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

        cum_stat_results.append(
            dict(Data='Cumulative Stat', Model=model_name, Scaler=scaler_name, MAE=mae, RMSE=rmse)
        )

results.extend(cum_stat_results)

Model: Ridge, Scaler: None, MAE: 25.64190935145792, RMSE: 32.614537751829936
Model: Lasso, Scaler: None, MAE: 29.89946055264964, RMSE: 36.65473001192645
Model: ElasticNet, Scaler: None, MAE: 29.79034834326486, RMSE: 36.51189347977799
Model: Ridge, Scaler: MinMaxScaler, MAE: 26.310388335632076, RMSE: 33.319817817537164
Model: Lasso, Scaler: MinMaxScaler, MAE: 46.54874206444403, RMSE: 54.59035873457367
Model: ElasticNet, Scaler: MinMaxScaler, MAE: 46.63467063684169, RMSE: 54.71911304535932
Model: Ridge, Scaler: StandardScaler, MAE: 25.66546822005574, RMSE: 32.656943819630776
Model: Lasso, Scaler: StandardScaler, MAE: 32.3312134110167, RMSE: 39.33600093644071
Model: ElasticNet, Scaler: StandardScaler, MAE: 37.79862593935618, RMSE: 44.67102175855832
Model: Ridge, Scaler: RobustScaler, MAE: 25.6367807611968, RMSE: 32.62447674269149
Model: Lasso, Scaler: RobustScaler, MAE: 30.535087299589502, RMSE: 37.4993108108831
Model: ElasticNet, Scaler: RobustScaler, MAE: 32.18195114491392, RMSE: 38.925

In [24]:
# Cumulative-statistic runs, shown in the order they were executed.
pd.DataFrame(data=cum_stat_results)

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
0,Cumulative Stat,Ridge,,25.641909,32.614538
1,Cumulative Stat,Lasso,,29.899461,36.65473
2,Cumulative Stat,ElasticNet,,29.790348,36.511893
3,Cumulative Stat,Ridge,MinMaxScaler,26.310388,33.319818
4,Cumulative Stat,Lasso,MinMaxScaler,46.548742,54.590359
5,Cumulative Stat,ElasticNet,MinMaxScaler,46.634671,54.719113
6,Cumulative Stat,Ridge,StandardScaler,25.665468,32.656944
7,Cumulative Stat,Lasso,StandardScaler,32.331213,39.336001
8,Cumulative Stat,ElasticNet,StandardScaler,37.798626,44.671022
9,Cumulative Stat,Ridge,RobustScaler,25.636781,32.624477


In [25]:
# Cumulative-statistic features with the RUL target capped at 125 ("RUL Clipping O").
# Both the training target and the test ground truth are capped before fitting/scoring.
# NOTE(review): setting_group was derived from train_df; this assumes the cum_stat
# tables carry the same one-hot columns.
# Note: results is extended at the end, so re-running this cell appends the rows again.
cum_stat_rul_clipping_results = []

scaler_dict = {
    'None': None,
    **{cls.__name__: cls() for cls in (MinMaxScaler, StandardScaler, RobustScaler)}
}

model_dict = {cls.__name__: cls() for cls in (Ridge, Lasso, ElasticNet)}

# Evaluation inputs do not depend on the scaler or model, so build them once.
cum_stat_test_last = cum_stat_test_df.groupby('unit_number').tail(1)
true = rul_df['RUL'].clip(upper=125).values.ravel()

for scaler_name, scaler in scaler_dict.items():
    # The feature matrices depend only on the scaler: prepare them once per scaler.
    train_x_sensors = cum_stat_train_df[stat_cols].copy()
    train_x_settings = cum_stat_train_df[setting_group].copy()
    # clip() yields a new Series, leaving cum_stat_train_df['RUL'] unchanged.
    train_y = cum_stat_train_df['RUL'].clip(upper=125).values.ravel()
    test_x_sensors = cum_stat_test_last[stat_cols]
    test_x_settings = cum_stat_test_last[setting_group]

    if scaler is not None:
        # Fit on the training features only, then apply the same transform to the test rows.
        train_x_sensors = scaler.fit(train_x_sensors).transform(train_x_sensors)
        test_x_sensors = scaler.transform(test_x_sensors)

    # The one-hot setting columns are appended unscaled.
    train_x = np.concatenate([train_x_sensors, train_x_settings], axis=1)
    test_x = np.concatenate([test_x_sensors, test_x_settings], axis=1)

    for model_name, model in model_dict.items():
        model.fit(train_x, train_y)
        pred = model.predict(test_x)

        mae = mean_absolute_error(true, pred)
        rmse = mean_squared_error(true, pred)**0.5

        print(f'Model: {model_name}, Scaler: {scaler_name}, MAE: {mae}, RMSE: {rmse}')

        cum_stat_rul_clipping_results.append(
            dict(
                Data='Cumulative Stat (RUL Clipping)',
                Model=model_name,
                Scaler=scaler_name,
                MAE=mae,
                RMSE=rmse,
            )
        )

results.extend(cum_stat_rul_clipping_results)

Model: Ridge, Scaler: None, MAE: 17.742342099211704, RMSE: 21.558354667293035
Model: Lasso, Scaler: None, MAE: 21.858354285045202, RMSE: 26.372076161879367
Model: ElasticNet, Scaler: None, MAE: 21.773577014669165, RMSE: 26.21092039035773
Model: Ridge, Scaler: MinMaxScaler, MAE: 18.21989902832776, RMSE: 22.233853193407192
Model: Lasso, Scaler: MinMaxScaler, MAE: 37.85519367734726, RMSE: 45.15690578433169
Model: ElasticNet, Scaler: MinMaxScaler, MAE: 37.941122249744915, RMSE: 45.323657157536516
Model: Ridge, Scaler: StandardScaler, MAE: 17.745798042607344, RMSE: 21.696824354923873
Model: Lasso, Scaler: StandardScaler, MAE: 24.024182598565982, RMSE: 29.01134341214255
Model: ElasticNet, Scaler: StandardScaler, MAE: 29.160501225180976, RMSE: 34.41761322291255
Model: Ridge, Scaler: RobustScaler, MAE: 17.715605541768454, RMSE: 21.64151539600884
Model: Lasso, Scaler: RobustScaler, MAE: 22.317942941650436, RMSE: 27.027351392874422
Model: ElasticNet, Scaler: RobustScaler, MAE: 23.74493255922836,

In [26]:
# Cumulative-statistic runs with the capped target, shown in the order they were executed.
pd.DataFrame(data=cum_stat_rul_clipping_results)

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
0,Cumulative Stat (RUL Clipping),Ridge,,17.742342,21.558355
1,Cumulative Stat (RUL Clipping),Lasso,,21.858354,26.372076
2,Cumulative Stat (RUL Clipping),ElasticNet,,21.773577,26.21092
3,Cumulative Stat (RUL Clipping),Ridge,MinMaxScaler,18.219899,22.233853
4,Cumulative Stat (RUL Clipping),Lasso,MinMaxScaler,37.855194,45.156906
5,Cumulative Stat (RUL Clipping),ElasticNet,MinMaxScaler,37.941122,45.323657
6,Cumulative Stat (RUL Clipping),Ridge,StandardScaler,17.745798,21.696824
7,Cumulative Stat (RUL Clipping),Lasso,StandardScaler,24.024183,29.011343
8,Cumulative Stat (RUL Clipping),ElasticNet,StandardScaler,29.160501,34.417613
9,Cumulative Stat (RUL Clipping),Ridge,RobustScaler,17.715606,21.641515


# **Result**

In [27]:
# Gather the metric rows of all experiments into one table and write it to Drive.
# to_csv is called with its defaults, so the DataFrame index is written as the first CSV column.
result = pd.DataFrame(data=results)
result.to_csv(
    '/content/drive/MyDrive/[Projects]/Kaggle/NASA 터보팬 제트 엔진의 잔존 수명 예측 및 예지보전 방안/Results_FD004/Regularized_Regression.csv'
)

In [28]:
# Rank every configuration by MAE (lowest first) and show the top five.
result = result.sort_values('MAE', ascending=True)
result.head(5)

Unnamed: 0,Data,Model,Scaler,MAE,RMSE
45,Cumulative Stat (RUL Clipping),Ridge,RobustScaler,17.715606,21.641515
36,Cumulative Stat (RUL Clipping),Ridge,,17.742342,21.558355
42,Cumulative Stat (RUL Clipping),Ridge,StandardScaler,17.745798,21.696824
39,Cumulative Stat (RUL Clipping),Ridge,MinMaxScaler,18.219899,22.233853
38,Cumulative Stat (RUL Clipping),ElasticNet,,21.773577,26.21092
