In [12]:
import pandas as pd 
import pickle as pkl 

In [13]:
# Show every column when a wide DataFrame is displayed (None removes the column cap).
pd.set_option('display.max_columns', None)

In [14]:
# Raw FD001 test file: space-delimited, no header row. Columns that come back
# entirely NaN are dropped right after parsing.
test_fd001 = pd.read_csv(
    r'C:\potfolio\RUL_Prediction\data\raw\test_FD001.txt',
    sep=' ',
    header=None,
).dropna(axis=1, how='all')

# Table from the processed_data folder; a later cell takes its column names.
scaled_df = pd.read_csv(
    r'C:\potfolio\RUL_Prediction\data\processed_data\scaled_data.csv'
)

In [15]:
# Column labels for the raw test frame: every column of the processed table
# except 'RUL'. errors='ignore' keeps this cell re-runnable -- on a second run
# 'RUL' is already gone and a plain drop would raise KeyError.
scaled_df = scaled_df.drop(columns=['RUL'], errors='ignore')
cols = scaled_df.columns

In [16]:
# Labels are applied purely by position, so `cols` must list the raw file's
# columns in file order; pandas raises if the number of labels does not match.
test_fd001 = test_fd001.set_axis(cols, axis=1)
test_fd001.head()

Unnamed: 0,unit_number,time_in_cycles,oprational_setting_1,oprational_setting_2,oprational_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,21.61,553.9,2388.04,9050.17,1.3,47.2,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,21.61,554.85,2388.01,9054.42,1.3,47.5,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,21.61,554.11,2388.05,9056.96,1.3,47.5,521.97,2388.03,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,21.61,554.07,2388.03,9045.29,1.3,47.28,521.38,2388.05,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,21.61,554.16,2388.01,9044.55,1.3,47.31,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413


In [24]:
# Number of rows in the raw test frame.
len(test_fd001)

13096

In [17]:
# Normalize per engine to remove engine-specific bias.
def normalize_per_engine(df, sensors):
    """Z-score each sensor column within each engine.

    Rows are grouped by ``unit_number``; for every column in ``sensors`` the
    group's mean is subtracted and the result is divided by the group's
    sample standard deviation (pandas default, ``ddof=1``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``unit_number`` and every column named in ``sensors``.
    sensors : iterable of str
        Columns to normalize. All other columns are copied through unchanged.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the sensor columns replaced by their per-engine
        z-scores. ``df`` itself is not modified.

    Notes
    -----
    When a group's standard deviation is zero (constant readings) or
    undefined (a single row), the divisor falls back to 1 so the centred
    value 0.0 is returned instead of NaN from a 0/0 division.
    """
    df_norm = df.copy()
    per_engine = df.groupby("unit_number")
    for sensor in sensors:
        readings = per_engine[sensor]
        mean = readings.transform("mean")
        std = readings.transform("std")
        # `std > 0` is False for both 0 and NaN, so both fall back to 1.
        safe_std = std.where(std > 0, 1.0)
        df_norm[sensor] = (df[sensor] - mean) / safe_std
    return df_norm


def add_rolling_features(df, sensors, windows=(5, 10, 20)):
    """Append per-engine rolling mean and std columns for each sensor.

    For every sensor and every window size ``w`` two columns are added,
    ``{sensor}_mean_{w}`` and ``{sensor}_std_{w}``. Rolling windows are
    computed inside each ``unit_number`` group, so they never span two
    engines.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``unit_number`` and every column named in ``sensors``.
    sensors : iterable of str
        Columns to compute rolling statistics for.
    windows : iterable of int, optional
        Window lengths in rows. The default is an immutable tuple so the
        default value cannot be altered between calls.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the new columns appended, ordered by sensor,
        then window, then mean before std. ``df`` itself is not modified.

    Notes
    -----
    ``min_periods=1`` lets partial windows produce a value, but the sample
    std of a single observation is undefined, so the first row of every
    engine has NaN in each ``*_std_*`` column.
    """
    df_feat = df.copy()
    per_engine = df.groupby("unit_number")
    for sensor in sensors:
        readings = per_engine[sensor]
        for w in windows:
            # `w=w` pins the current window size inside each lambda.
            df_feat[f"{sensor}_mean_{w}"] = readings.transform(
                lambda x, w=w: x.rolling(w, min_periods=1).mean()
            )
            df_feat[f"{sensor}_std_{w}"] = readings.transform(
                lambda x, w=w: x.rolling(w, min_periods=1).std()
            )
    return df_feat

def add_diff_features(df, sensors):
    """Append a ``{sensor}_diff`` column holding the row-to-row change per engine.

    Differences are taken inside each ``unit_number`` group; the first row
    of every engine has no predecessor and is filled with 0. Returns a copy,
    leaving ``df`` untouched.
    """
    out = df.copy()
    by_engine = df.groupby("unit_number")
    for name in sensors:
        step_change = by_engine[name].diff()
        out[f"{name}_diff"] = step_change.fillna(0)
    return out

**As observed in the feature-selection and model-building notebook, only six sensors contribute significantly to the prediction. Accordingly, we use the same six sensors here when engineering the features that are fed to the model.**


In [18]:
# The six sensors that the feature engineering below is applied to.
selected_sensors = [
    "sensor_9",
    "sensor_11",
    "sensor_7",
    "sensor_12",
    "sensor_14",
    "sensor_4",
]

In [19]:
def feature_engineering_pipeline(df, sensors=selected_sensors):
    """Run the three feature steps in order and return the resulting frame.

    The steps are per-engine normalization, rolling mean/std features, and
    first differences; each receives the previous step's output together
    with the same ``sensors`` list.
    """
    stages = (normalize_per_engine, add_rolling_features, add_diff_features)
    frame = df
    for stage in stages:
        frame = stage(frame, sensors)
    return frame

test_feat = feature_engineering_pipeline(test_fd001, selected_sensors)

In [20]:
# Preview the first rows of the engineered feature table.
test_feat.head()

Unnamed: 0,unit_number,time_in_cycles,oprational_setting_1,oprational_setting_2,oprational_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,sensor_9_mean_5,sensor_9_std_5,sensor_9_mean_10,sensor_9_std_10,sensor_9_mean_20,sensor_9_std_20,sensor_11_mean_5,sensor_11_std_5,sensor_11_mean_10,sensor_11_std_10,sensor_11_mean_20,sensor_11_std_20,sensor_7_mean_5,sensor_7_std_5,sensor_7_mean_10,sensor_7_std_10,sensor_7_mean_20,sensor_7_std_20,sensor_12_mean_5,sensor_12_std_5,sensor_12_mean_10,sensor_12_std_10,sensor_12_mean_20,sensor_12_std_20,sensor_14_mean_5,sensor_14_std_5,sensor_14_mean_10,sensor_14_std_10,sensor_14_mean_20,sensor_14_std_20,sensor_4_mean_5,sensor_4_std_5,sensor_4_mean_10,sensor_4_std_10,sensor_4_mean_20,sensor_4_std_20,sensor_9_diff,sensor_11_diff,sensor_7_diff,sensor_12_diff,sensor_14_diff,sensor_4_diff
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,-0.852023,14.62,21.61,-0.576462,2388.04,0.256372,1.3,-1.058969,-1.007896,2388.03,-1.685561,8.4052,0.03,392,2388,100.0,38.86,23.3735,0.256372,,0.256372,,0.256372,,-1.058969,,-1.058969,,-1.058969,,-0.576462,,-0.576462,,-0.576462,,-1.007896,,-1.007896,,-1.007896,,-1.685561,,-1.685561,,-1.685561,,-0.852023,,-0.852023,,-0.852023,,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,-1.568866,14.62,21.61,1.660272,2388.01,1.233151,1.3,1.863409,0.743403,2388.06,2.662624,8.3803,0.03,393,2388,100.0,39.02,23.3916,0.744761,0.690687,0.744761,0.690687,0.744761,0.690687,0.40222,2.066433,0.40222,2.066433,0.40222,2.066433,0.541905,1.58161,0.541905,1.58161,0.541905,1.58161,-0.132246,1.238355,-0.132246,1.238355,-0.132246,1.238355,0.488531,3.074631,0.488531,3.074631,0.488531,3.074631,-1.210445,0.506885,-1.210445,0.506885,-1.210445,0.506885,0.976779,2.922378,2.236734,1.751299,4.348184,-0.716843
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,-0.047823,14.62,21.61,-0.082026,2388.05,1.81692,1.3,1.863409,-0.012839,2388.03,-0.279431,8.4441,0.03,393,2388,100.0,39.08,23.4166,1.102148,0.788479,1.102148,0.788479,1.102148,0.788479,0.889283,1.687236,0.889283,1.687236,0.889283,1.687236,0.333928,1.17495,0.333928,1.17495,0.333928,1.17495,-0.092444,0.878359,-0.092444,0.878359,-0.092444,0.878359,0.232544,2.218843,0.232544,2.218843,0.232544,2.218843,-0.822904,0.76094,-0.822904,0.76094,-0.822904,0.76094,0.583769,0.0,-1.742298,-0.756243,-2.942055,1.521043
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1.257397,14.62,21.61,-0.176205,2388.03,-0.8652,1.3,-0.279668,-2.361172,2388.05,0.585879,8.3917,0.03,391,2388,100.0,39.0,23.3737,0.610311,1.175619,0.610311,1.175619,0.610311,1.175619,0.597045,1.496481,0.597045,1.496481,0.597045,1.496481,0.206395,0.992672,0.206395,0.992672,0.206395,0.992672,-0.659626,1.34206,-0.659626,1.34206,-0.659626,1.34206,0.320878,1.820271,0.320878,1.820271,0.320878,1.820271,-0.302829,1.211583,-0.302829,1.211583,-0.302829,1.211583,-2.68212,-2.143077,-0.094178,-2.348333,0.86531,1.30522
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,0.101198,14.62,21.61,0.035697,2388.01,-1.035274,1.3,0.012569,0.703601,2388.03,-0.452493,8.4031,0.03,390,2388,100.0,38.99,23.413,0.281194,1.256244,0.281194,1.256244,0.281194,1.256244,0.48015,1.322087,0.48015,1.322087,0.48015,1.322087,0.172255,0.863062,0.172255,0.863062,0.172255,0.863062,-0.386981,1.312449,-0.386981,1.312449,-0.386981,1.312449,0.166203,1.613896,0.166203,1.613896,0.166203,1.613896,-0.222023,1.064705,-0.222023,1.064705,-0.222023,1.064705,-0.170074,0.292238,0.211901,3.064773,-1.038372,-1.156199


In [21]:
# (rows, columns) of the engineered feature table.
test_feat.shape

(13096, 68)

In [26]:
from sklearn.impute import SimpleImputer

# Replace NaNs (such as the rolling std on each engine's first row) with the
# column mean.
# NOTE(review): the means are learned from the test frame itself -- confirm
# this matches how missing values were handled when the model was trained.
imputer = SimpleImputer(strategy='mean').fit(test_feat)
test_feat_imputed = imputer.transform(test_feat)

In [23]:
# Load the pickled stacking model. Unpickling can execute arbitrary code, so
# only open model files from a trusted source.
with open(r'C:\potfolio\RUL_Prediction\models\stack_model.pkl', 'rb') as model_file:
    stack_model = pkl.load(model_file)

stack_model

0,1,2
,estimators,"[('xgb', ...), ('rf', ...), ...]"
,final_estimator,"XGBRegressor(...obs=None, ...)"
,cv,3
,n_jobs,-1
,passthrough,False
,verbose,0

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.9
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,n_estimators,300
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,missing_values,
,strategy,'mean'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,0.1
,coef0,0.0
,tol,0.001
,C,100
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,loss,'squared_error'
,hidden_layer_sizes,"(256, ...)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,1000

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [27]:
# Run the loaded model on the imputed feature matrix; the bare name displays the result.
y_pred = stack_model.predict(test_feat_imputed)
y_pred

array([218.56706  , 137.42572  , 131.28052  , ...,   7.9229026,
        10.193046 ,   5.838184 ], dtype=float32)