In [1]:
# imports
import warnings
import os
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Activation, Dropout, LSTM, Dense, TimeDistributed
from tensorflow.keras.ops import concatenate
from tensorflow.keras.callbacks import ModelCheckpoint, BackupAndRestore, EarlyStopping
from tensorflow.keras import Input, Model
from tensorflow.keras.optimizers import Adam
from sklearn.dummy import DummyRegressor
from sklearn.metrics import make_scorer, r2_score, mean_squared_error, mean_absolute_error
from tabulate import tabulate

In [2]:
# general notebook settings
warnings.simplefilter("ignore")  # silence every warning category for the whole session
pd.set_option("display.max_columns", None)  # never truncate the columns of a displayed frame

In [3]:
# parent of the current working directory; the "raw_data" folder is looked up inside it
raw_data_dir = os.path.split(os.getcwd())[0]

In [4]:
# read the five raw CSV tables (telemetry, errors, failures, machines, maintenance)
df_telemetry, df_errors, df_failures, df_machines, df_components = (
    pd.read_csv(os.path.join(raw_data_dir, relative_path))
    for relative_path in (
        "raw_data/PdM_telemetry.csv",
        "raw_data/PdM_errors.csv",
        "raw_data/PdM_failures.csv",
        "raw_data/PdM_machines.csv",
        "raw_data/PdM_maint.csv",
    )
)

# Feature Engineering

In [5]:
def long_to_wide_form(data, n_in=1, n_out=1, dropnan=True, target=None, exep=None):
    """Turn a long (one row per time step) frame into a wide, lagged one.

    Each output row holds the ``n_in`` previous rows of every lagged column,
    optionally the current/future values of the ``target`` columns, and the
    un-shifted ``exep`` columns of the current row.

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Time-ordered observations. Anything that is not a DataFrame is wrapped
        with ``pd.DataFrame(data)``.
    n_in : int
        Number of lagged steps to add, named ``<col>(t-n_in)`` ... ``<col>(t-1)``.
    n_out : int
        Number of forecast steps to add for the ``target`` columns, named
        ``<col>(t)``, ``<col>(t+1)``, ...
    dropnan : bool
        Drop every row that contains a NaN (the shifts create them at the edges).
    target : list of column labels, optional
        Columns to forecast; none when omitted.
    exep : list of column labels, optional
        Columns excluded from lagging and appended unchanged at the end.

    Returns
    -------
    pandas.DataFrame
        The wide frame, columns ordered as lags, forecasts, then ``exep``.
    """
    # None defaults instead of shared mutable lists
    target = [] if target is None else list(target)
    exep = [] if exep is None else list(exep)
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)

    lagged_source = data.drop(exep, axis=1)
    # take the labels from the frame that is actually shifted so that names and
    # columns can never get out of step
    lagged_labels = list(lagged_source.columns)

    frames, names = [], []
    # input sequence (t-n_in, ..., t-1)
    for lag in range(n_in, 0, -1):
        frames.append(lagged_source.shift(lag))
        names += ['%s(t-%d)' % (label, lag) for label in lagged_labels]
    # forecast sequence (t, t+1, ..., t+n_out-1)
    for step in range(n_out):
        frames.append(lagged_source[target].shift(-step))
        if step == 0:
            names += ['%s(t)' % (label,) for label in target]
        else:
            names += ['%s(t+%d)' % (label, step) for label in target]
    # un-shifted columns of the current row
    frames.append(data[exep])
    names += exep

    agg = pd.concat(frames, axis=1)
    agg.columns = names
    if dropnan:
        agg = agg.dropna()
    return agg

def _days_to_next_failure(day_index, failure_dates):
    """Return, for each day in ``day_index``, the number of days until the next failure date.

    Both arguments are sorted DatetimeIndex objects and every day must lie on or
    before the last failure date. A failure day itself gets 0.
    """
    next_failure_pos = failure_dates.searchsorted(day_index, side="left")
    return np.asarray((failure_dates[next_failure_pos] - day_index).days)


def transform_dataframe(df, n_in=30):
    """Build the per-machine daily, lagged training frame with a RUL column.

    For every ``machineID`` the hourly rows are resampled to daily frequency
    (mean of the sensors, first ``model``/``age``, sum of the event counts),
    restricted to the span between the first and the last failure day, labelled
    with ``RUL`` (days until the next failure, 0 on a failure day) and widened
    with ``long_to_wide_form`` using ``n_in`` lagged days.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by datetime with the columns ``machineID``, ``volt``,
        ``rotate``, ``pressure``, ``vibration``, ``model``, ``age``,
        ``comp_count``, ``error_count`` and ``failure_component_count``.
    n_in : int
        Number of lagged days per output row (30 by default, as before).

    Returns
    -------
    pandas.DataFrame
        Concatenation of all per-machine wide frames. Machines without any
        failure are skipped; an empty frame is returned when none is left.
    """
    daily_aggregations = {"volt": "mean", "rotate": "mean", "pressure": "mean",
                          "vibration": "mean", "model": "first", "age": "first",
                          "comp_count": "sum", "error_count": "sum",
                          "failure_component_count": "sum"}

    machine_frames = []
    for machine_id in df["machineID"].unique():
        # keep only one machine and resample it to daily frequency
        machine_daily = df[df["machineID"] == machine_id].resample("D").agg(daily_aggregations)

        failure_dates = machine_daily[machine_daily["failure_component_count"] != 0].index
        if failure_dates.empty:
            # RUL is unknown for a machine that never fails: skip it explicitly
            # rather than hiding every possible error behind a bare except
            continue

        # rows before the first or after the last failure have no known RUL
        in_failure_span = ((machine_daily.index >= failure_dates[0])
                           & (machine_daily.index <= failure_dates[-1]))
        machine_daily = machine_daily[in_failure_span].copy()
        machine_daily["RUL"] = _days_to_next_failure(machine_daily.index, failure_dates)

        machine_frames.append(
            long_to_wide_form(data=machine_daily, n_in=n_in, exep=["model", "age", "RUL"]))

    if not machine_frames:
        return pd.DataFrame()
    # combine once, after the loop
    return pd.concat(machine_frames)


def regression_metrics(model, X_train, X_test, y_train, y_test):
    """Return a text table of r2, MSE, MAE and RMSE on the training and test sets.

    Parameters
    ----------
    model : object with a ``predict`` method
        Fitted regressor; ``predict`` is called once per set.
    X_train, X_test : model inputs for the two sets.
    y_train, y_test : true targets for the two sets.

    Returns
    -------
    str
        Table rendered by ``tabulate`` with the columns
        "Metric", "Training Set" and "Test Set".
    """
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    mse_train = mean_squared_error(y_train, y_train_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)

    rows = [["Metric", "Training Set", "Test Set"],
            ["r2", r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)],
            ["MSE", mse_train, mse_test],
            ["MAE", mean_absolute_error(y_train, y_train_pred), mean_absolute_error(y_test, y_test_pred)],
            # RMSE is derived from the MSE: the `squared=False` argument of
            # mean_squared_error is deprecated and absent from recent scikit-learn
            ["RMSE", mse_train ** 0.5, mse_test ** 0.5]]
    return tabulate(rows, headers='firstrow', numalign="left")

In [6]:
# count the maintenance (component) records for each datetime and machine id pair
df_components = (
    df_components
    .groupby(by=["datetime", "machineID"], as_index=False)
    .count()
    .rename(columns={"comp": "comp_count"})
)
df_components


Unnamed: 0,datetime,machineID,comp_count
0,2014-06-01 06:00:00,1,1
1,2014-06-01 06:00:00,6,1
2,2014-06-01 06:00:00,9,2
3,2014-06-01 06:00:00,11,1
4,2014-06-01 06:00:00,16,1
...,...,...,...
2523,2016-01-01 06:00:00,35,1
2524,2016-01-01 06:00:00,42,1
2525,2016-01-01 06:00:00,43,1
2526,2016-01-01 06:00:00,55,1


In [7]:
# count the error records for each datetime and machine id pair
df_errors = (
    df_errors
    .groupby(by=["datetime", "machineID"], as_index=False)
    .count()
    .rename(columns={"errorID": "error_count"})
)
df_errors

Unnamed: 0,datetime,machineID,error_count
0,2015-01-01 06:00:00,24,1
1,2015-01-01 06:00:00,73,1
2,2015-01-01 06:00:00,81,1
3,2015-01-01 07:00:00,43,1
4,2015-01-01 08:00:00,14,1
...,...,...,...
3611,2015-12-31 18:00:00,91,1
3612,2015-12-31 20:00:00,23,1
3613,2015-12-31 23:00:00,94,1
3614,2016-01-01 05:00:00,8,1


In [8]:
# count the failed components for each datetime and machine id pair
df_failures = (
    df_failures
    .groupby(by=["datetime", "machineID"], as_index=False)
    .count()
    .rename(columns={"failure": "failure_component_count"})
)
df_failures

Unnamed: 0,datetime,machineID,failure_component_count
0,2015-01-02 03:00:00,16,2
1,2015-01-02 03:00:00,17,1
2,2015-01-02 03:00:00,22,1
3,2015-01-02 03:00:00,35,1
4,2015-01-02 03:00:00,45,1
...,...,...,...
714,2015-12-30 06:00:00,88,1
715,2015-12-31 06:00:00,15,1
716,2015-12-31 06:00:00,64,1
717,2015-12-31 06:00:00,90,1


In [9]:
# left-join machine metadata and the three event-count tables onto the telemetry rows
df_maintenance = (
    df_telemetry
    .merge(df_machines, on="machineID", how="left")
    .merge(df_components, on=["datetime", "machineID"], how="left")
    .merge(df_errors, on=["datetime", "machineID"], how="left")
    .merge(df_failures, on=["datetime", "machineID"], how="left")
)
df_maintenance

Unnamed: 0,datetime,machineID,volt,rotate,pressure,vibration,model,age,comp_count,error_count,failure_component_count
0,2015-01-01 06:00:00,1,176.217853,418.504078,113.077935,45.087686,model3,18,,,
1,2015-01-01 07:00:00,1,162.879223,402.747490,95.460525,43.413973,model3,18,,,
2,2015-01-01 08:00:00,1,170.989902,527.349825,75.237905,34.178847,model3,18,,,
3,2015-01-01 09:00:00,1,162.462833,346.149335,109.248561,41.122144,model3,18,,,
4,2015-01-01 10:00:00,1,157.610021,435.376873,111.886648,25.990511,model3,18,,,
...,...,...,...,...,...,...,...,...,...,...,...
876095,2016-01-01 02:00:00,100,179.438162,395.222827,102.290715,50.771941,model4,5,,,
876096,2016-01-01 03:00:00,100,189.617555,446.207972,98.180607,35.123072,model4,5,,,
876097,2016-01-01 04:00:00,100,192.483414,447.816524,94.132837,48.314561,model4,5,,,
876098,2016-01-01 05:00:00,100,165.475310,413.771670,104.081073,44.835259,model4,5,,,


In [10]:
# hours without a matching event row come out of the left joins as NaN: set them to 0
df_maintenance = df_maintenance.fillna(0)

In [11]:
# store the age column as float
df_maintenance = df_maintenance.astype({"age": float})

In [12]:
# parse the datetime column and use it as the index
df_maintenance = (
    df_maintenance
    .assign(**{"datetime": pd.to_datetime(df_maintenance["datetime"])})
    .set_index("datetime")
)

In [13]:
# build the lagged per-machine frame and give it a fresh 0..n-1 index
df_transformed = transform_dataframe(df_maintenance).reset_index(drop=True)
df_transformed

Unnamed: 0,volt(t-30),rotate(t-30),pressure(t-30),vibration(t-30),comp_count(t-30),error_count(t-30),failure_component_count(t-30),volt(t-29),rotate(t-29),pressure(t-29),vibration(t-29),comp_count(t-29),error_count(t-29),failure_component_count(t-29),volt(t-28),rotate(t-28),pressure(t-28),vibration(t-28),comp_count(t-28),error_count(t-28),failure_component_count(t-28),volt(t-27),rotate(t-27),pressure(t-27),vibration(t-27),comp_count(t-27),error_count(t-27),failure_component_count(t-27),volt(t-26),rotate(t-26),pressure(t-26),vibration(t-26),comp_count(t-26),error_count(t-26),failure_component_count(t-26),volt(t-25),rotate(t-25),pressure(t-25),vibration(t-25),comp_count(t-25),error_count(t-25),failure_component_count(t-25),volt(t-24),rotate(t-24),pressure(t-24),vibration(t-24),comp_count(t-24),error_count(t-24),failure_component_count(t-24),volt(t-23),rotate(t-23),pressure(t-23),vibration(t-23),comp_count(t-23),error_count(t-23),failure_component_count(t-23),volt(t-22),rotate(t-22),pressure(t-22),vibration(t-22),comp_count(t-22),error_count(t-22),failure_component_count(t-22),volt(t-21),rotate(t-21),pressure(t-21),vibration(t-21),comp_count(t-21),error_count(t-21),failure_component_count(t-21),volt(t-20),rotate(t-20),pressure(t-20),vibration(t-20),comp_count(t-20),error_count(t-20),failure_component_count(t-20),volt(t-19),rotate(t-19),pressure(t-19),vibration(t-19),comp_count(t-19),error_count(t-19),failure_component_count(t-19),volt(t-18),rotate(t-18),pressure(t-18),vibration(t-18),comp_count(t-18),error_count(t-18),failure_component_count(t-18),volt(t-17),rotate(t-17),pressure(t-17),vibration(t-17),comp_count(t-17),error_count(t-17),failure_component_count(t-17),volt(t-16),rotate(t-16),pressure(t-16),vibration(t-16),comp_count(t-16),error_count(t-16),failure_component_count(t-16),volt(t-15),rotate(t-15),pressure(t-15),vibration(t-15),comp_count(t-15),error_count(t-15),failure_component_count(t-15),volt(t-14),rotate(t-14),pressure(t-14),vibration(t-14),comp_count(t-1
4),error_count(t-14),failure_component_count(t-14),volt(t-13),rotate(t-13),pressure(t-13),vibration(t-13),comp_count(t-13),error_count(t-13),failure_component_count(t-13),volt(t-12),rotate(t-12),pressure(t-12),vibration(t-12),comp_count(t-12),error_count(t-12),failure_component_count(t-12),volt(t-11),rotate(t-11),pressure(t-11),vibration(t-11),comp_count(t-11),error_count(t-11),failure_component_count(t-11),volt(t-10),rotate(t-10),pressure(t-10),vibration(t-10),comp_count(t-10),error_count(t-10),failure_component_count(t-10),volt(t-9),rotate(t-9),pressure(t-9),vibration(t-9),comp_count(t-9),error_count(t-9),failure_component_count(t-9),volt(t-8),rotate(t-8),pressure(t-8),vibration(t-8),comp_count(t-8),error_count(t-8),failure_component_count(t-8),volt(t-7),rotate(t-7),pressure(t-7),vibration(t-7),comp_count(t-7),error_count(t-7),failure_component_count(t-7),volt(t-6),rotate(t-6),pressure(t-6),vibration(t-6),comp_count(t-6),error_count(t-6),failure_component_count(t-6),volt(t-5),rotate(t-5),pressure(t-5),vibration(t-5),comp_count(t-5),error_count(t-5),failure_component_count(t-5),volt(t-4),rotate(t-4),pressure(t-4),vibration(t-4),comp_count(t-4),error_count(t-4),failure_component_count(t-4),volt(t-3),rotate(t-3),pressure(t-3),vibration(t-3),comp_count(t-3),error_count(t-3),failure_component_count(t-3),volt(t-2),rotate(t-2),pressure(t-2),vibration(t-2),comp_count(t-2),error_count(t-2),failure_component_count(t-2),volt(t-1),rotate(t-1),pressure(t-1),vibration(t-1),comp_count(t-1),error_count(t-1),failure_component_count(t-1),model,age,RUL
0,171.018408,454.822750,102.363114,43.330311,2.0,0.0,1.0,171.025033,454.614348,102.377665,41.506930,0.0,0.0,0.0,174.139410,444.337772,96.674842,41.702771,0.0,0.0,0.0,172.135931,446.126781,102.097039,39.281983,0.0,0.0,0.0,169.350052,466.884090,100.128260,38.571482,0.0,0.0,0.0,171.006246,455.695551,99.680231,39.789390,0.0,1.0,0.0,167.405610,433.871793,97.453837,41.290737,0.0,0.0,0.0,169.234045,445.874333,96.650557,39.437964,0.0,0.0,0.0,170.573458,451.978783,102.762072,40.140004,0.0,0.0,0.0,167.981092,459.668333,100.772665,40.485997,0.0,0.0,0.0,171.307628,471.330450,101.683031,40.000752,0.0,0.0,0.0,186.442784,438.538113,102.261363,40.498463,0.0,0.0,0.0,193.136405,455.412049,99.233100,40.985994,0.0,0.0,0.0,171.642199,452.965071,97.390321,39.433124,0.0,0.0,0.0,170.443004,462.081870,99.159579,37.763100,0.0,0.0,0.0,164.986541,454.439300,100.350350,40.340142,2.0,0.0,0.0,167.316883,460.409798,98.413494,40.422277,0.0,0.0,0.0,170.167154,469.968639,99.331048,39.040393,0.0,1.0,0.0,169.036144,455.496026,101.865150,39.228045,0.0,0.0,0.0,165.371702,447.988307,101.066052,41.624926,0.0,0.0,0.0,166.921255,436.674230,96.952274,40.546028,0.0,1.0,0.0,179.223233,456.998714,101.545344,40.328477,0.0,0.0,0.0,192.313627,454.429161,95.096870,39.007657,0.0,1.0,0.0,183.295335,454.376357,100.719850,37.381916,0.0,0.0,0.0,169.130134,440.163874,103.726632,39.297538,0.0,0.0,0.0,168.072639,459.299745,96.483402,40.032468,0.0,0.0,0.0,173.160661,443.410469,96.988903,40.319582,0.0,0.0,0.0,164.947744,443.721008,100.862632,39.781897,0.0,0.0,0.0,170.531014,425.313419,100.957334,39.330830,0.0,0.0,0.0,170.814359,450.342498,100.963982,40.368655,0.0,0.0,0.0,model3,18.0,30
1,171.025033,454.614348,102.377665,41.506930,0.0,0.0,0.0,174.139410,444.337772,96.674842,41.702771,0.0,0.0,0.0,172.135931,446.126781,102.097039,39.281983,0.0,0.0,0.0,169.350052,466.884090,100.128260,38.571482,0.0,0.0,0.0,171.006246,455.695551,99.680231,39.789390,0.0,1.0,0.0,167.405610,433.871793,97.453837,41.290737,0.0,0.0,0.0,169.234045,445.874333,96.650557,39.437964,0.0,0.0,0.0,170.573458,451.978783,102.762072,40.140004,0.0,0.0,0.0,167.981092,459.668333,100.772665,40.485997,0.0,0.0,0.0,171.307628,471.330450,101.683031,40.000752,0.0,0.0,0.0,186.442784,438.538113,102.261363,40.498463,0.0,0.0,0.0,193.136405,455.412049,99.233100,40.985994,0.0,0.0,0.0,171.642199,452.965071,97.390321,39.433124,0.0,0.0,0.0,170.443004,462.081870,99.159579,37.763100,0.0,0.0,0.0,164.986541,454.439300,100.350350,40.340142,2.0,0.0,0.0,167.316883,460.409798,98.413494,40.422277,0.0,0.0,0.0,170.167154,469.968639,99.331048,39.040393,0.0,1.0,0.0,169.036144,455.496026,101.865150,39.228045,0.0,0.0,0.0,165.371702,447.988307,101.066052,41.624926,0.0,0.0,0.0,166.921255,436.674230,96.952274,40.546028,0.0,1.0,0.0,179.223233,456.998714,101.545344,40.328477,0.0,0.0,0.0,192.313627,454.429161,95.096870,39.007657,0.0,1.0,0.0,183.295335,454.376357,100.719850,37.381916,0.0,0.0,0.0,169.130134,440.163874,103.726632,39.297538,0.0,0.0,0.0,168.072639,459.299745,96.483402,40.032468,0.0,0.0,0.0,173.160661,443.410469,96.988903,40.319582,0.0,0.0,0.0,164.947744,443.721008,100.862632,39.781897,0.0,0.0,0.0,170.531014,425.313419,100.957334,39.330830,0.0,0.0,0.0,170.814359,450.342498,100.963982,40.368655,0.0,0.0,0.0,167.311836,438.313408,99.835587,40.547315,2.0,0.0,0.0,model3,18.0,29
2,174.139410,444.337772,96.674842,41.702771,0.0,0.0,0.0,172.135931,446.126781,102.097039,39.281983,0.0,0.0,0.0,169.350052,466.884090,100.128260,38.571482,0.0,0.0,0.0,171.006246,455.695551,99.680231,39.789390,0.0,1.0,0.0,167.405610,433.871793,97.453837,41.290737,0.0,0.0,0.0,169.234045,445.874333,96.650557,39.437964,0.0,0.0,0.0,170.573458,451.978783,102.762072,40.140004,0.0,0.0,0.0,167.981092,459.668333,100.772665,40.485997,0.0,0.0,0.0,171.307628,471.330450,101.683031,40.000752,0.0,0.0,0.0,186.442784,438.538113,102.261363,40.498463,0.0,0.0,0.0,193.136405,455.412049,99.233100,40.985994,0.0,0.0,0.0,171.642199,452.965071,97.390321,39.433124,0.0,0.0,0.0,170.443004,462.081870,99.159579,37.763100,0.0,0.0,0.0,164.986541,454.439300,100.350350,40.340142,2.0,0.0,0.0,167.316883,460.409798,98.413494,40.422277,0.0,0.0,0.0,170.167154,469.968639,99.331048,39.040393,0.0,1.0,0.0,169.036144,455.496026,101.865150,39.228045,0.0,0.0,0.0,165.371702,447.988307,101.066052,41.624926,0.0,0.0,0.0,166.921255,436.674230,96.952274,40.546028,0.0,1.0,0.0,179.223233,456.998714,101.545344,40.328477,0.0,0.0,0.0,192.313627,454.429161,95.096870,39.007657,0.0,1.0,0.0,183.295335,454.376357,100.719850,37.381916,0.0,0.0,0.0,169.130134,440.163874,103.726632,39.297538,0.0,0.0,0.0,168.072639,459.299745,96.483402,40.032468,0.0,0.0,0.0,173.160661,443.410469,96.988903,40.319582,0.0,0.0,0.0,164.947744,443.721008,100.862632,39.781897,0.0,0.0,0.0,170.531014,425.313419,100.957334,39.330830,0.0,0.0,0.0,170.814359,450.342498,100.963982,40.368655,0.0,0.0,0.0,167.311836,438.313408,99.835587,40.547315,2.0,0.0,0.0,167.504306,439.191625,99.572642,39.993782,0.0,0.0,0.0,model3,18.0,28
3,172.135931,446.126781,102.097039,39.281983,0.0,0.0,0.0,169.350052,466.884090,100.128260,38.571482,0.0,0.0,0.0,171.006246,455.695551,99.680231,39.789390,0.0,1.0,0.0,167.405610,433.871793,97.453837,41.290737,0.0,0.0,0.0,169.234045,445.874333,96.650557,39.437964,0.0,0.0,0.0,170.573458,451.978783,102.762072,40.140004,0.0,0.0,0.0,167.981092,459.668333,100.772665,40.485997,0.0,0.0,0.0,171.307628,471.330450,101.683031,40.000752,0.0,0.0,0.0,186.442784,438.538113,102.261363,40.498463,0.0,0.0,0.0,193.136405,455.412049,99.233100,40.985994,0.0,0.0,0.0,171.642199,452.965071,97.390321,39.433124,0.0,0.0,0.0,170.443004,462.081870,99.159579,37.763100,0.0,0.0,0.0,164.986541,454.439300,100.350350,40.340142,2.0,0.0,0.0,167.316883,460.409798,98.413494,40.422277,0.0,0.0,0.0,170.167154,469.968639,99.331048,39.040393,0.0,1.0,0.0,169.036144,455.496026,101.865150,39.228045,0.0,0.0,0.0,165.371702,447.988307,101.066052,41.624926,0.0,0.0,0.0,166.921255,436.674230,96.952274,40.546028,0.0,1.0,0.0,179.223233,456.998714,101.545344,40.328477,0.0,0.0,0.0,192.313627,454.429161,95.096870,39.007657,0.0,1.0,0.0,183.295335,454.376357,100.719850,37.381916,0.0,0.0,0.0,169.130134,440.163874,103.726632,39.297538,0.0,0.0,0.0,168.072639,459.299745,96.483402,40.032468,0.0,0.0,0.0,173.160661,443.410469,96.988903,40.319582,0.0,0.0,0.0,164.947744,443.721008,100.862632,39.781897,0.0,0.0,0.0,170.531014,425.313419,100.957334,39.330830,0.0,0.0,0.0,170.814359,450.342498,100.963982,40.368655,0.0,0.0,0.0,167.311836,438.313408,99.835587,40.547315,2.0,0.0,0.0,167.504306,439.191625,99.572642,39.993782,0.0,0.0,0.0,170.407391,432.670091,100.351476,38.855831,0.0,0.0,0.0,model3,18.0,27
4,169.350052,466.884090,100.128260,38.571482,0.0,0.0,0.0,171.006246,455.695551,99.680231,39.789390,0.0,1.0,0.0,167.405610,433.871793,97.453837,41.290737,0.0,0.0,0.0,169.234045,445.874333,96.650557,39.437964,0.0,0.0,0.0,170.573458,451.978783,102.762072,40.140004,0.0,0.0,0.0,167.981092,459.668333,100.772665,40.485997,0.0,0.0,0.0,171.307628,471.330450,101.683031,40.000752,0.0,0.0,0.0,186.442784,438.538113,102.261363,40.498463,0.0,0.0,0.0,193.136405,455.412049,99.233100,40.985994,0.0,0.0,0.0,171.642199,452.965071,97.390321,39.433124,0.0,0.0,0.0,170.443004,462.081870,99.159579,37.763100,0.0,0.0,0.0,164.986541,454.439300,100.350350,40.340142,2.0,0.0,0.0,167.316883,460.409798,98.413494,40.422277,0.0,0.0,0.0,170.167154,469.968639,99.331048,39.040393,0.0,1.0,0.0,169.036144,455.496026,101.865150,39.228045,0.0,0.0,0.0,165.371702,447.988307,101.066052,41.624926,0.0,0.0,0.0,166.921255,436.674230,96.952274,40.546028,0.0,1.0,0.0,179.223233,456.998714,101.545344,40.328477,0.0,0.0,0.0,192.313627,454.429161,95.096870,39.007657,0.0,1.0,0.0,183.295335,454.376357,100.719850,37.381916,0.0,0.0,0.0,169.130134,440.163874,103.726632,39.297538,0.0,0.0,0.0,168.072639,459.299745,96.483402,40.032468,0.0,0.0,0.0,173.160661,443.410469,96.988903,40.319582,0.0,0.0,0.0,164.947744,443.721008,100.862632,39.781897,0.0,0.0,0.0,170.531014,425.313419,100.957334,39.330830,0.0,0.0,0.0,170.814359,450.342498,100.963982,40.368655,0.0,0.0,0.0,167.311836,438.313408,99.835587,40.547315,2.0,0.0,0.0,167.504306,439.191625,99.572642,39.993782,0.0,0.0,0.0,170.407391,432.670091,100.351476,38.855831,0.0,0.0,0.0,170.973324,461.844851,101.511545,40.645728,0.0,0.0,0.0,model3,18.0,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25802,167.932943,436.257431,103.117038,47.101458,0.0,1.0,0.0,170.498588,457.489047,103.366773,47.369026,0.0,0.0,0.0,166.041649,458.354809,97.010848,42.464778,0.0,1.0,0.0,170.218415,442.862559,97.235825,39.833933,0.0,0.0,0.0,166.773917,444.424385,104.672300,40.226068,1.0,0.0,0.0,169.117488,443.270578,97.841588,38.587576,0.0,0.0,0.0,168.024492,450.043861,100.519463,38.483660,0.0,0.0,0.0,163.103840,461.869567,101.752225,41.404936,0.0,1.0,0.0,169.193818,437.951370,100.102722,38.240214,0.0,0.0,0.0,170.190953,397.490543,99.945501,39.168881,0.0,0.0,0.0,176.070574,358.657675,101.535161,40.373803,0.0,0.0,0.0,169.725411,434.403099,101.346433,39.395033,0.0,0.0,0.0,172.886352,451.443813,98.352119,38.875182,0.0,0.0,0.0,169.888388,449.671378,98.534311,39.465596,0.0,0.0,0.0,168.123645,460.962673,97.896704,40.617498,0.0,0.0,0.0,170.387476,443.523160,96.642024,39.485418,0.0,0.0,0.0,170.247559,466.637114,99.085938,40.677375,0.0,1.0,0.0,167.883813,464.077661,100.804475,41.573803,0.0,0.0,0.0,173.415541,453.855680,99.103688,40.239520,0.0,0.0,0.0,168.585443,462.184849,102.744134,39.818608,0.0,0.0,0.0,175.398618,430.006778,99.244449,40.347964,0.0,0.0,0.0,164.650383,453.242663,100.452767,40.568980,0.0,0.0,0.0,175.250463,455.747295,99.739651,42.068915,0.0,0.0,0.0,162.438706,471.001357,99.936564,38.863403,0.0,0.0,0.0,167.760162,439.457458,101.088686,40.514435,0.0,0.0,0.0,171.438689,459.360961,96.504847,39.930355,0.0,0.0,0.0,168.964626,440.215638,100.169167,38.904080,0.0,0.0,0.0,168.858880,452.719653,99.520591,39.353887,0.0,0.0,0.0,168.710438,451.490999,99.776836,39.836434,0.0,0.0,0.0,170.292763,441.539577,99.573166,39.292405,0.0,1.0,0.0,model4,5.0,4
25803,170.498588,457.489047,103.366773,47.369026,0.0,0.0,0.0,166.041649,458.354809,97.010848,42.464778,0.0,1.0,0.0,170.218415,442.862559,97.235825,39.833933,0.0,0.0,0.0,166.773917,444.424385,104.672300,40.226068,1.0,0.0,0.0,169.117488,443.270578,97.841588,38.587576,0.0,0.0,0.0,168.024492,450.043861,100.519463,38.483660,0.0,0.0,0.0,163.103840,461.869567,101.752225,41.404936,0.0,1.0,0.0,169.193818,437.951370,100.102722,38.240214,0.0,0.0,0.0,170.190953,397.490543,99.945501,39.168881,0.0,0.0,0.0,176.070574,358.657675,101.535161,40.373803,0.0,0.0,0.0,169.725411,434.403099,101.346433,39.395033,0.0,0.0,0.0,172.886352,451.443813,98.352119,38.875182,0.0,0.0,0.0,169.888388,449.671378,98.534311,39.465596,0.0,0.0,0.0,168.123645,460.962673,97.896704,40.617498,0.0,0.0,0.0,170.387476,443.523160,96.642024,39.485418,0.0,0.0,0.0,170.247559,466.637114,99.085938,40.677375,0.0,1.0,0.0,167.883813,464.077661,100.804475,41.573803,0.0,0.0,0.0,173.415541,453.855680,99.103688,40.239520,0.0,0.0,0.0,168.585443,462.184849,102.744134,39.818608,0.0,0.0,0.0,175.398618,430.006778,99.244449,40.347964,0.0,0.0,0.0,164.650383,453.242663,100.452767,40.568980,0.0,0.0,0.0,175.250463,455.747295,99.739651,42.068915,0.0,0.0,0.0,162.438706,471.001357,99.936564,38.863403,0.0,0.0,0.0,167.760162,439.457458,101.088686,40.514435,0.0,0.0,0.0,171.438689,459.360961,96.504847,39.930355,0.0,0.0,0.0,168.964626,440.215638,100.169167,38.904080,0.0,0.0,0.0,168.858880,452.719653,99.520591,39.353887,0.0,0.0,0.0,168.710438,451.490999,99.776836,39.836434,0.0,0.0,0.0,170.292763,441.539577,99.573166,39.292405,0.0,1.0,0.0,166.637041,443.320475,98.053732,39.556574,0.0,0.0,0.0,model4,5.0,3
25804,166.041649,458.354809,97.010848,42.464778,0.0,1.0,0.0,170.218415,442.862559,97.235825,39.833933,0.0,0.0,0.0,166.773917,444.424385,104.672300,40.226068,1.0,0.0,0.0,169.117488,443.270578,97.841588,38.587576,0.0,0.0,0.0,168.024492,450.043861,100.519463,38.483660,0.0,0.0,0.0,163.103840,461.869567,101.752225,41.404936,0.0,1.0,0.0,169.193818,437.951370,100.102722,38.240214,0.0,0.0,0.0,170.190953,397.490543,99.945501,39.168881,0.0,0.0,0.0,176.070574,358.657675,101.535161,40.373803,0.0,0.0,0.0,169.725411,434.403099,101.346433,39.395033,0.0,0.0,0.0,172.886352,451.443813,98.352119,38.875182,0.0,0.0,0.0,169.888388,449.671378,98.534311,39.465596,0.0,0.0,0.0,168.123645,460.962673,97.896704,40.617498,0.0,0.0,0.0,170.387476,443.523160,96.642024,39.485418,0.0,0.0,0.0,170.247559,466.637114,99.085938,40.677375,0.0,1.0,0.0,167.883813,464.077661,100.804475,41.573803,0.0,0.0,0.0,173.415541,453.855680,99.103688,40.239520,0.0,0.0,0.0,168.585443,462.184849,102.744134,39.818608,0.0,0.0,0.0,175.398618,430.006778,99.244449,40.347964,0.0,0.0,0.0,164.650383,453.242663,100.452767,40.568980,0.0,0.0,0.0,175.250463,455.747295,99.739651,42.068915,0.0,0.0,0.0,162.438706,471.001357,99.936564,38.863403,0.0,0.0,0.0,167.760162,439.457458,101.088686,40.514435,0.0,0.0,0.0,171.438689,459.360961,96.504847,39.930355,0.0,0.0,0.0,168.964626,440.215638,100.169167,38.904080,0.0,0.0,0.0,168.858880,452.719653,99.520591,39.353887,0.0,0.0,0.0,168.710438,451.490999,99.776836,39.836434,0.0,0.0,0.0,170.292763,441.539577,99.573166,39.292405,0.0,1.0,0.0,166.637041,443.320475,98.053732,39.556574,0.0,0.0,0.0,170.826845,463.537218,100.731194,40.040140,0.0,0.0,0.0,model4,5.0,2
25805,170.218415,442.862559,97.235825,39.833933,0.0,0.0,0.0,166.773917,444.424385,104.672300,40.226068,1.0,0.0,0.0,169.117488,443.270578,97.841588,38.587576,0.0,0.0,0.0,168.024492,450.043861,100.519463,38.483660,0.0,0.0,0.0,163.103840,461.869567,101.752225,41.404936,0.0,1.0,0.0,169.193818,437.951370,100.102722,38.240214,0.0,0.0,0.0,170.190953,397.490543,99.945501,39.168881,0.0,0.0,0.0,176.070574,358.657675,101.535161,40.373803,0.0,0.0,0.0,169.725411,434.403099,101.346433,39.395033,0.0,0.0,0.0,172.886352,451.443813,98.352119,38.875182,0.0,0.0,0.0,169.888388,449.671378,98.534311,39.465596,0.0,0.0,0.0,168.123645,460.962673,97.896704,40.617498,0.0,0.0,0.0,170.387476,443.523160,96.642024,39.485418,0.0,0.0,0.0,170.247559,466.637114,99.085938,40.677375,0.0,1.0,0.0,167.883813,464.077661,100.804475,41.573803,0.0,0.0,0.0,173.415541,453.855680,99.103688,40.239520,0.0,0.0,0.0,168.585443,462.184849,102.744134,39.818608,0.0,0.0,0.0,175.398618,430.006778,99.244449,40.347964,0.0,0.0,0.0,164.650383,453.242663,100.452767,40.568980,0.0,0.0,0.0,175.250463,455.747295,99.739651,42.068915,0.0,0.0,0.0,162.438706,471.001357,99.936564,38.863403,0.0,0.0,0.0,167.760162,439.457458,101.088686,40.514435,0.0,0.0,0.0,171.438689,459.360961,96.504847,39.930355,0.0,0.0,0.0,168.964626,440.215638,100.169167,38.904080,0.0,0.0,0.0,168.858880,452.719653,99.520591,39.353887,0.0,0.0,0.0,168.710438,451.490999,99.776836,39.836434,0.0,0.0,0.0,170.292763,441.539577,99.573166,39.292405,0.0,1.0,0.0,166.637041,443.320475,98.053732,39.556574,0.0,0.0,0.0,170.826845,463.537218,100.731194,40.040140,0.0,0.0,0.0,169.602066,376.676980,98.959072,42.700168,0.0,0.0,0.0,model4,5.0,1


# Data Preprocessing

In [14]:
# separate the target (RUL) from the features
X = df_transformed.copy()
y = X.pop("RUL")

In [15]:
# hold out 30 % of the rows, then halve that hold-out into validation and test sets
# NOTE(review): rows are overlapping lagged windows built per machine, so a random
# row split can put near-identical windows in different subsets - consider
# splitting by machine or by time before trusting the test metrics.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_validation, X_test, y_validation, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

In [16]:
# Column lists are taken from the feature frame X, not from df_transformed,
# so the selection can never pick up the target column whatever its dtype.
# numerical columns: standardised
num_cols = X.select_dtypes(float).columns
# categorical columns: one-hot encoded
cat_cols = X.select_dtypes(object).columns
# transformed output is ordered as listed here: numerical columns first, one-hot columns last
cols_transformer = ColumnTransformer([
    ("num_transformer", StandardScaler(), num_cols),
    ("cat_transformer", OneHotEncoder(sparse_output=False), cat_cols),
])

In [17]:
# fit the transformer on the training split only, then reuse it for the other two splits
X_train_transformed = cols_transformer.fit_transform(X_train)
X_validation_transformed, X_test_transformed = (
    cols_transformer.transform(split) for split in (X_validation, X_test)
)

# Modeling & Evaluation

In [18]:
# define a baseline model: a dummy regressor using the 'mean' strategy
baseline_reg = DummyRegressor(strategy='mean')

In [19]:
# train the baseline regressor, then print its training/test metrics table
baseline_reg.fit(X_train_transformed, y_train)
print(
    regression_metrics(
        model=baseline_reg,
        X_train=X_train_transformed,
        X_test=X_test_transformed,
        y_train=y_train,
        y_test=y_test,
    )
)

Metric    Training Set    Test Set
--------  --------------  ------------
r2        0               -2.56448e-05
MSE       1471.37         1447.83
MAE       27.2855         26.8183
RMSE      38.3585         38.0504


In [20]:
# Split every transformed feature matrix into the two inputs of the network:
# the lagged window (time-step features) and the remaining static features.
N_TIME_STEPS = 30    # lagged days per row
N_SEQ_FEATURES = 7   # lagged variables per day


def split_model_inputs(features, n_time_steps=N_TIME_STEPS, n_seq_features=N_SEQ_FEATURES):
    """Return ``(sequence_part, static_part)`` of a 2-D transformed feature array.

    The first ``n_time_steps * n_seq_features`` columns are reshaped to
    ``(rows, n_time_steps, n_seq_features)``; every remaining column is returned
    unchanged as the static part. Slicing from the front keeps the reshape
    correct even if the number of static columns changes.
    """
    n_seq_cols = n_time_steps * n_seq_features
    sequence_part = features[:, :n_seq_cols].reshape((-1, n_time_steps, n_seq_features))
    static_part = features[:, n_seq_cols:]
    return sequence_part, static_part


# train inputs
time_steps_train_input, non_time_steps_train_input = split_model_inputs(X_train_transformed)

# validation inputs
time_steps_validation_input, non_time_steps_validation_input = split_model_inputs(X_validation_transformed)

# test inputs
time_steps_test_input, non_time_steps_test_input = split_model_inputs(X_test_transformed)

In [21]:
# The branch tensors get their own names: the previous `x` / `y` overwrote the
# target Series `y` defined in the data-preprocessing section.
# define the two inputs: the lagged window and the static features
time_steps_inputs = Input(shape=(30,7))
non_time_steps_inputs = Input(shape=(5,))
# first branch: two stacked LSTM layers over the lagged window
sequence_branch = LSTM(100, activation="relu", return_sequences=True)(time_steps_inputs)
sequence_branch = LSTM(20, activation="relu")(sequence_branch)
# second branch: dense layers over the static features
static_branch = Dense(64, activation="relu")(non_time_steps_inputs)
static_branch = Dense(32, activation="relu")(static_branch)
static_branch = Dense(4, activation="relu")(static_branch)
# combine the output of the two branches
combined = concatenate([sequence_branch, static_branch], axis=1)
# apply a fully connected layer and then a single-unit regression output
regression_head = Dense(2, activation="relu")(combined)
rul_output = Dense(1, activation="relu")(regression_head)
# the model accepts the inputs of the two branches and outputs a single value
model = Model(inputs=[time_steps_inputs, non_time_steps_inputs], outputs=rul_output)

In [22]:
# training callbacks for the network
# backup/restore state kept under callbacks/backup
backup = BackupAndRestore(backup_dir="callbacks/backup")
# keep only the model with the lowest validation loss
model_checkpoint = ModelCheckpoint(filepath="callbacks/model_checkpoint.keras",
                                   monitor="val_loss",
                                   mode="min",
                                   save_best_only=True)
model_callbacks = [backup, model_checkpoint]

In [23]:
# compile model: mean squared error loss, Adam optimizer (learning rate 0.0005),
# mean absolute error tracked as an additional metric
model.compile(loss="mse",
              optimizer=Adam(learning_rate=0.0005),
              metrics=["mae"])

In [25]:
# train model for 200 epochs on the two training inputs, evaluating on the
# validation inputs after every epoch; the callbacks defined above are attached
model_history = model.fit(x=[time_steps_train_input, non_time_steps_train_input],
        y=y_train,                         
        batch_size=32,
        epochs=200,
        validation_data=([time_steps_validation_input, non_time_steps_validation_input], y_validation),
        validation_batch_size=32,
        callbacks=model_callbacks)

Epoch 1/200
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 48ms/step - loss: 2199.6143 - mae: 30.3964 - val_loss: 1310.9596 - val_mae: 26.5136
Epoch 2/200
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 49ms/step - loss: 1316.7878 - mae: 25.9648 - val_loss: 1273.7075 - val_mae: 25.2925
Epoch 3/200
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 40ms/step - loss: 1350.5957 - mae: 26.1089 - val_loss: 1261.0151 - val_mae: 24.4785
Epoch 4/200
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 40ms/step - loss: 1277.0020 - mae: 25.2640 - val_loss: 1209.9214 - val_mae: 25.1461
Epoch 5/200
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 40ms/step - loss: 1232.3530 - mae: 25.0987 - val_loss: 1163.8735 - val_mae: 25.0704
Epoch 6/200
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 40ms/step - loss: 1170.0778 - mae: 24.4362 - val_loss: 1143.5859 - val_mae: 25.2835
Epoch 7/200
[1m565/56

In [26]:
# training/test metrics table of the two-input network
print(
    regression_metrics(
        model=model,
        X_train=[time_steps_train_input, non_time_steps_train_input],
        X_test=[time_steps_test_input, non_time_steps_test_input],
        y_train=y_train,
        y_test=y_test,
    )
)

[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 34ms/step
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step
Metric    Training Set    Test Set
--------  --------------  ----------
r2        0.997358        0.986066
MSE       3.88709         20.1735
MAE       1.43086         2.30149
RMSE      1.97157         4.4915
