In [26]:
from sklearn.pipeline import Pipeline

In [27]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import os

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import FunctionTransformer
import xgboost as xgb


import pickle
import yaml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score

In [28]:
with open('./saved_models/Random Forest/rf_model.pkl', 'rb') as file:
        rf_model = pickle.load(file)

with open('./saved_models/XGBoost/xgb_model.pkl', 'rb') as f:
        xgb_model = pickle.load(f)

In [29]:
import importlib
import preprocess
importlib.reload(preprocess)
from preprocess import Preprocessor


with open("./saved_scalers/stdscaler.pkl", 'rb') as file:
        saved_scaler = pickle.load(file)

with open("./saved_models/PCA/pca_model.pkl", 'rb') as file:
        saved_pca = pickle.load(file)

p = Preprocessor(saved_std=saved_scaler)
pca = saved_pca

In [30]:
# Loading data
raw_test = pd.read_table("../Data/test_FD001.txt", sep=' ', header=None)
rul_values = pd.read_table("../Data/RUL_FD001.txt", names=['remaining_cycles'])

In [31]:
# Loading remaining cycles values
rul_values['unit']=rul_values.index+1
rul_values.count()

remaining_cycles    100
unit                100
dtype: int64

In [32]:
# Extracting current cycle values
raw_test_copy = raw_test.copy()
raw_test_copy.columns = ["unit", 'current_cycle'] + [f"{i}" for i in range(2, raw_test_copy.shape[1])]
current_cycle = raw_test_copy.groupby('unit').max().iloc[:,:1].reset_index()

In [33]:
# Calculating maximum cycles
max_cycle = rul_values.merge(current_cycle, on= 'unit', how='left')
max_cycle['max_cycle'] =  max_cycle['current_cycle'] + max_cycle['remaining_cycles']
max_cycle = max_cycle[['unit', 'max_cycle']]

In [34]:
# calculating rul = max cycles - current cycle
raw_test_copy['max_cycle'] = raw_test_copy.merge(max_cycle, on='unit', how='left')['max_cycle']
raw_test_copy['rul'] = raw_test_copy['max_cycle'] - raw_test_copy['current_cycle']
y_test = np.log1p(raw_test_copy['rul'])

In [35]:
preprocessing_pipeline = Pipeline([
    ("rename columns", FunctionTransformer(p.rename_columns, validate=False)),
    ("drop na", FunctionTransformer(p.dropna)),
    ("drop no info columns", FunctionTransformer(p.drop_no_info_cols)),
    ("dropping unit column", FunctionTransformer(p.drop_unit)),
    ("updating col groups", FunctionTransformer(p.update_col_groups)),    # returns the df from prev step
    ("Standardizing data", FunctionTransformer(p.standardize_predictors, kw_args={'cols':['cycles']+p.get_sensor_cols()+p.get_op_setting_cols()})),
    ("performing PCA(1)", pca)
])

In [36]:
processed_X_test = preprocessing_pipeline.fit_transform(raw_test)

In [None]:
rf_pred = rf_model.predict(processed_X_test)
mse = mean_squared_error(y_test, rf_pred)
rmse = np.sqrt(mse)

print("Random Forest Model Test results:")
print(f'Root Mean Squared Error: {rmse}')


Random Forest Model Test results:
Root Mean Squared Error: 122.42930119401544


In [39]:
xgb_pred = xgb_model.predict(processed_X_test)
mse = mean_squared_error(y_test, xgb_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, xgb_pred)

print("XGBoost Model Test results:")
print(f'Root Mean Squared Error: {rmse}')

XGBoost Model Test results:
Root Mean Squared Error: 120.80264180306887
