# Base Model Training

In [35]:
import os
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [36]:
# Run the notebook from the project root so relative data paths resolve.
# The location can be overridden with the PROJECT_ROOT environment variable;
# the original machine-specific path is kept as the default so existing
# setups keep working unchanged.
PROJECT_ROOT = os.environ.get(
    "PROJECT_ROOT",
    r"C:\Users\amman\Documents\MLOPS\Aircraft-Engine-Predictive-Maintenance",
)
os.chdir(PROJECT_ROOT)
os.getcwd()

'C:\\Users\\amman\\Documents\\MLOPS\\Aircraft-Engine-Predictive-Maintenance'

#### Load the data

In [37]:
# Build the paths with os.path.join so the separator matches the host OS;
# the backslash literals used previously only resolve on Windows.
train_df = pd.read_csv(os.path.join("data", "processed", "train_FD001_processed"))
test_df = pd.read_csv(os.path.join("data", "processed", "test_FD001_processed"))

In [38]:
# Preview the first five rows of the processed training frame.
train_df.head(n=5)

Unnamed: 0,sensor_2,sensor_3,sensor_4,sensor_7,sensor_8,sensor_9,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_17,sensor_20,sensor_21,RUL
0,-1.721725,-0.134255,-0.925936,1.121141,-0.516338,-0.862813,-0.266467,0.334262,-1.05889,-0.269071,-0.603816,-0.78171,1.348493,1.194427,191
1,-1.06178,0.211528,-0.643726,0.43193,-0.798093,-0.958818,-0.191583,1.174899,-0.363646,-0.642845,-0.275852,-0.78171,1.016528,1.236922,190
2,-0.661813,-0.413166,-0.525953,1.008155,-0.234584,-0.557139,-1.015303,1.364721,-0.919841,-0.551629,-0.649144,-2.073094,0.739891,0.503423,189
3,-0.661813,-1.261314,-0.784831,1.222827,0.188048,-0.713826,-1.539489,1.961302,-0.224597,-0.520176,-1.971665,-0.78171,0.352598,0.777792,188
4,-0.621816,-1.251528,-0.301518,0.714393,-0.516338,-0.457059,-0.977861,1.052871,-0.780793,-0.521748,-0.339845,-0.136018,0.463253,1.059552,187


In [39]:
# Preview the first five rows of the processed test frame.
test_df.head(n=5)

Unnamed: 0,sensor_2,sensor_3,sensor_4,sensor_7,sensor_8,sensor_9,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_17,sensor_20,sensor_21,RUL
0,-0.20185,-1.51739,-1.113706,1.188932,-0.234584,-0.400453,-1.165071,0.510525,-0.502695,-0.715188,-1.059765,-0.136018,-0.034694,0.605041,112
1,-0.261846,-0.641513,0.210682,0.172064,0.047171,-0.927118,0.48237,0.442731,-0.085548,-0.883465,0.22276,-1.427402,-0.034694,-0.25779,98
2,0.3981,-0.1261,1.106199,-0.878699,0.892435,-0.723788,1.268649,-0.791108,0.609696,-0.644417,-0.80646,1.155367,0.629236,-0.145087,69
3,0.198117,0.653544,-0.228188,-0.822207,0.469803,-0.631407,0.407486,0.632553,0.192549,-0.530136,0.566722,1.155367,-1.307226,-0.291971,82
4,-0.821799,-0.09511,1.158419,-0.087802,0.047171,-0.50959,-0.303908,-0.56061,0.748745,-0.944275,-0.158531,0.509675,-0.366659,1.126989,91


#### Create Train and Test splits

In [41]:
# Split features and target without mutating the loaded frames.
# DataFrame.pop removes the column in place, which made this cell fail with a
# KeyError when re-run and left train_df/test_df without their "RUL" column.
TARGET = "RUL"
X_train = train_df.drop(columns=TARGET)
y_train = train_df[TARGET]
X_test = test_df.drop(columns=TARGET)
y_test = test_df[TARGET]

#### Create model evaluation function

In [42]:
def evaluate_model(true, predicted):
    """Compute regression metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Model predictions, scored against ``true`` by the scikit-learn metrics.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, r2)`` in that order.
    """
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    # RMSE is the square root of the MSE computed above; no need to score twice.
    rmse = np.sqrt(mse)
    r2 = r2_score(true, predicted)

    return mae, mse, rmse, r2

#### Model Training

In [47]:
# Fixed seed so the stochastic learners produce the same scores on every run.
SEED = 42

# Candidate regressors; library defaults apart from the seed and CatBoost verbosity.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(random_state=SEED),
    "Random Forest Regressor": RandomForestRegressor(random_state=SEED),
    "AdaBoost Regressor": AdaBoostRegressor(random_state=SEED),
    "XGBRegressor": XGBRegressor(random_state=SEED),
    "CatBoosting Regressor": CatBoostRegressor(verbose=False, random_seed=SEED),
}

model_list = []      # model names, in the order they were trained
model_metrics = {}   # name -> [train mae, mse, rmse, r2, test mae, mse, rmse, r2]


def _print_metrics(title, mae, mse, rmse, r2):
    """Print one titled block of the per-model report."""
    print(title)
    print("- Mean Absolute Error (MAE): {:.4f}".format(mae))
    print("- Mean Squared Error (MSE): {:.4f}".format(mse))
    print("- Root Mean Squared Error (RMSE) {:.4f}".format(rmse))
    print("- R2 Score: {:.4f}".format(r2))


# Iterate over (name, estimator) pairs directly instead of rebuilding
# list(models.keys()) / list(models.values()) on every access.
for name, model in models.items():

    # Train model
    model.fit(X_train, y_train)

    # Model predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate training and test sets
    train_scores = evaluate_model(y_train, y_train_pred)
    test_scores = evaluate_model(y_test, y_test_pred)

    print(name + ":")
    model_list.append(name)

    # Train metrics first, then test metrics, each as (mae, mse, rmse, r2).
    model_metrics[name] = [*train_scores, *test_scores]

    _print_metrics("Model Performance for Training Set", *train_scores)
    print("\n")
    _print_metrics("Model Performance for Test Set", *test_scores)

    print('='*35)
    print("\n")




Linear Regression:
Model Performance for Training Set
- Mean Absolute Error (MAE): 34.1153
- Mean Squared Error (MSE): 1995.2473
- Root Mean Squared Error (RMSE) 44.6682
- R2 Score: 0.5794


Model Performance for Test Set
- Mean Absolute Error (MAE): 25.5403
- Mean Squared Error (MSE): 1020.9708
- Root Mean Squared Error (RMSE) 31.9526
- R2 Score: 0.4088


Ridge:
Model Performance for Training Set
- Mean Absolute Error (MAE): 34.1153
- Mean Squared Error (MSE): 1995.2473
- Root Mean Squared Error (RMSE) 44.6682
- R2 Score: 0.5794


Model Performance for Test Set
- Mean Absolute Error (MAE): 25.5401
- Mean Squared Error (MSE): 1020.9455
- Root Mean Squared Error (RMSE) 31.9522
- R2 Score: 0.4088


Lasso:
Model Performance for Training Set
- Mean Absolute Error (MAE): 34.1001
- Mean Squared Error (MSE): 1997.8162
- Root Mean Squared Error (RMSE) 44.6969
- R2 Score: 0.5789


Model Performance for Test Set
- Mean Absolute Error (MAE): 25.6284
- Mean Squared Error (MSE): 1024.7264
- Root Me

### Results

In [48]:
# model_metrics holds one list of eight scores per model. Label the scores,
# transpose to one row per model, and sort by ascending test RMSE.
results = (
    pd.DataFrame(
        model_metrics,
        index=[
            "Train_MAE", "Train_MSE", "Train_RMSE", "Train_R2",
            "Test_MAE", "Test_MSE", "Test_RMSE", "Test_R2",
        ],
    )
    .transpose()
    .sort_values(by="Test_RMSE")
)
results

Unnamed: 0,Train_MAE,Train_MSE,Train_RMSE,Train_R2,Test_MAE,Test_MSE,Test_RMSE,Test_R2
Ridge,34.115273,1995.247341,44.668192,0.579449,25.540133,1020.945492,31.952238,0.408788
Linear Regression,34.115287,1995.24734,44.668192,0.579449,25.540327,1020.970757,31.952633,0.408774
Lasso,34.100142,1997.816233,44.696938,0.578907,25.628435,1024.726444,32.011349,0.406599
CatBoosting Regressor,24.241091,1156.954235,34.01403,0.756141,23.58897,1096.759952,33.117366,0.364885
Random Forest Regressor,10.945419,243.300304,15.598087,0.948718,24.1315,1120.972617,33.480929,0.350864
KNN Regressor,26.285115,1378.162162,37.123607,0.709516,24.812,1150.0096,33.911791,0.33405
XGBRegressor,20.142942,809.382324,28.449645,0.829401,25.597263,1392.505615,37.316292,0.193624
AdaBoost Regressor,36.678372,2172.293008,46.607864,0.542132,37.028883,1986.285531,44.567763,-0.150223
Decision Tree,0.0,0.0,0.0,1.0,34.48,2295.94,47.915968,-0.329539
