# Base Model Training

In [1]:
import os
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [6]:
os.chdir(r"C:\Users\amman\Documents\MLOPS\Aircraft-Engine-Predictive-Maintenance")
os.getcwd()

'C:\\Users\\amman\\Documents\\MLOPS\\Aircraft-Engine-Predictive-Maintenance'

#### Load the data

In [7]:
train_df = pd.read_csv(r"data\processed\train_FD001_processed.txt")
test_df = pd.read_csv(r"data\processed\test_FD001_processed.txt")

In [8]:
train_df.head()

Unnamed: 0,unit,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,-0.31598,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,-1.05889,-0.269071,-0.603816,-1.387779e-17,-0.78171,0.0,0.0,1.348493,1.194427,191
1,1,2,0.872722,-1.03172,0.0,0.0,-1.06178,0.211528,-0.643726,-1.776357e-15,...,-0.363646,-0.642845,-0.275852,-1.387779e-17,-0.78171,0.0,0.0,1.016528,1.236922,190
2,1,3,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-0.919841,-0.551629,-0.649144,-1.387779e-17,-2.073094,0.0,0.0,0.739891,0.503423,189
3,1,4,0.32409,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-0.224597,-0.520176,-1.971665,-1.387779e-17,-0.78171,0.0,0.0,0.352598,0.777792,188
4,1,5,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-0.780793,-0.521748,-0.339845,-1.387779e-17,-0.136018,0.0,0.0,0.463253,1.059552,187


In [9]:
test_df.head()

Unnamed: 0,unit,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,31,-0.27026,1.35691,0.0,0.0,-0.20185,-1.51739,-1.113706,-1.776357e-15,...,-0.502695,-0.715188,-1.059765,-1.387779e-17,-0.136018,0.0,0.0,-0.034694,0.605041,112
1,2,49,0.827003,-0.349255,0.0,0.0,-0.261846,-0.641513,0.210682,-1.776357e-15,...,-0.085548,-0.883465,0.22276,-1.387779e-17,-1.427402,0.0,0.0,-0.034694,-0.25779,98
2,3,126,-0.727453,1.35691,0.0,0.0,0.3981,-0.1261,1.106199,-1.776357e-15,...,0.609696,-0.644417,-0.80646,-1.387779e-17,1.155367,0.0,0.0,0.629236,-0.145087,69
3,4,106,0.552687,1.35691,0.0,0.0,0.198117,0.653544,-0.228188,-1.776357e-15,...,0.192549,-0.530136,0.566722,-1.387779e-17,1.155367,0.0,0.0,-1.307226,-0.291971,82
4,5,98,-0.590295,-1.372953,0.0,0.0,-0.821799,-0.09511,1.158419,-1.776357e-15,...,0.748745,-0.944275,-0.158531,-1.387779e-17,0.509675,0.0,0.0,-0.366659,1.126989,91


#### Create Train and Test splits

In [10]:
X_train = train_df
y_train = X_train.pop("RUL")
X_test = test_df 
y_test = X_test.pop("RUL")

#### Create model evauluation function

In [11]:
def evaluate_model(true, predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2 = r2_score(true, predicted)
    
    return mae, mse, rmse, r2

#### Model Training

In [12]:
models = {
    "Linear Regression": LinearRegression(),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "AdaBoost Regressor": AdaBoostRegressor(),
    "XGBRegressor": XGBRegressor(),
    "CatBoosting Regressor": CatBoostRegressor(verbose=False)
}

model_list = []
model_metrics = {}

for i in range(len(list(models))):

    # Train model
    model = list(models.values())[i]
    model.fit(X_train, y_train)

    # Model predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate training and test sets
    model_train_mae, model_train_mse, model_train_rmse, model_train_r2 = evaluate_model(y_train, y_train_pred)
    model_test_mae, model_test_mse, model_test_rmse, model_test_r2 = evaluate_model(y_test, y_test_pred)

    print(list(models.keys())[i] + ":")
    model_list.append(list(models.keys())[i])

    model_metrics[model_list[-1]] = [model_train_mae, model_train_mse, model_train_rmse, model_train_r2, model_test_mae, model_test_mse, model_test_rmse, model_test_r2]
    


    print("Model Performance for Training Set")
    print("- Mean Absolute Error (MAE): {:.4f}".format(model_train_mae))
    print("- Mean Squared Error (MSE): {:.4f}".format(model_train_mse))
    print("- Root Mean Squared Error (RMSE) {:.4f}".format(model_train_rmse))
    print("- R2 Score: {:.4f}".format(model_train_r2))
    print("\n")
    print("Model Performance for Test Set")
    print("- Mean Absolute Error (MAE): {:.4f}".format(model_test_mae))
    print("- Mean Squared Error (MSE): {:.4f}".format(model_test_mse))
    print("- Root Mean Squared Error (RMSE) {:.4f}".format(model_test_rmse))
    print("- R2 Score: {:.4f}".format(model_test_r2))
    

    print('='*35)
    print("\n")




Linear Regression:
Model Performance for Training Set
- Mean Absolute Error (MAE): 30.4600
- Mean Squared Error (MSE): 1566.3166
- Root Mean Squared Error (RMSE) 39.5767
- R2 Score: 0.6699


Model Performance for Test Set
- Mean Absolute Error (MAE): 26.1235
- Mean Squared Error (MSE): 1007.4578
- Root Mean Squared Error (RMSE) 31.7405
- R2 Score: 0.4166


Ridge:
Model Performance for Training Set
- Mean Absolute Error (MAE): 30.4600
- Mean Squared Error (MSE): 1566.3166
- Root Mean Squared Error (RMSE) 39.5767
- R2 Score: 0.6699


Model Performance for Test Set
- Mean Absolute Error (MAE): 26.1234
- Mean Squared Error (MSE): 1007.4551
- Root Mean Squared Error (RMSE) 31.7404
- R2 Score: 0.4166


Lasso:
Model Performance for Training Set
- Mean Absolute Error (MAE): 30.5085
- Mean Squared Error (MSE): 1571.9015
- Root Mean Squared Error (RMSE) 39.6472
- R2 Score: 0.6687


Model Performance for Test Set
- Mean Absolute Error (MAE): 26.4835
- Mean Squared Error (MSE): 1024.5490
- Root Me

### Results

In [13]:
results = pd.DataFrame(model_metrics, index=["Train_MAE", "Train_MSE", "Train_RMSE", "Train_R2", "Test_MAE", "Test_MSE", "Test_RMSE", "Test_R2"]).T.sort_values("Test_RMSE")
results

Unnamed: 0,Train_MAE,Train_MSE,Train_RMSE,Train_R2,Test_MAE,Test_MSE,Test_RMSE,Test_R2
Ridge,30.460026,1566.316561,39.576717,0.669857,26.123426,1007.455088,31.740433,0.4166
Linear Regression,30.460028,1566.316561,39.576717,0.669857,26.123455,1007.457753,31.740475,0.416599
Lasso,30.508467,1571.90145,39.647212,0.66868,26.483495,1024.548959,32.008576,0.406702
KNN Regressor,13.855334,523.063873,22.87059,0.88975,28.148,1307.1104,36.153982,0.243075
Random Forest Regressor,3.676275,29.560737,5.436979,0.993769,24.6394,1323.413904,36.378756,0.233634
AdaBoost Regressor,32.184622,1474.914521,38.404616,0.689123,31.767638,1383.67858,37.19783,0.198736
XGBRegressor,4.353278,34.847675,5.903192,0.992655,29.567017,1571.758789,39.645413,0.089822
CatBoosting Regressor,4.256834,30.34055,5.508226,0.993605,31.37255,1775.54761,42.137247,-0.028189
Decision Tree,0.0,0.0,0.0,1.0,29.11,1783.89,42.236122,-0.03302
