# RESULTS

__Libraries__ 

In [1]:
import pandas as pd
import numpy as np
import warnings
# Suppress FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Allow up to 100 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns', 100)

__Results__

In [2]:
# Load the per-notebook result tables. Every CSV is read the same way:
# its 'Metrics' column becomes the index of the resulting DataFrame.
def _read_results(csv_path):
    """Read one results CSV, using its 'Metrics' column as the index."""
    return pd.read_csv(csv_path, index_col='Metrics')


# Linear models
Linear_Regression_Models_Part1 = _read_results('Linear_Regression_Models_Part1_Results.csv')
Linear_Regression_Models_Part2 = _read_results('Linear_Regression_Models_Part2_Results.csv')
Linear_Regression_Models_Part3 = _read_results('Linear_Regression_Models_Part3_Results.csv')

# KNN and decision tree
KNN_DT = _read_results('KNN_DT_Results.csv')

# Ensemble methods
Ensemble_Methods_Part1 = _read_results('Ensemble_Models_Part1_Results.csv')
Ensemble_Methods_Part2 = _read_results('Ensemble_Models_Part2_Results.csv')

# Gradient-boosting libraries
XGBoost = _read_results('XGBoost_Results.csv')
CatBoost = _read_results('CatBoost_Results.csv')
LightGBM = _read_results('LightGBM_Results.csv')

# Support vector regression
SVR = _read_results('SVR_Results.csv')

In [3]:
# Place all result tables side by side (they share the 'Metrics' index),
# then transpose so the metrics become the columns of the combined table.
result_frames = [
    Linear_Regression_Models_Part1,
    Linear_Regression_Models_Part2,
    Linear_Regression_Models_Part3,
    KNN_DT,
    Ensemble_Methods_Part1,
    Ensemble_Methods_Part2,
    XGBoost,
    CatBoost,
    LightGBM,
    SVR,
]
Results = pd.concat(result_frames, axis=1).T

# Cells left empty by the concatenation are shown as a dashed placeholder.
Results = Results.fillna('-------')
Results

Metrics,R^2,Training_MSE,Training_MSE (KFold),Test_MSE
PCR,0.483,373.72,385.9,515.84
PCR (2nd Degree),0.497,363.41,396.3,473.87
Forward Selection,0.5,361.3,371.9,323.14
Backwards Elimination,0.507,356.0,366.5,311.26
Stepwise Selection,0.499,361.87,371.2,327.94
Ridge Regression,0.512,352.83,369.6,314.35
Lasso Regression,0.512,352.89,369.7,314.25
KNN,0.54,332.15,405.1,339.92
Decision Tree,0.331,483.48,524.4,445.63
Bagging,0.71,209.44,410.7,341.32


In [4]:
# Function that displays the top max or min values of a column, along with their indices

def display_top_models(dataframe, column_name, n=5, mode='max'):
    """Return the rows holding the ``n`` largest or smallest values of a column.

    A one-line header ("Max <n> values :" or "Min <n> values :") is printed
    before the result is returned.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to inspect.
    column_name : str
        Name of the column to rank.
    n : int, default 5
        Maximum number of rows to return.
    mode : {'max', 'min'}, default 'max'
        'max' selects the largest values, 'min' the smallest.

    Returns
    -------
    pandas.DataFrame or None
        Single-column DataFrame named ``column_name``, indexed by the labels
        of the selected rows and ordered from best to worst for the chosen
        mode. ``None`` is returned (after printing a message) when ``mode``
        is neither 'min' nor 'max'.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``dataframe``.
    """
    # Retrieve the specified column from the DataFrame
    column = dataframe[column_name]

    if mode not in ('min', 'max'):
        print("Invalid mode. Please choose 'min' or 'max'.")
        return None

    # A column that contains text placeholders for missing metrics has object
    # dtype, on which nlargest/nsmallest raise TypeError. Coerce to numbers
    # and drop whatever cannot be ranked (placeholders and NaN).
    rankable = pd.to_numeric(column, errors='coerce').dropna()

    if mode == 'min':
        top_values = rankable.nsmallest(n)
        print("Min {} values :".format(n))
    else:
        top_values = rankable.nlargest(n)
        print("Max {} values :".format(n))

    # The selected Series already carries the row labels as its index.
    return top_values.to_frame(name=column_name)

__Inspecting R^2__

In [5]:
# Five largest R^2 values and the models they belong to.
display_top_models(dataframe=Results, column_name='R^2', n=5, mode='max')

Max 5 values :


Unnamed: 0,R^2
AdaBoost,1.0
XGBoost,0.886
LightGBM,0.839
Bagging,0.71
CatBoost,0.685


In [6]:
# Five smallest R^2 values and the models they belong to.
display_top_models(dataframe=Results, column_name='R^2', n=5, mode='min')

Min 5 values':


Unnamed: 0,R^2
Gradient Boosting,0.196
Decision Tree,0.331
Random Forest,0.4
PCR,0.483
PCR (2nd Degree),0.497


__Inspecting Training_MSE__

In [7]:
# Five smallest Training_MSE values and the models they belong to.
display_top_models(dataframe=Results, column_name='Training_MSE', n=5, mode='min')

Min 5 values':


Unnamed: 0,Training_MSE
AdaBoost,0.1
XGBoost,82.18
LightGBM,116.43
Bagging,209.44
CatBoost,228.04


In [8]:
# Five largest Training_MSE values and the models they belong to.
display_top_models(dataframe=Results, column_name='Training_MSE', n=5, mode='max')

Max 5 values :


Unnamed: 0,Training_MSE
Gradient Boosting,580.95
Decision Tree,483.48
Random Forest,433.72
PCR,373.72
PCR (2nd Degree),363.41


__Inspecting  Training_MSE (KFold)__

In [9]:
# Five smallest Training_MSE (KFold) values and the models they belong to.
display_top_models(dataframe=Results, column_name='Training_MSE (KFold)', n=5, mode='min')

Min 5 values':


Unnamed: 0,Training_MSE (KFold)
LightGBM,275.2
XGBoost,319.1
CatBoost,336.0
AdaBoost,363.1
Backwards Elimination,366.5


In [10]:
# Five largest Training_MSE (KFold) values and the models they belong to.
display_top_models(dataframe=Results, column_name='Training_MSE (KFold)', n=5, mode='max')

Max 5 values :


Unnamed: 0,Training_MSE (KFold)
Gradient Boosting,635.7
Decision Tree,524.4
Random Forest,456.7
Bagging,410.7
KNN,405.1


__Inspecting  Test_MSE__

In [11]:
# Five smallest Test_MSE values and the models they belong to.
display_top_models(dataframe=Results, column_name='Test_MSE', n=5, mode='min')

Min 5 values':


Unnamed: 0,Test_MSE
LightGBM,200.46
XGBoost,253.82
CatBoost,280.97
AdaBoost,295.11
SVR,309.22


In [12]:
# Five largest Test_MSE values and the models they belong to.
display_top_models(dataframe=Results, column_name='Test_MSE', n=5, mode='max')

Max 5 values :


Unnamed: 0,Test_MSE
Gradient Boosting,534.67
PCR,515.84
PCR (2nd Degree),473.87
Decision Tree,445.63
Random Forest,375.03


__CONCLUSIONS__

The optimized boosting algorithms (XGBoost, LightGBM, and CatBoost) stand out as the best performers among the models evaluated: they take the top three places on both the cross-validated MSE (275.2–336.0) and the test MSE (200.46–280.97). These algorithms demonstrate remarkable consistency in avoiding overfitting or underfitting, delivering high levels of prediction accuracy, as evidenced by their cross-validated and test error metrics.

Among these, LightGBM emerges as particularly promising, as it achieves the smallest prediction error of all the evaluated models (cross-validated MSE of 275.2 and test MSE of 200.46). This underscores its potential as a robust and effective algorithm for the given task.