In [1]:
# Importing required libraries 
import pickle
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="darkgrid")
sns.set(style='ticks')
sns.set_style('white')
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")


# Load resampled_data from the pickle file
def load_from_pickle(filename):
    """Read a single pickled object from ``filename`` and return it.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at files that come from a trusted source.
    """
    with open(filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)

# Splitting the data into train and test sets
def create_train_test_split(resampled_data, test_size=None):
    """Split the data in its existing row order and select model columns.

    Args:
        resampled_data: DataFrame containing every feature column listed
            below plus "TotalDemand" and "Region".
        test_size: Forwarded to ``train_test_split``. ``None`` keeps the
            scikit-learn default (a 25% test split), which is what this
            function always used.

    Returns:
        Tuple ``(train_input_data, train_output_data, test_input_data,
        test_output_data)``. Every frame keeps the "Region" column so the
        rows can later be grouped per region.
    """
    # shuffle=False means nothing random happens: the leading rows become the
    # training set and the trailing rows the test set, so no seed is needed.
    train_data, test_data = train_test_split(resampled_data, test_size=test_size, shuffle=False)

    # Define input and output columns
    input_columns = ['Month', 'Day', 'Hour','Precipitation', 'AirTemperature', 'WetBulbTemperature','DewTemperature','RelativeHumidity%','SeaPressure',
                     'StationPressure','WeekOfMonth', 'Quarter', 'DayOfYear', 'TemperatureDifference','PressureDifference','DayNightTempDifference',
                     'DayOfWeek_Friday', 'DayOfWeek_Monday', 'DayOfWeek_Saturday','DayOfWeek_Sunday', 'DayOfWeek_Thursday', 'DayOfWeek_Tuesday',
                     'DayOfWeek_Wednesday', 'Season_Autumn', 'Season_Spring','Season_Summer', 'Season_Winter','TimeOfDay_Afternoon','TimeOfDay_Evening',
                     'TimeOfDay_Morning', 'TimeOfDay_Night', 'IsWeekend_False', 'IsWeekend_True', 'TemperatureLevel_Cold', 'TemperatureLevel_Hot',
                     'TemperatureLevel_Mild', 'TemperatureLevel_Very Cold','TemperatureLevel_Warm', 'PrecipitationLevel_Heavy Precipitation',
                     'PrecipitationLevel_Light Precipitation','PrecipitationLevel_Moderate Precipitation','PrecipitationLevel_No Precipitation',
                     'PressureLevel_High Pressure','PressureLevel_Low Pressure','PressureLevel_Normal Pressure','HumidityLevel_High Moist',
                     'HumidityLevel_Low Moist','HumidityLevel_Moderate Moist']
    output_columns = ["TotalDemand"]

    # "Region" is appended to both the feature and the target selections so
    # the per-region training/evaluation helpers can filter on it.
    feature_selection = input_columns + ["Region"]
    target_selection = output_columns + ["Region"]

    train_input_data = train_data[feature_selection]
    train_output_data = train_data[target_selection]
    test_input_data = test_data[feature_selection]
    test_output_data = test_data[target_selection]

    return train_input_data, train_output_data, test_input_data, test_output_data


def ExtraTrees(train_input_data, train_output_data):
    """Fit one ExtraTreesRegressor per region.

    Args:
        train_input_data: DataFrame of feature columns plus "Region".
        train_output_data: DataFrame of the target column plus "Region".

    Returns:
        Dict mapping each region found in ``train_input_data`` to its
        fitted regressor.
    """
    models_by_region = {}

    for region_name, region_rows in train_input_data.groupby("Region"):
        # Features and targets for this region, with the grouping column removed
        features = region_rows.drop(columns=["Region"])
        in_region = train_output_data["Region"] == region_name
        targets = train_output_data[in_region].drop(columns=["Region"])

        # The target frame is flattened to the 1-D array that fit() expects
        model = ExtraTreesRegressor(random_state=42, n_jobs=-1)
        model.fit(features, targets.values.flatten())
        models_by_region[region_name] = model

    return models_by_region

def get_predictions(trained_models_et, test_input_data, test_output_data, train_input_data=None):
    """Predict with each region's model on that region's rows.

    Args:
        trained_models_et: Mapping of region -> fitted model exposing
            ``predict``.
        test_input_data: DataFrame of feature columns plus "Region".
        test_output_data: Not read by this function; accepted only so the
            existing call signature keeps working.
        train_input_data: Optional DataFrame of feature columns plus
            "Region". When given, train-set predictions are produced too.

    Returns:
        Tuple ``(test_predictions, train_predictions)`` of dicts keyed by
        region. The train dict is empty when ``train_input_data`` is None.
    """
    def _predict_for_region(model, frame, region):
        # Keep only this region's rows and drop the grouping column,
        # which the models were not trained on.
        region_rows = frame[frame["Region"] == region].drop(columns=["Region"])
        return model.predict(region_rows)

    test_predictions_ET = {}
    train_predictions_ET = {}

    for region, model in trained_models_et.items():
        test_predictions_ET[region] = _predict_for_region(model, test_input_data, region)
        if train_input_data is not None:
            train_predictions_ET[region] = _predict_for_region(model, train_input_data, region)

    return test_predictions_ET, train_predictions_ET

def evaluate_model(test_output_data, test_predictions_ET, train_output_data, train_predictions_ET, n_predictors=None):
    """Compute per-region regression metrics for the test and train splits.

    Args:
        test_output_data: DataFrame with "Region" and "TotalDemand" columns
            holding the actual test values.
        test_predictions_ET: Mapping of region -> predictions for that
            region's test rows.
        train_output_data: Same layout as ``test_output_data`` for the
            training split.
        train_predictions_ET: Mapping of region -> predictions for that
            region's training rows. May be empty.
        n_predictors: Number of model features ``p`` used in the adjusted
            R-squared formula. When None, the historical value
            ``output_frame.shape[1] - 1`` is used; note that this counts
            the columns of the *output* frame, not the model's features,
            so pass the real feature count for a meaningful adjustment.

    Returns:
        Tuple ``(test_metrics, train_metrics)``. Each is a dict mapping a
        region to {"MSE", "RMSE", "MAE", "R-squared", "Adjusted R-squared"}.
        Regions with no predictions, or whose prediction count differs from
        the number of actual values, are left out.
    """
    def _metrics_by_region(output_data, predictions):
        p = output_data.shape[1] - 1 if n_predictors is None else n_predictors
        metrics = {}
        for region in output_data["Region"].unique():
            # An empty/partial prediction dict is valid input (e.g. train
            # predictions were never requested), so skip instead of raising.
            if region not in predictions:
                continue
            actual = output_data.loc[output_data["Region"] == region, "TotalDemand"].values
            predicted = predictions[region]
            if len(actual) != len(predicted):  # Ensure the same number of samples
                continue
            mse = mean_squared_error(actual, predicted)
            r2 = r2_score(actual, predicted)
            n = len(actual)
            # Adjusted R-squared is only defined for n > p + 1; report NaN
            # rather than dividing by zero.
            dof = n - p - 1
            adjusted_r2 = 1 - (1 - r2) * (n - 1) / dof if dof > 0 else float("nan")
            metrics[region] = {"MSE": mse, "RMSE": np.sqrt(mse), "MAE": mean_absolute_error(actual, predicted),
                               "R-squared": r2, "Adjusted R-squared": adjusted_r2}
        return metrics

    evaluation_metrics_test_ET = _metrics_by_region(test_output_data, test_predictions_ET)
    evaluation_metrics_train_ET = _metrics_by_region(train_output_data, train_predictions_ET)

    return evaluation_metrics_test_ET, evaluation_metrics_train_ET

def GradientBoosting(train_input_data, train_output_data):
    """Fit one GradientBoostingRegressor per region.

    Args:
        train_input_data: DataFrame of feature columns plus "Region".
        train_output_data: DataFrame of the target column plus "Region".

    Returns:
        Dict mapping each region found in ``train_input_data`` to its
        fitted regressor.
    """
    fitted = {}

    for region_key, region_frame in train_input_data.groupby("Region"):
        # Strip the grouping column from both the features and the targets
        X_region = region_frame.drop(columns=["Region"])
        target_mask = train_output_data["Region"] == region_key
        y_region = train_output_data[target_mask].drop(columns=["Region"]).values.flatten()

        booster = GradientBoostingRegressor(random_state=42)
        booster.fit(X_region, y_region)
        fitted[region_key] = booster

    return fitted

def get_predictions(trained_models_gbr, test_input_data, test_output_data, train_input_data=None):
    """Predict with each region's model on that region's rows.

    Args:
        trained_models_gbr: Mapping of region -> fitted model exposing
            ``predict``.
        test_input_data: DataFrame of feature columns plus "Region".
        test_output_data: Not read by this function; accepted only so the
            existing call signature keeps working.
        train_input_data: Optional DataFrame of feature columns plus
            "Region". When given, train-set predictions are produced too.

    Returns:
        Tuple ``(test_predictions, train_predictions)`` of dicts keyed by
        region. The train dict is empty when ``train_input_data`` is None.
    """
    def _predict_for_region(model, frame, region):
        # Keep only this region's rows and drop the grouping column,
        # which the models were not trained on.
        region_rows = frame[frame["Region"] == region].drop(columns=["Region"])
        return model.predict(region_rows)

    test_predictions_GBR = {}
    train_predictions_GBR = {}

    for region, model in trained_models_gbr.items():
        test_predictions_GBR[region] = _predict_for_region(model, test_input_data, region)
        if train_input_data is not None:
            train_predictions_GBR[region] = _predict_for_region(model, train_input_data, region)

    return test_predictions_GBR, train_predictions_GBR

def evaluate_model(test_output_data, test_predictions_GBR, train_output_data, train_predictions_GBR, n_predictors=None):
    """Compute per-region regression metrics for the test and train splits.

    Args:
        test_output_data: DataFrame with "Region" and "TotalDemand" columns
            holding the actual test values.
        test_predictions_GBR: Mapping of region -> predictions for that
            region's test rows.
        train_output_data: Same layout as ``test_output_data`` for the
            training split.
        train_predictions_GBR: Mapping of region -> predictions for that
            region's training rows. May be empty.
        n_predictors: Number of model features ``p`` used in the adjusted
            R-squared formula. When None, the historical value
            ``output_frame.shape[1] - 1`` is used; note that this counts
            the columns of the *output* frame, not the model's features,
            so pass the real feature count for a meaningful adjustment.

    Returns:
        Tuple ``(test_metrics, train_metrics)``. Each is a dict mapping a
        region to {"MSE", "RMSE", "MAE", "R-squared", "Adjusted R-squared"}.
        Regions with no predictions, or whose prediction count differs from
        the number of actual values, are left out.
    """
    def _metrics_by_region(output_data, predictions):
        p = output_data.shape[1] - 1 if n_predictors is None else n_predictors
        metrics = {}
        for region in output_data["Region"].unique():
            # An empty/partial prediction dict is valid input (e.g. train
            # predictions were never requested), so skip instead of raising.
            if region not in predictions:
                continue
            actual = output_data.loc[output_data["Region"] == region, "TotalDemand"].values
            predicted = predictions[region]
            if len(actual) != len(predicted):  # Ensure the same number of samples
                continue
            mse = mean_squared_error(actual, predicted)
            r2 = r2_score(actual, predicted)
            n = len(actual)
            # Adjusted R-squared is only defined for n > p + 1; report NaN
            # rather than dividing by zero.
            dof = n - p - 1
            adjusted_r2 = 1 - (1 - r2) * (n - 1) / dof if dof > 0 else float("nan")
            metrics[region] = {"MSE": mse, "RMSE": np.sqrt(mse), "MAE": mean_absolute_error(actual, predicted),
                               "R-squared": r2, "Adjusted R-squared": adjusted_r2}
        return metrics

    evaluation_metrics_test_GBR = _metrics_by_region(test_output_data, test_predictions_GBR)
    evaluation_metrics_train_GBR = _metrics_by_region(train_output_data, train_predictions_GBR)

    return evaluation_metrics_test_GBR, evaluation_metrics_train_GBR

def XGBoost(train_input_data, train_output_data):
    """Fit one XGBRegressor (squared-error objective) per region.

    Args:
        train_input_data: DataFrame of feature columns plus "Region".
        train_output_data: DataFrame of the target column plus "Region".

    Returns:
        Dict mapping each region found in ``train_input_data`` to its
        fitted regressor.
    """
    per_region_models = {}

    for region_label, region_inputs in train_input_data.groupby("Region"):
        # Targets belonging to the same region, minus the grouping column
        region_targets = train_output_data[train_output_data["Region"] == region_label]
        region_targets = region_targets.drop(columns=["Region"])
        region_features = region_inputs.drop(columns=["Region"])

        xgb_model = XGBRegressor(objective='reg:squarederror', random_state=42, n_jobs=-1)
        xgb_model.fit(region_features, region_targets.values.flatten())
        per_region_models[region_label] = xgb_model

    return per_region_models

def get_predictions(trained_models_xgb, test_input_data, test_output_data, train_input_data=None):
    """Predict with each region's model on that region's rows.

    Args:
        trained_models_xgb: Mapping of region -> fitted model exposing
            ``predict``.
        test_input_data: DataFrame of feature columns plus "Region".
        test_output_data: Not read by this function; accepted only so the
            existing call signature keeps working.
        train_input_data: Optional DataFrame of feature columns plus
            "Region". When given, train-set predictions are produced too.

    Returns:
        Tuple ``(test_predictions, train_predictions)`` of dicts keyed by
        region. The train dict is empty when ``train_input_data`` is None.
    """
    def _predict_for_region(model, frame, region):
        # Keep only this region's rows and drop the grouping column,
        # which the models were not trained on.
        region_rows = frame[frame["Region"] == region].drop(columns=["Region"])
        return model.predict(region_rows)

    test_predictions_XGB = {}
    train_predictions_XGB = {}

    for region, model in trained_models_xgb.items():
        test_predictions_XGB[region] = _predict_for_region(model, test_input_data, region)
        if train_input_data is not None:
            train_predictions_XGB[region] = _predict_for_region(model, train_input_data, region)

    return test_predictions_XGB, train_predictions_XGB

def evaluate_model(test_output_data, test_predictions_XGB, train_output_data, train_predictions_XGB, n_predictors=None):
    """Compute per-region regression metrics for the test and train splits.

    Args:
        test_output_data: DataFrame with "Region" and "TotalDemand" columns
            holding the actual test values.
        test_predictions_XGB: Mapping of region -> predictions for that
            region's test rows.
        train_output_data: Same layout as ``test_output_data`` for the
            training split.
        train_predictions_XGB: Mapping of region -> predictions for that
            region's training rows. May be empty.
        n_predictors: Number of model features ``p`` used in the adjusted
            R-squared formula. When None, the historical value
            ``output_frame.shape[1] - 1`` is used; note that this counts
            the columns of the *output* frame, not the model's features,
            so pass the real feature count for a meaningful adjustment.

    Returns:
        Tuple ``(test_metrics, train_metrics)``. Each is a dict mapping a
        region to {"MSE", "RMSE", "MAE", "R-squared", "Adjusted R-squared"}.
        Regions with no predictions, or whose prediction count differs from
        the number of actual values, are left out.
    """
    def _metrics_by_region(output_data, predictions):
        p = output_data.shape[1] - 1 if n_predictors is None else n_predictors
        metrics = {}
        for region in output_data["Region"].unique():
            # An empty/partial prediction dict is valid input (e.g. train
            # predictions were never requested), so skip instead of raising.
            if region not in predictions:
                continue
            actual = output_data.loc[output_data["Region"] == region, "TotalDemand"].values
            predicted = predictions[region]
            if len(actual) != len(predicted):  # Ensure the same number of samples
                continue
            mse = mean_squared_error(actual, predicted)
            r2 = r2_score(actual, predicted)
            n = len(actual)
            # Adjusted R-squared is only defined for n > p + 1; report NaN
            # rather than dividing by zero.
            dof = n - p - 1
            adjusted_r2 = 1 - (1 - r2) * (n - 1) / dof if dof > 0 else float("nan")
            metrics[region] = {"MSE": mse, "RMSE": np.sqrt(mse), "MAE": mean_absolute_error(actual, predicted),
                               "R-squared": r2, "Adjusted R-squared": adjusted_r2}
        return metrics

    evaluation_metrics_test_XGB = _metrics_by_region(test_output_data, test_predictions_XGB)
    evaluation_metrics_train_XGB = _metrics_by_region(train_output_data, train_predictions_XGB)

    return evaluation_metrics_test_XGB, evaluation_metrics_train_XGB

def LightGBM(train_input_data, train_output_data):
    """Fit one LGBMRegressor per region.

    Args:
        train_input_data: DataFrame of feature columns plus "Region".
        train_output_data: DataFrame of the target column plus "Region".

    Returns:
        Dict mapping each region found in ``train_input_data`` to its
        fitted regressor.
    """
    region_models = {}

    for current_region, inputs_for_region in train_input_data.groupby("Region"):
        # Select this region's targets, then remove the grouping column
        # from both sides before fitting.
        outputs_for_region = train_output_data[train_output_data["Region"] == current_region]
        X = inputs_for_region.drop(columns=["Region"])
        y = outputs_for_region.drop(columns=["Region"]).values.flatten()

        lgbm_model = LGBMRegressor(random_state=42, verbose=-1)
        lgbm_model.fit(X, y)
        region_models[current_region] = lgbm_model

    return region_models

def get_predictions(trained_models_lgbm, test_input_data, test_output_data, train_input_data=None):
    """Predict with each region's model on that region's rows.

    Args:
        trained_models_lgbm: Mapping of region -> fitted model exposing
            ``predict``.
        test_input_data: DataFrame of feature columns plus "Region".
        test_output_data: Not read by this function; accepted only so the
            existing call signature keeps working.
        train_input_data: Optional DataFrame of feature columns plus
            "Region". When given, train-set predictions are produced too.

    Returns:
        Tuple ``(test_predictions, train_predictions)`` of dicts keyed by
        region. The train dict is empty when ``train_input_data`` is None.
    """
    def _predict_for_region(model, frame, region):
        # Keep only this region's rows and drop the grouping column,
        # which the models were not trained on.
        region_rows = frame[frame["Region"] == region].drop(columns=["Region"])
        return model.predict(region_rows)

    test_predictions_LGBM = {}
    train_predictions_LGBM = {}

    for region, model in trained_models_lgbm.items():
        test_predictions_LGBM[region] = _predict_for_region(model, test_input_data, region)
        if train_input_data is not None:
            train_predictions_LGBM[region] = _predict_for_region(model, train_input_data, region)

    return test_predictions_LGBM, train_predictions_LGBM

def evaluate_model(test_output_data, test_predictions_LGBM, train_output_data, train_predictions_LGBM, n_predictors=None):
    """Compute per-region regression metrics for the test and train splits.

    Args:
        test_output_data: DataFrame with "Region" and "TotalDemand" columns
            holding the actual test values.
        test_predictions_LGBM: Mapping of region -> predictions for that
            region's test rows.
        train_output_data: Same layout as ``test_output_data`` for the
            training split.
        train_predictions_LGBM: Mapping of region -> predictions for that
            region's training rows. May be empty.
        n_predictors: Number of model features ``p`` used in the adjusted
            R-squared formula. When None, the historical value
            ``output_frame.shape[1] - 1`` is used; note that this counts
            the columns of the *output* frame, not the model's features,
            so pass the real feature count for a meaningful adjustment.

    Returns:
        Tuple ``(test_metrics, train_metrics)``. Each is a dict mapping a
        region to {"MSE", "RMSE", "MAE", "R-squared", "Adjusted R-squared"}.
        Regions with no predictions, or whose prediction count differs from
        the number of actual values, are left out.
    """
    def _metrics_by_region(output_data, predictions):
        p = output_data.shape[1] - 1 if n_predictors is None else n_predictors
        metrics = {}
        for region in output_data["Region"].unique():
            # An empty/partial prediction dict is valid input (e.g. train
            # predictions were never requested), so skip instead of raising.
            if region not in predictions:
                continue
            actual = output_data.loc[output_data["Region"] == region, "TotalDemand"].values
            predicted = predictions[region]
            if len(actual) != len(predicted):  # Ensure the same number of samples
                continue
            mse = mean_squared_error(actual, predicted)
            r2 = r2_score(actual, predicted)
            n = len(actual)
            # Adjusted R-squared is only defined for n > p + 1; report NaN
            # rather than dividing by zero.
            dof = n - p - 1
            adjusted_r2 = 1 - (1 - r2) * (n - 1) / dof if dof > 0 else float("nan")
            metrics[region] = {"MSE": mse, "RMSE": np.sqrt(mse), "MAE": mean_absolute_error(actual, predicted),
                               "R-squared": r2, "Adjusted R-squared": adjusted_r2}
        return metrics

    evaluation_metrics_test_LGBM = _metrics_by_region(test_output_data, test_predictions_LGBM)
    evaluation_metrics_train_LGBM = _metrics_by_region(train_output_data, train_predictions_LGBM)

    return evaluation_metrics_test_LGBM, evaluation_metrics_train_LGBM

def CatBoost(train_input_data, train_output_data):
    """Fit one CatBoostRegressor (silent mode) per region.

    Args:
        train_input_data: DataFrame of feature columns plus "Region".
        train_output_data: DataFrame of the target column plus "Region".

    Returns:
        Dict mapping each region found in ``train_input_data`` to its
        fitted regressor.
    """
    trained_by_region = {}

    for region_id, region_block in train_input_data.groupby("Region"):
        # Row filter for the matching targets
        matches_region = train_output_data["Region"] == region_id
        target_values = train_output_data[matches_region].drop(columns=["Region"]).values.flatten()
        feature_frame = region_block.drop(columns=["Region"])

        cat_model = CatBoostRegressor(random_state=42, silent=True)
        cat_model.fit(feature_frame, target_values)
        trained_by_region[region_id] = cat_model

    return trained_by_region

def get_predictions(trained_models_cbr, test_input_data, test_output_data, train_input_data=None):
    """Predict with each region's model on that region's rows.

    Args:
        trained_models_cbr: Mapping of region -> fitted model exposing
            ``predict``.
        test_input_data: DataFrame of feature columns plus "Region".
        test_output_data: Not read by this function; accepted only so the
            existing call signature keeps working.
        train_input_data: Optional DataFrame of feature columns plus
            "Region". When given, train-set predictions are produced too.

    Returns:
        Tuple ``(test_predictions, train_predictions)`` of dicts keyed by
        region. The train dict is empty when ``train_input_data`` is None.
    """
    def _predict_for_region(model, frame, region):
        # Keep only this region's rows and drop the grouping column,
        # which the models were not trained on.
        region_rows = frame[frame["Region"] == region].drop(columns=["Region"])
        return model.predict(region_rows)

    test_predictions_CBR = {}
    train_predictions_CBR = {}

    for region, model in trained_models_cbr.items():
        test_predictions_CBR[region] = _predict_for_region(model, test_input_data, region)
        if train_input_data is not None:
            train_predictions_CBR[region] = _predict_for_region(model, train_input_data, region)

    return test_predictions_CBR, train_predictions_CBR

def evaluate_model(test_output_data, test_predictions_CBR, train_output_data, train_predictions_CBR, n_predictors=None):
    """Compute per-region regression metrics for the test and train splits.

    Args:
        test_output_data: DataFrame with "Region" and "TotalDemand" columns
            holding the actual test values.
        test_predictions_CBR: Mapping of region -> predictions for that
            region's test rows.
        train_output_data: Same layout as ``test_output_data`` for the
            training split.
        train_predictions_CBR: Mapping of region -> predictions for that
            region's training rows. May be empty.
        n_predictors: Number of model features ``p`` used in the adjusted
            R-squared formula. When None, the historical value
            ``output_frame.shape[1] - 1`` is used; note that this counts
            the columns of the *output* frame, not the model's features,
            so pass the real feature count for a meaningful adjustment.

    Returns:
        Tuple ``(test_metrics, train_metrics)``. Each is a dict mapping a
        region to {"MSE", "RMSE", "MAE", "R-squared", "Adjusted R-squared"}.
        Regions with no predictions, or whose prediction count differs from
        the number of actual values, are left out.
    """
    def _metrics_by_region(output_data, predictions):
        p = output_data.shape[1] - 1 if n_predictors is None else n_predictors
        metrics = {}
        for region in output_data["Region"].unique():
            # An empty/partial prediction dict is valid input (e.g. train
            # predictions were never requested), so skip instead of raising.
            if region not in predictions:
                continue
            actual = output_data.loc[output_data["Region"] == region, "TotalDemand"].values
            predicted = predictions[region]
            if len(actual) != len(predicted):  # Ensure the same number of samples
                continue
            mse = mean_squared_error(actual, predicted)
            r2 = r2_score(actual, predicted)
            n = len(actual)
            # Adjusted R-squared is only defined for n > p + 1; report NaN
            # rather than dividing by zero.
            dof = n - p - 1
            adjusted_r2 = 1 - (1 - r2) * (n - 1) / dof if dof > 0 else float("nan")
            metrics[region] = {"MSE": mse, "RMSE": np.sqrt(mse), "MAE": mean_absolute_error(actual, predicted),
                               "R-squared": r2, "Adjusted R-squared": adjusted_r2}
        return metrics

    evaluation_metrics_test_CBR = _metrics_by_region(test_output_data, test_predictions_CBR)
    evaluation_metrics_train_CBR = _metrics_by_region(train_output_data, train_predictions_CBR)

    return evaluation_metrics_test_CBR, evaluation_metrics_train_CBR

def ADABoost(train_input_data, train_output_data):
    """Fit one AdaBoostRegressor per region.

    Args:
        train_input_data: DataFrame of feature columns plus "Region".
        train_output_data: DataFrame of the target column plus "Region".

    Returns:
        Dict mapping each region found in ``train_input_data`` to its
        fitted regressor.
    """
    ada_models = {}

    for zone, zone_inputs in train_input_data.groupby("Region"):
        # Pair this region's features with its targets; neither side keeps
        # the grouping column.
        zone_outputs = train_output_data[train_output_data["Region"] == zone].drop(columns=["Region"])
        zone_features = zone_inputs.drop(columns=["Region"])
        zone_targets = zone_outputs.values.flatten()

        ada_model = AdaBoostRegressor(random_state=42)
        ada_model.fit(zone_features, zone_targets)
        ada_models[zone] = ada_model

    return ada_models

def get_predictions(trained_models_adab, test_input_data, test_output_data, train_input_data=None):
    """Predict with each region's model on that region's rows.

    Args:
        trained_models_adab: Mapping of region -> fitted model exposing
            ``predict``.
        test_input_data: DataFrame of feature columns plus "Region".
        test_output_data: Not read by this function; accepted only so the
            existing call signature keeps working.
        train_input_data: Optional DataFrame of feature columns plus
            "Region". When given, train-set predictions are produced too.

    Returns:
        Tuple ``(test_predictions, train_predictions)`` of dicts keyed by
        region. The train dict is empty when ``train_input_data`` is None.
    """
    def _predict_for_region(model, frame, region):
        # Keep only this region's rows and drop the grouping column,
        # which the models were not trained on.
        region_rows = frame[frame["Region"] == region].drop(columns=["Region"])
        return model.predict(region_rows)

    test_predictions_ADAB = {}
    train_predictions_ADAB = {}

    for region, model in trained_models_adab.items():
        test_predictions_ADAB[region] = _predict_for_region(model, test_input_data, region)
        if train_input_data is not None:
            train_predictions_ADAB[region] = _predict_for_region(model, train_input_data, region)

    return test_predictions_ADAB, train_predictions_ADAB

def evaluate_model(test_output_data, test_predictions_ADAB, train_output_data, train_predictions_ADAB, n_predictors=None):
    """Compute per-region regression metrics for the test and train splits.

    Args:
        test_output_data: DataFrame with "Region" and "TotalDemand" columns
            holding the actual test values.
        test_predictions_ADAB: Mapping of region -> predictions for that
            region's test rows.
        train_output_data: Same layout as ``test_output_data`` for the
            training split.
        train_predictions_ADAB: Mapping of region -> predictions for that
            region's training rows. May be empty.
        n_predictors: Number of model features ``p`` used in the adjusted
            R-squared formula. When None, the historical value
            ``output_frame.shape[1] - 1`` is used; note that this counts
            the columns of the *output* frame, not the model's features,
            so pass the real feature count for a meaningful adjustment.

    Returns:
        Tuple ``(test_metrics, train_metrics)``. Each is a dict mapping a
        region to {"MSE", "RMSE", "MAE", "R-squared", "Adjusted R-squared"}.
        Regions with no predictions, or whose prediction count differs from
        the number of actual values, are left out.
    """
    def _metrics_by_region(output_data, predictions):
        p = output_data.shape[1] - 1 if n_predictors is None else n_predictors
        metrics = {}
        for region in output_data["Region"].unique():
            # An empty/partial prediction dict is valid input (e.g. train
            # predictions were never requested), so skip instead of raising.
            if region not in predictions:
                continue
            actual = output_data.loc[output_data["Region"] == region, "TotalDemand"].values
            predicted = predictions[region]
            if len(actual) != len(predicted):  # Ensure the same number of samples
                continue
            mse = mean_squared_error(actual, predicted)
            r2 = r2_score(actual, predicted)
            n = len(actual)
            # Adjusted R-squared is only defined for n > p + 1; report NaN
            # rather than dividing by zero.
            dof = n - p - 1
            adjusted_r2 = 1 - (1 - r2) * (n - 1) / dof if dof > 0 else float("nan")
            metrics[region] = {"MSE": mse, "RMSE": np.sqrt(mse), "MAE": mean_absolute_error(actual, predicted),
                               "R-squared": r2, "Adjusted R-squared": adjusted_r2}
        return metrics

    evaluation_metrics_test_ADAB = _metrics_by_region(test_output_data, test_predictions_ADAB)
    evaluation_metrics_train_ADAB = _metrics_by_region(train_output_data, train_predictions_ADAB)

    return evaluation_metrics_test_ADAB, evaluation_metrics_train_ADAB

def Stacked_Ensemble_Combine_Predictions(trained_models_xgb, trained_models_lgbm, test_input_data, random_state=42):
    """Combine XGBoost and LightGBM test predictions through a Random Forest.

    Args:
        trained_models_xgb: Mapping of region -> fitted XGBoost model. Its
            keys decide which regions are processed.
        trained_models_lgbm: Mapping of region -> fitted LightGBM model;
            must contain every region present in ``trained_models_xgb``.
        test_input_data: DataFrame of feature columns plus "Region".
        random_state: Seed for the Random Forest meta-model, so repeated
            runs give the same predictions (42 matches the seed used for
            the other regressors in this file).

    Returns:
        Dict mapping each region to the meta-model's predictions for that
        region's test rows.

    NOTE(review): the meta-model is fitted on the *test-set* predictions and
    its target is the XGBoost prediction itself, which is also one of its two
    input features. No actual demand values are involved, so the output can
    only approximate the XGBoost predictions. A real stacking setup would fit
    the meta-model on training-set (ideally out-of-fold) predictions against
    the true target; that needs data this function does not receive.
    """
    # Dictionary to store test predictions for each region
    test_predictions_SE = {}

    for region in trained_models_xgb:
        # Retrieve the test input data for the current region
        region_test_input = test_input_data[test_input_data["Region"] == region].drop(columns=["Region"])

        # Base-model predictions on the region's test rows
        xgb_pred = trained_models_xgb[region].predict(region_test_input)
        lgbm_pred = trained_models_lgbm[region].predict(region_test_input)

        # One row per sample, one column per base model
        ensemble_features = np.vstack((xgb_pred, lgbm_pred)).T

        # Seeded so the ensemble is reproducible like the base models
        meta_model = RandomForestRegressor(random_state=random_state)
        meta_model.fit(ensemble_features, xgb_pred)  # Using XGBoost predictions for training

        test_predictions_SE[region] = meta_model.predict(ensemble_features)

    return test_predictions_SE

def adjusted_r_model_comparison():
    """Show a grouped bar chart comparing the models across regions.

    The scores are hard-coded below (the chart labels them as adjusted
    R-squared values): one list per model, with entries ordered like
    ``region_names``. Each model gets a group of five bars, one per region.
    The seaborn "whitegrid" style is applied before plotting.
    """
    sns.set(style="whitegrid")

    # Region order fixes both the bar order inside a group and the index
    # into every score list.
    region_names = ['QLD', 'SA', 'TAS', 'VIC', 'NSW']
    region_colours = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    model_names = ['ExtraTrees', 'Gradient Boosting', 'XGBoost', 'LightGBM', 'CatBoost', 'ADABoost', 'Stacked Ensemble']
    scores_by_model = {
        'ExtraTrees': [0.7941652793981316, 0.39201131007634915, 0.7220470192873729, 0.4455541543409408, 0.8117077458148688],
        'Gradient Boosting': [0.7820569481377042, 0.3103845806089077, 0.7102251931692325, 0.37191294670349295, 0.7677917757114525],
        'XGBoost': [0.8234800188620719, 0.30479881442366774, 0.7178666881584719, 0.3655267180611894, 0.812876314028925],
        'LightGBM': [0.8268776836153175, 0.3851347340528738, 0.7411651844485945, 0.40101964061509543, 0.818068598082448],
        'CatBoost': [0.8404561562419313, 0.3720477987436638, 0.7339870551721315, 0.3807099585775112, 0.8279404458857167],
        'ADABoost': [0.6910033546058161, 0.24432329542299447, 0.6592185598678055, 0.22428957184409692, 0.5415415370185472],
        'Stacked Ensemble': [0.8234707592766461, 0.3047778966837662, 0.7178573604107441, 0.36553323830187334, 0.8128632983359723]
    }

    width = 0.16
    group_centres = np.arange(len(model_names))

    plt.figure(figsize=(17, 8))

    # Bars are shifted by (position - 2) widths so the five regions sit
    # centred on each model's tick.
    for position, (region_name, colour) in enumerate(zip(region_names, region_colours)):
        heights = [scores_by_model[name][position] for name in model_names]
        bar_positions = group_centres + (position - 2) * width
        plt.bar(bar_positions, heights, width=width, label=region_name, color=colour)

    plt.xlabel('Models', fontsize=12, color='black')
    plt.ylabel('Adjusted R-squared value', fontsize=12, color='black')
    plt.title('Adjusted R-squared value Comparison for Different Models among Regions', fontweight="bold", fontsize=14, color='black')
    plt.xticks(group_centres, model_names, color='black')
    plt.grid(True)
    plt.legend(title='Regions', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()