In [None]:
import os
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Column groups used by the rest of the script: `projects` are the columns
# modelled as targets, `energy_types` are the price columns used as features.
projects = [
    'Onshore', 'Offshore', 'Nuclear', 'CCS', 'Coal',
    'Gas', 'Solar', 'Hydro', 'Biomass',
]
energy_types = ['Coal Price', 'Gas Price', 'Oil Price']

# NOTE(review): the variable names carry a `china_` prefix, but the workbook
# read here is the Middle East one. Names are kept as-is in case other cells
# rely on them.
china_wacc_file_path = 'Middle_EAST_WACC.xlsx'
china_wacc_data = pd.read_excel(china_wacc_file_path)

# Strip surrounding whitespace from the header cells so the column lookups
# below match exactly, then show the cleaned header for a quick sanity check.
china_wacc_data.columns = china_wacc_data.columns.str.strip()
print(china_wacc_data.columns)

# Parse the 'Month' column (expected as year-month) into datetimes.
china_wacc_data['Month'] = pd.to_datetime(
    china_wacc_data['Month'],
    format='%Y-%m',
)

# Rescale each price column to the [0, 1] range. The scaler is fitted on the
# full table and kept in `scaler` so the transform can be inverted later.
scaler = MinMaxScaler()
scaler.fit(china_wacc_data[energy_types])
china_wacc_data[energy_types] = scaler.transform(china_wacc_data[energy_types])

# Fit one AutoGluon regressor per (price feature, project) pair and collect
# goodness-of-fit metrics for each pair in `results` (one dict per pair).
results = []

for energy_type in energy_types:
    for project in projects:
        target = project

        # Features are the selected price column plus 'inflation rate'; rows
        # with a missing value in any of the three columns are dropped.
        train_data = china_wacc_data[[energy_type, 'inflation rate', target]].dropna()

        # Give every pair its own model folder. Reusing a single folder for
        # all fits makes each predictor write over the artifacts of the
        # previous one, leaving only the last model on disk.
        output_dir = os.path.join(
            'AutogluonModels',
            f'{energy_type}_{project}'.replace(' ', '_'),
        )

        predictor = TabularPredictor(label=target, path=output_dir).fit(
            train_data,
            time_limit=600,  # seconds allowed per fit
            presets='best_quality',
            # Only these three model families are trained, each with a
            # small, heavily regularised configuration.
            hyperparameters={
                'GBM': {
                    'max_depth': 3,
                    'learning_rate': 0.01,
                    'num_boost_round': 100,
                    'min_child_weight': 5,
                },
                'CAT': {
                    'depth': 3,
                    'learning_rate': 0.01,
                    'iterations': 100,
                    'l2_leaf_reg': 20,
                },
                'NN_TORCH': {
                    'num_layers': 1,
                    'hidden_size': 16,
                    'dropout': 0.3,
                },
            },
            num_bag_folds=10,
            keep_only_best=True,
            auto_stack=True,
        )

        # NOTE(review): every metric below is computed on the same rows the
        # predictor was fitted on, so the numbers describe in-sample fit, not
        # out-of-sample accuracy. Consider a held-out period for reporting.
        performance = predictor.evaluate(train_data)
        predictions = predictor.predict(train_data)

        mape = mean_absolute_percentage_error(train_data[target], predictions)
        mse = mean_squared_error(train_data[target], predictions)
        rmse = np.sqrt(mse)

        results.append({
            'Energy Type': energy_type,
            'Project': project,
            'R²': performance['r2'],
            'MAPE': mape,
            'RMSE': rmse,
        })


# Tabulate the per-pair metrics (one row per dict in `results`), persist them
# without the index column, and echo the table for inspection.
results_df = pd.DataFrame(data=results)
results_df.to_csv(path_or_buf='Middle_EAST_results.csv', index=False)

print(results_df)


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.11.4
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 25.0.0: Mon Aug 25 21:17:51 PDT 2025; root:xnu-12377.1.9~3/RELEASE_ARM64_T6020
CPU Count:          10
Memory Avail:       4.83 GB / 16.00 GB (30.2%)
Disk Space Avail:   174.20 GB / 460.43 GB (37.8%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=10, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked

Index(['Month', 'Onshore', 'Offshore', 'Nuclear', 'CCS', 'Coal', 'Gas',
       'Solar', 'Hydro', 'Biomass', 'Coal Price', 'Gas Price', 'Oil Price',
       'inflation rate'],
      dtype='object')


Leaderboard on holdout data (DyStack):
                 model  score_holdout  score_val              eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      LightGBM_BAG_L2      -0.002519  -0.003708  root_mean_squared_error        0.681349       0.009436  2.347934                 0.008184                0.002709           0.770131            2       True          4
1  WeightedEnsemble_L3      -0.002519  -0.003708  root_mean_squared_error        0.681794       0.009572  2.349732                 0.000445                0.000136           0.001798            3       True          6
2      LightGBM_BAG_L1      -0.002823  -0.004385  root_mean_squared_error        0.343144       0.003133  0.832955                 0.343144                0.003133           0.832955            1       True          1
3      CatBoost_BAG_L1      -0.002897  -0.004282  root_mean_squared_error        0.330021

   Energy Type   Project        R²      MAPE      RMSE
0   Coal Price   Onshore  0.540160  0.030195  0.003426
1   Coal Price  Offshore  0.593177  0.025625  0.002467
2   Coal Price   Nuclear  0.541950  0.016570  0.001753
3   Coal Price       CCS  0.531644  0.017739  0.001861
4   Coal Price      Coal  0.541603  0.015588  0.001740
5   Coal Price       Gas  0.541603  0.022143  0.001740
6   Coal Price     Solar  0.575663  0.032321  0.003059
7   Coal Price     Hydro  0.542209  0.018198  0.001724
8   Coal Price   Biomass  0.541603  0.015588  0.001740
9    Gas Price   Onshore  0.537959  0.029698  0.003434
10   Gas Price  Offshore  0.613396  0.023242  0.002405
11   Gas Price   Nuclear  0.538070  0.016729  0.001761
12   Gas Price       CCS  0.525473  0.017911  0.001873
13   Gas Price      Coal  0.534054  0.015802  0.001755
14   Gas Price       Gas  0.534054  0.022450  0.001755
15   Gas Price     Solar  0.603539  0.030525  0.002957
16   Gas Price     Hydro  0.533545  0.018524  0.001741
17   Gas P