In [1]:
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the descriptor table and keep only rows whose Kd target is usable:
# first drop missing values, then drop +/- infinity.
df = (
    pd.read_csv("descriptor_based_dataset.csv")
    .loc[lambda frame: frame['Kd'].notnull()]
    .loc[lambda frame: np.isfinite(frame['Kd'])]
)

# Hold out 20% of the rows; the fixed seed keeps the split reproducible across runs.
train_data, test_data = train_test_split(df, random_state=42, test_size=0.2)


In [3]:
# Baseline run: regression on Kd with the 'best_quality' preset, no model
# restrictions, mean squared error as the metric, and a 3600 s time budget.
predictor_baseline = TabularPredictor(
    label='Kd',  # target column; point this at the transformed column instead if Kd is modelled on another scale
    eval_metric='mean_squared_error',
    problem_type='regression',
)
# fit() trains this predictor object, so the name bound above refers to the fitted model.
predictor_baseline.fit(
    train_data=train_data,
    presets='best_quality',
    time_limit=3600,
)

# Last expression of the cell: per-model validation leaderboard.
predictor_baseline.leaderboard(silent=True)

No path specified. Models will be saved in: "AutogluonModels\ag-20250506_124642"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.0
Python Version:     3.11.7
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
CPU Count:          12
Memory Avail:       0.42 GB / 7.42 GB (5.6%)
Disk Space Avail:   15.41 GB / 262.54 GB (5.9%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,-1.08317,mean_squared_error,522.601166,2070.606627,0.003002,3.060996,3,True,14
1,CatBoost_BAG_L2,-1.094616,mean_squared_error,509.099316,1747.464475,0.746022,313.035524,2,True,11
2,NeuralNetFastAI_BAG_L2,-1.096302,mean_squared_error,511.803428,1703.245174,3.450133,268.816223,2,True,13
3,WeightedEnsemble_L2,-1.116098,mean_squared_error,6.26197,1053.284036,0.00214,1.786626,2,True,7
4,RandomForestMSE_BAG_L2,-1.120546,mean_squared_error,514.881524,1477.55717,6.52823,43.128219,2,True,10
5,ExtraTreesMSE_BAG_L2,-1.127092,mean_squared_error,511.873779,1442.565665,3.520484,8.136714,2,True,12
6,RandomForestMSE_BAG_L1,-1.15146,mean_squared_error,4.241178,14.643198,4.241178,14.643198,1,True,5
7,CatBoost_BAG_L1,-1.216908,mean_squared_error,1.27806,1036.116752,1.27806,1036.116752,1,True,6
8,LightGBMXT_BAG_L2,-1.260292,mean_squared_error,511.187376,1488.59317,2.834081,54.164219,2,True,8
9,LightGBMXT_BAG_L1,-1.444756,mean_squared_error,321.162151,228.648296,321.162151,228.648296,1,True,3


In [4]:
# Restrict training to five model families. Each family maps to an empty dict,
# i.e. no per-family overrides are supplied here.
custom_hyperparams = {
    family: {}
    for family in (
        'FASTAI',  # fastai tabular neural network
        'GBM',     # LightGBM (the recorded run produced a single LightGBM model, not the XT/Large variants)
        'CAT',     # CatBoost
        'XGB',     # XGBoost, in case it is considered
        'RF',      # random forest as a backup
    )
}

# Same target, metric and preset as the baseline, but limited to the families above.
predictor_tuned = TabularPredictor(
    label='Kd',
    eval_metric='mean_squared_error',
    problem_type='regression',
)
predictor_tuned.fit(
    train_data=train_data,
    presets='best_quality',
    hyperparameters=custom_hyperparams,
    time_limit=3600,  # adjust based on compute
)

# Last expression of the cell: printed fit summary plus the returned results dict.
predictor_tuned.fit_summary()

No path specified. Models will be saved in: "AutogluonModels\ag-20250506_134738"
Verbosity: 2 (Standard Logging)


AutoGluon Version:  1.3.0
Python Version:     3.11.7
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
CPU Count:          12
Memory Avail:       1.92 GB / 7.42 GB (25.9%)
Disk Space Avail:   14.64 GB / 262.54 GB (5.6%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of remaining time (25%).
		Context path: "c:\Users\NongNam\Documen

*** Summary of fit() ***
Estimated performance of each model:
                    model  score_val         eval_metric  pred_time_val     fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     WeightedEnsemble_L3  -1.089994  mean_squared_error     181.899813  2254.122085                0.023304           2.096485            3       True         10
1         LightGBM_BAG_L2  -1.098402  mean_squared_error     170.312796  1607.085340                0.479897          42.837988            2       True          6
2  NeuralNetFastAI_BAG_L2  -1.098679  mean_squared_error     173.645092  1820.699138                3.812193         256.451786            2       True          9
3         CatBoost_BAG_L2  -1.099015  mean_squared_error     170.214036  1914.015124                0.381137         349.767773            2       True          8
4     WeightedEnsemble_L2  -1.112834  mean_squared_error       5.579888  1278.664041                0.007324           1.64



{'model_types': {'LightGBM_BAG_L1': 'StackerEnsembleModel_LGB',
  'RandomForest_BAG_L1': 'StackerEnsembleModel_RF',
  'CatBoost_BAG_L1': 'StackerEnsembleModel_CatBoost',
  'NeuralNetFastAI_BAG_L1': 'StackerEnsembleModel_NNFastAiTabular',
  'WeightedEnsemble_L2': 'WeightedEnsembleModel',
  'LightGBM_BAG_L2': 'StackerEnsembleModel_LGB',
  'RandomForest_BAG_L2': 'StackerEnsembleModel_RF',
  'CatBoost_BAG_L2': 'StackerEnsembleModel_CatBoost',
  'NeuralNetFastAI_BAG_L2': 'StackerEnsembleModel_NNFastAiTabular',
  'WeightedEnsemble_L3': 'WeightedEnsembleModel'},
 'model_performance': {'LightGBM_BAG_L1': -1.7953837250423779,
  'RandomForest_BAG_L1': -1.1525568446670469,
  'CatBoost_BAG_L1': -1.1977104409506025,
  'NeuralNetFastAI_BAG_L1': -1.703904400676673,
  'WeightedEnsemble_L2': -1.112833884915612,
  'LightGBM_BAG_L2': -1.0984022457410685,
  'RandomForest_BAG_L2': -1.128506813488998,
  'CatBoost_BAG_L2': -1.099014898348514,
  'NeuralNetFastAI_BAG_L2': -1.0986792784833732,
  'WeightedEnsemb

In [5]:
# Per-model validation score, timing and stack level for the restricted-family run (predictor_tuned).
predictor_tuned.leaderboard(silent=True)

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,-1.089994,mean_squared_error,181.899813,2254.122085,0.023304,2.096485,3,True,10
1,LightGBM_BAG_L2,-1.098402,mean_squared_error,170.312796,1607.08534,0.479897,42.837988,2,True,6
2,NeuralNetFastAI_BAG_L2,-1.098679,mean_squared_error,173.645092,1820.699138,3.812193,256.451786,2,True,9
3,CatBoost_BAG_L2,-1.099015,mean_squared_error,170.214036,1914.015124,0.381137,349.767773,2,True,8
4,WeightedEnsemble_L2,-1.112834,mean_squared_error,5.579888,1278.664041,0.007324,1.648161,2,True,5
5,RandomForest_BAG_L2,-1.128507,mean_squared_error,177.203282,1602.968053,7.370383,38.720701,2,True,7
6,RandomForest_BAG_L1,-1.152557,mean_squared_error,4.218316,13.18141,4.218316,13.18141,1,True,2
7,CatBoost_BAG_L1,-1.19771,mean_squared_error,1.354248,1263.83447,1.354248,1263.83447,1,True,3
8,NeuralNetFastAI_BAG_L1,-1.703904,mean_squared_error,3.568819,155.873268,3.568819,155.873268,1,True,4
9,LightGBM_BAG_L1,-1.795384,mean_squared_error,160.691516,131.358204,160.691516,131.358204,1,True,1


In [6]:
# Third configuration: same model families as custom_hyperparams, but with the
# bagging/stacking layout set by hand (5 folds, 2 stack levels) and a 7200 s budget.
# No `presets` argument is passed; the recorded log shows AutoGluon defaulting to 'medium'.
predictor_ensemble = TabularPredictor(
    label='Kd',
    eval_metric='mean_squared_error',
    problem_type='regression',
)
# Trailing semicolon keeps the cell output-free, as with the original single assignment.
predictor_ensemble.fit(
    train_data=train_data,
    hyperparameters=custom_hyperparams,
    num_stack_levels=2,
    num_bag_folds=5,
    time_limit=7200,
);

No path specified. Models will be saved in: "AutogluonModels\ag-20250506_144828"


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.0
Python Version:     3.11.7
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22621
CPU Count:          12
Memory Avail:       1.47 GB / 7.42 GB (19.8%)
Disk Space Avail:   14.05 GB / 262.54 GB (5.4%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accuracy with very fast inference speed.
	presets='medium'

In [7]:
# NOTE(review): this displays predictor_tuned's leaderboard a second time; the predictor_ensemble
# fitted in the preceding cell is never inspected in the cells visible here — confirm which
# predictor was meant to be shown.
predictor_tuned.leaderboard(silent=True)

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,-1.089994,mean_squared_error,181.899813,2254.122085,0.023304,2.096485,3,True,10
1,LightGBM_BAG_L2,-1.098402,mean_squared_error,170.312796,1607.08534,0.479897,42.837988,2,True,6
2,NeuralNetFastAI_BAG_L2,-1.098679,mean_squared_error,173.645092,1820.699138,3.812193,256.451786,2,True,9
3,CatBoost_BAG_L2,-1.099015,mean_squared_error,170.214036,1914.015124,0.381137,349.767773,2,True,8
4,WeightedEnsemble_L2,-1.112834,mean_squared_error,5.579888,1278.664041,0.007324,1.648161,2,True,5
5,RandomForest_BAG_L2,-1.128507,mean_squared_error,177.203282,1602.968053,7.370383,38.720701,2,True,7
6,RandomForest_BAG_L1,-1.152557,mean_squared_error,4.218316,13.18141,4.218316,13.18141,1,True,2
7,CatBoost_BAG_L1,-1.19771,mean_squared_error,1.354248,1263.83447,1.354248,1263.83447,1,True,3
8,NeuralNetFastAI_BAG_L1,-1.703904,mean_squared_error,3.568819,155.873268,3.568819,155.873268,1,True,4
9,LightGBM_BAG_L1,-1.795384,mean_squared_error,160.691516,131.358204,160.691516,131.358204,1,True,1


In [8]:
# Persist the fitted predictor under a stable, human-chosen directory name.
# TabularPredictor.save() has no path parameter (its only argument is `silent`), so
# save("autogluon_kd_predictor") treated the string as a truthy flag and left the artifacts
# in the auto-generated AutogluonModels folder. clone() copies the predictor to the
# requested directory; dirs_exist_ok=True lets the cell be re-run without raising when
# the directory already exists.
predictor_tuned.clone(path="autogluon_kd_predictor", dirs_exist_ok=True)