In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from autogluon.tabular import TabularPredictor
import os

In [2]:
def compute_volume_weighted_component_features(X):
    """
    Computes individual volume-weighted features WjPk = Componentj_fraction * Componentj_Propertyk
    for j in 1..5 and k in 1..10 (total 50 features).
    """
    features = {}
    for comp_idx in range(1, 6):  # Components 1–5
        for prop_idx in range(1, 11):  # Properties 1–10
            vol_col = f'Component{comp_idx}_fraction'
            prop_col = f'Component{comp_idx}_Property{prop_idx}'
            feat_name = f'W{comp_idx}P{prop_idx}'
            features[feat_name] = X[vol_col] * X[prop_col]
    return pd.DataFrame(features)

In [3]:
targets = [f"BlendProperty{i}" for i in range(1, 11)]
BASE_PATH = "/pscratch/sd/r/ritesh11/temp_dir/dataset"
model_dir = "/pscratch/sd/r/ritesh11/temp_dir/_models"
fi_path = "/pscratch/sd/r/ritesh11/temp_dir/feature_importance"

In [4]:
from autogluon.common import space

nn_options = {
    'num_epochs': 1000,
    "epochs_wo_improve": 200,
    'learning_rate': space.Real(1e-4, 1e-2, log=True, default=5e-4),
    "num_layers": space.Categorical(2, 3, 4, 6),
    "hidden_size": space.Categorical(128, 256, 512, 64),
    'activation': space.Categorical('relu','elu','tanh'),
    'dropout_prob': space.Real(0.0, 0.5, default=0.1),
    "weight_decay": space.Real(1e-12, 0.1, default=1e-6, log=True),
    'batch_size': space.Categorical(8,16,32,64,128),
    'optimizer': space.Categorical('adam', 'sgd'),
    "proc.skew_threshold": space.Categorical(0.99, 0.2, 0.3, 0.5, 0.8, 0.9, 0.999, 1.0, 10.0, 100.0),
    "use_batchnorm": space.Categorical(False, True),

}


In [7]:
for t in targets[2:3]:
    X_train = pd.read_csv(f"{BASE_PATH}/train/{t}_X.csv")
    y_train = pd.read_csv(f"{BASE_PATH}/train/{t}_y.csv")
    X_val = pd.read_csv(f"{BASE_PATH}/val/{t}_X.csv")
    y_val = pd.read_csv(f"{BASE_PATH}/val/{t}_y.csv")

    df = pd.read_csv(os.path.join(fi_path, f"{t}.csv"))
    cols = df[df["importance"] > 0.1].iloc[:, 0].tolist()

    
    scaler = StandardScaler()
    
    blend_features = compute_volume_weighted_component_features(X_train)
    X_train = pd.concat([X_train, blend_features], axis=1)
    blend_features = compute_volume_weighted_component_features(X_val)
    X_val = pd.concat([X_val, blend_features], axis=1)
    
    X_train = X_train[cols]
    X_val = X_val[cols]
    
    X_train = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index
    )
    
    X_val = pd.DataFrame(
        scaler.transform(X_val),
        columns=X_val.columns,
        index=X_val.index
    )

    predictor = TabularPredictor(
        label=t,
        problem_type="regression",
        eval_metric="mean_absolute_percentage_error",  # You can use "rmse", "r2", etc.
        path=f"/pscratch/sd/r/ritesh11/temp_dir/NN_models/{t}_stacked",
    )

    predictor.fit(
        train_data=pd.concat([X_train,y_train],axis=1),
        # tuning_data=pd.concat([X_val,y_val],axis=1),
        # hyperparameters={"NN_TORCH": nn_options},
        included_model_types = ['TABPFN'],
        auto_stack=True,
        dynamic_stacking=False,
        # num_stack_levels = 3,
        # hyperparameter_tune_kwargs={
        #         'num_trials': 100, 
        #         'scheduler': 'local',
        #         'searcher': 'auto',
        #     },
        fit_weighted_ensemble=True,
        # use_bag_holdout = True,
        # fit_strategy='parallel'
    )

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.2b20250713
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       172.68 GB / 502.97 GB (34.3%)
Disk Space Avail:   15446274.52 GB / 45921523.47 GB (33.6%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Go

RuntimeError: No models were trained successfully during fit(). Inspect the log output or increase verbosity to determine why no models were fit. Alternatively, set `raise_on_no_models_fitted` to False during the fit call.

In [6]:
from autogluon.tabular import models

In [1]:
import autogluon.tabular.models.TabMModel

ModuleNotFoundError: No module named 'autogluon.tabular.models.TabMModel'

In [17]:
BX_train = predictor.predict(X_train)
BX_train = BX_train.values.reshape(-1,1)

BX_val = predictor.predict(X_val)
BX_val = BX_val.values.reshape(-1,1)

In [18]:
BX_y_val = y_val.values

In [3]:
targets = [f"BlendProperty{i}" for i in range(1, 11)]

In [4]:
data = pd.read_csv("/pscratch/sd/r/ritesh11/temp_dir/dataset/train.csv")
X_test = pd.read_csv("/pscratch/sd/r/ritesh11/temp_dir/dataset/test.csv")
X_train = data.iloc[:,:55]
y = data.iloc[:,55:]

In [5]:
scaler = StandardScaler()

In [6]:
blend_features = compute_volume_weighted_component_features(X_train)
X_train = pd.concat([X_train, blend_features], axis=1)
X_train = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index
    )

blend_features = compute_volume_weighted_component_features(X_test)
X_test = pd.concat([X_test, blend_features], axis=1)
X_test = pd.DataFrame(
        scaler.transform(X_test.iloc[:,1:]),
        columns=X_train.columns,
        index=X_test.index
    )

In [7]:
predictors = []

In [8]:
X_test

Unnamed: 0,Component1_fraction,Component2_fraction,Component3_fraction,Component4_fraction,Component5_fraction,Component1_Property1,Component2_Property1,Component3_Property1,Component4_Property1,Component5_Property1,...,W5P1,W5P2,W5P3,W5P4,W5P5,W5P6,W5P7,W5P8,W5P9,W5P10
0,-0.004229,-0.812096,0.843232,0.197826,-0.430053,-0.178197,-0.719470,0.769188,-0.866653,0.615500,...,0.367271,-1.242069,-0.671839,0.138527,-0.312304,-0.060883,-0.931487,-0.390980,0.726688,-0.424760
1,-1.107444,1.937458,-1.081681,0.197826,0.193393,2.503180,0.193472,-0.501139,-0.190820,-1.907956,...,-1.803644,0.294397,0.171549,1.480426,0.157005,-0.080499,1.176110,0.544844,0.488176,-1.214671
2,-0.126808,-1.117602,-0.059071,1.119267,0.692150,1.548360,0.903237,0.028964,-0.361620,-0.273995,...,-0.342203,-1.064627,1.739907,-2.341116,1.844205,0.739504,0.190984,-0.260478,0.094617,-0.196013
3,1.957042,-1.117602,-0.059071,-1.290655,0.692150,-0.425024,1.027852,-1.186331,-0.843959,-0.804655,...,-0.999000,-0.466778,-2.234812,-0.499662,-0.363466,-2.383778,-0.545705,2.142631,-0.948798,2.114722
4,-1.107444,-1.117602,1.925996,1.119267,-1.427567,-0.187462,-0.740295,-0.476025,2.065007,0.768160,...,0.019313,-0.001666,0.009909,-0.013205,0.018452,-0.013888,0.013271,-0.005846,-0.019667,0.019179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,1.589304,-1.056501,-0.600452,0.481346,-0.679432,1.037410,1.424216,0.792701,-0.439057,0.410072,...,0.190544,-0.109562,-0.174514,0.551778,0.097216,-0.882690,-0.362998,-0.191655,-0.599596,0.378399
496,0.057061,1.754155,-0.901220,-0.794494,-0.430053,-1.306463,-1.494420,-0.992620,0.901859,1.040874,...,0.615029,-0.114417,0.824026,0.114048,-0.355249,-0.712562,-0.709778,0.589479,0.191558,-0.928177
497,1.528014,-1.056501,-0.359838,-0.936255,1.440286,0.807013,0.620821,0.357852,0.286144,1.040874,...,1.731995,2.071313,-1.261354,2.560755,0.346698,-2.273392,-1.925717,0.384031,-2.687372,-0.016989
498,-0.923575,-0.873197,1.444767,0.552226,-0.305364,-0.793042,0.687361,-1.787675,0.847314,0.181414,...,0.126330,0.809500,-0.769716,-0.780904,-0.786315,0.050920,-0.765468,1.131583,-1.256999,-0.479139


In [9]:
import os
import shutil

In [10]:
for t in targets:
    model_dir = f"/pscratch/sd/r/ritesh11/temp_dir/NN_models/{t}"
    out_dir = f"/pscratch/sd/r/ritesh11/temp_dir/NN_models/{t}_full"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
        print(f"DELETED {out_dir}")
    predictor = TabularPredictor.load(model_dir)
    hps = predictor.model_hyperparameters(predictor.model_best,output_format='all')
    predictor = TabularPredictor(
        label=t,
        problem_type="regression",
        eval_metric="mean_absolute_percentage_error", 
        path=out_dir,
    )
    predictor.fit(
        train_data=pd.concat([X_train,y[[t]]],axis=1),
        hyperparameters={"NN_TORCH": hps},
        auto_stack=False,
        dynamic_stacking=False,
        fit_weighted_ensemble=False,
        holdout_frac = 0,
        use_bag_holdout = False,
    )
    predictors.append(predictor)

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty1_full


  import pkg_resources
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       133.88 GB / 502.97 GB (26.6%)
Disk Space Avail:   15342264.55 GB / 45921523.47 GB (33.4%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty2_full


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       138.15 GB / 502.97 GB (27.5%)
Disk Space Avail:   15340406.16 GB / 45921523.47 GB (33.4%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accura

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty3_full


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       140.60 GB / 502.97 GB (28.0%)
Disk Space Avail:   15338327.11 GB / 45921523.47 GB (33.4%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accura

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty4_full


Beginning AutoGluon training ...
AutoGluon will save models to "/pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty4_full"
Train Data Rows:    2000
Train Data Columns: 105
Label Column:       BlendProperty4
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    143033.47 MB
	Train Data (Original)  Memory Usage: 1.60 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', [])

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty5_full


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       139.56 GB / 502.97 GB (27.7%)
Disk Space Avail:   15337189.20 GB / 45921523.47 GB (33.4%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accura

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty6_full


Beginning AutoGluon training ...
AutoGluon will save models to "/pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty6_full"
Train Data Rows:    2000
Train Data Columns: 105
Label Column:       BlendProperty6
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    142089.77 MB
	Train Data (Original)  Memory Usage: 1.60 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', [])

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty7_full


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       141.08 GB / 502.97 GB (28.1%)
Disk Space Avail:   15207649.74 GB / 45921523.47 GB (33.1%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accura

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty8_full


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       144.95 GB / 502.97 GB (28.8%)
Disk Space Avail:   15076408.86 GB / 45921523.47 GB (32.8%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accura

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty9_full


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       142.52 GB / 502.97 GB (28.3%)
Disk Space Avail:   15075580.62 GB / 45921523.47 GB (32.8%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accura

DELETED /pscratch/sd/r/ritesh11/temp_dir/NN_models/BlendProperty10_full


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.12.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Mar 13 20:09:44 UTC 2025 (330b47d)
CPU Count:          256
Memory Avail:       141.48 GB / 502.97 GB (28.1%)
Disk Space Avail:   15074000.21 GB / 45921523.47 GB (32.8%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	presets='good'         : Good accura

In [14]:
predictors[0].leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,NeuralNetTorch,-0.041749,mean_absolute_percentage_error,0.006047,104.141014,0.006047,104.141014,1,True,1


In [12]:
from tqdm import tqdm

In [15]:
test_preds = pd.DataFrame()
idx = 0

for target in tqdm(targets, desc="Predicting on test set"):
    
    test_preds[target] = predictors[idx].predict(X_test)
    idx+=1

Predicting on test set: 100%|██████████| 10/10 [00:00<00:00, 55.62it/s]


In [18]:
test_preds.insert(0, 'ID', test_preds.index+1)
test_preds.to_csv("MLP_submission.csv", index=False)

In [19]:
test_preds

Unnamed: 0,ID,BlendProperty1,BlendProperty2,BlendProperty3,BlendProperty4,BlendProperty5,BlendProperty6,BlendProperty7,BlendProperty8,BlendProperty9,BlendProperty10
0,1,0.269499,0.181336,0.616583,0.590101,0.309642,0.759479,0.646484,0.301811,-0.218438,0.336122
1,2,-0.771319,-0.664547,-1.214896,0.059385,-0.753830,-0.054308,-1.145725,-1.064132,-0.901618,-0.021733
2,3,1.601888,1.000914,1.043142,1.074902,2.328620,1.785143,1.047917,1.976724,0.675748,2.246820
3,4,-0.438803,0.269513,0.834678,-0.642701,1.502412,-0.433490,0.748223,1.870233,1.048527,-0.937008
4,5,0.122862,-1.187016,1.043753,0.403289,2.416820,0.271279,1.059293,-0.103926,-0.579997,1.101275
...,...,...,...,...,...,...,...,...,...,...,...
495,496,0.162771,-0.863502,1.060396,-0.285125,-0.179507,-0.746345,1.096318,-0.486561,-1.434873,-0.477653
496,497,-1.853409,-1.344220,-1.043259,-2.193130,-0.642872,-2.138992,-0.976467,-1.863805,-1.520885,-1.317838
497,498,1.765158,2.149623,0.365237,1.181389,-0.102844,0.651619,0.235907,0.976983,0.350661,0.458840
498,499,-0.119224,0.752155,1.575431,-1.371880,-0.929406,0.115193,1.757926,0.574257,0.383757,1.322213
