# AutoGluon Multimodal Model

Using AutoGluon's TabularPredictor with multimodal features:
- Text features: request_title, request_text, request_text_edit_aware
- Categorical: requester_user_flair
- Numeric: all other features
- Handles class imbalance automatically
- Uses ensemble of multiple models including text transformers

In [3]:
import pandas as pd
import numpy as np
import json
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Create experiments directory
Path('/home/code/experiments').mkdir(parents=True, exist_ok=True)

print("Loading data...")

Loading data...


In [4]:
# Load training data - file appears to be a JSON array, not line-delimited
import json

with open('/home/data/train.json', 'r') as f:
    # Try to load as a single JSON array
    try:
        train_data = json.load(f)
    except:
        # If that fails, try line-delimited
        f.seek(0)
        train_data = [json.loads(line) for line in f if line.strip()]

train_df = pd.DataFrame(train_data)
print(f"Training data shape: {train_df.shape}")

# Load test data
with open('/home/data/test.json', 'r') as f:
    try:
        test_data = json.load(f)
    except:
        f.seek(0)
        test_data = [json.loads(line) for line in f if line.strip()]

test_df = pd.DataFrame(test_data)
print(f"Test data shape: {test_df.shape}")

# Check target distribution
target = 'requester_received_pizza'
print(f"\nTarget distribution:")
print(train_df[target].value_counts())
print(f"Positive rate: {train_df[target].mean():.3f}")

Training data shape: (4040, 32)
Test data shape: (1631, 17)

Target distribution:
requester_received_pizza
False    3046
True      994
Name: count, dtype: int64
Positive rate: 0.246


In [5]:
# Install AutoGluon if not available
try:
    from autogluon.tabular import TabularPredictor
    print("AutoGluon is available")
except ImportError:
    print("Installing AutoGluon...")
    !pip install -q autogluon
    from autogluon.tabular import TabularPredictor
    print("AutoGluon installed successfully")

AutoGluon is available


In [11]:
from autogluon.tabular import TabularPredictor

# Prepare data - AutoGluon can handle text directly
# Need to convert list columns to strings first

# Convert list columns to string representation
list_col = 'requester_subreddits_at_request'
if list_col in train_df.columns:
    train_df[list_col] = train_df[list_col].apply(lambda x: ','.join(x) if isinstance(x, list) else str(x))
    test_df[list_col] = test_df[list_col].apply(lambda x: ','.join(x) if isinstance(x, list) else str(x))

# Define feature columns (exclude ID columns and target)
exclude_cols = ['request_id', 'requester_received_pizza', 'giver_username_if_known']
feature_cols = [col for col in train_df.columns if col not in exclude_cols]

print(f"Using {len(feature_cols)} features")
print(f"Sample features:")
for col in feature_cols[:5]:
    print(f"  {col}: {train_df[col].dtype}")

Using 29 features
Sample features:
  number_of_downvotes_of_request_at_retrieval: int64
  number_of_upvotes_of_request_at_retrieval: int64
  post_was_edited: object
  request_number_of_comments_at_retrieval: int64
  request_text: object


In [12]:
# Investigate data types and potential issues
print("Data types:")
print(train_df[feature_cols].dtypes.value_counts())

# Check for object columns that might contain lists
object_cols = train_df[feature_cols].select_dtypes(include=['object']).columns
print(f"\nObject columns ({len(object_cols)}):")
for col in object_cols:
    sample_val = train_df[col].iloc[0]
    print(f"  {col}: {type(sample_val)} - {str(sample_val)[:100]}...")
    
# Check for list columns
list_cols = []
for col in feature_cols:
    if isinstance(train_df[col].iloc[0], list):
        list_cols.append(col)
        
if list_cols:
    print(f"\nColumns containing lists: {list_cols}")
else:
    print("\nNo list columns found")

Data types:
int64      16
object      7
float64     6
Name: count, dtype: int64

Object columns (7):
  post_was_edited: <class 'bool'> - False...
  request_text: <class 'str'> - Hi I am in need of food for my 4 children we are a military family that has really hit hard times an...
  request_text_edit_aware: <class 'str'> - Hi I am in need of food for my 4 children we are a military family that has really hit hard times an...
  request_title: <class 'str'> - Request Colorado Springs Help Us Please...
  requester_subreddits_at_request: <class 'str'> - ...
  requester_user_flair: <class 'NoneType'> - None...
  requester_username: <class 'str'> - nickylvst...

No list columns found


In [13]:
# Configure AutoGluon for class imbalance
# Use 'best_quality' preset for maximum performance
# Set time limit to avoid running too long

predictor = TabularPredictor(
    label=target,
    problem_type='binary',
    eval_metric='roc_auc',  # Good for imbalanced classification
    path='/home/code/experiments/autogluon_models'
).fit(
    train_data=train_df[feature_cols + [target]],
    presets='best_quality',
    time_limit=1200,  # 20 minutes
    verbosity=2
)



Verbosity: 2 (Standard Logging)


AutoGluon Version:  1.5.0
Python Version:     3.11.14
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #21~24.04.1-Ubuntu SMP Fri Oct 17 00:56:30 UTC 2025
CPU Count:          12
Pytorch Version:    2.9.1+cu128
CUDA Version:       12.8
GPU Memory:         GPU 0: 79.25/79.25 GB
Total GPU Memory:   Free: 79.25 GB, Allocated: 0.00 GB, Total: 79.25 GB
GPU Count:          1
Memory Avail:       161.45 GB / 167.04 GB (96.7%)
Disk Space Avail:   940.04 GB / 3389.36 GB (27.7%)


Presets specified: ['best_quality']


Using hyperparameters preset: hyperparameters='zeroshot'


Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)


Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1


DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.


	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.


	Running DyStack for up to 300s of the 1200s of remaining time (25%).


DyStack: Disabling memory safe fit mode in DyStack because GPUs were detected and num_gpus='auto' (GPUs cannot be used in memory safe fit mode). If you want to use memory safe fit mode, manually set `num_gpus=0`.


Running DyStack sub-fit ...


Beginning AutoGluon training ... Time limit = 300s


AutoGluon will save models to "/home/code/experiments/autogluon_models/ds_sub_fit/sub_fit_ho"


Train Data Rows:    3591


Train Data Columns: 29


Label Column:       requester_received_pizza


Problem Type:       binary


Preprocessing data ...


Selected class <--> label mapping:  class 1 = True, class 0 = False


Using Feature Generators to preprocess the data ...


Fitting AutoMLPipelineFeatureGenerator...


	Available Memory:                    165334.90 MB


	Train Data (Original)  Memory Usage: 5.51 MB (0.0% of available memory)


	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.


	Stage 1 Generators:


		Fitting AsTypeFeatureGenerator...


	Stage 2 Generators:


		Fitting FillNaFeatureGenerator...


	Stage 3 Generators:


		Fitting IdentityFeatureGenerator...


		Fitting CategoryFeatureGenerator...


			Fitting CategoryMemoryMinimizeFeatureGenerator...


		Fitting TextSpecialFeatureGenerator...


			Fitting BinnedFeatureGenerator...


			Fitting DropDuplicatesFeatureGenerator...


		Fitting TextNgramFeatureGenerator...


			Fitting CountVectorizer for text features: ['request_text', 'request_text_edit_aware', 'request_title']


			CountVectorizer fit with vocabulary size = 2209


	Stage 4 Generators:


		Fitting DropUniqueFeatureGenerator...


	Stage 5 Generators:


		Fitting DropDuplicatesFeatureGenerator...


	Unused Original Features (Count: 1): ['requester_username']


		These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.


		Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.


		These features do not need to be present at inference time.


		('object', []) : 1 | ['requester_username']


	Types of features in original data (raw dtype, special dtypes):


		('float', [])        :  6 | ['requester_account_age_in_days_at_request', 'requester_account_age_in_days_at_retrieval', 'requester_days_since_first_post_on_raop_at_request', 'requester_days_since_first_post_on_raop_at_retrieval', 'unix_timestamp_of_request', ...]


		('int', [])          : 16 | ['number_of_downvotes_of_request_at_retrieval', 'number_of_upvotes_of_request_at_retrieval', 'request_number_of_comments_at_retrieval', 'requester_number_of_comments_at_request', 'requester_number_of_comments_at_retrieval', ...]


		('object', [])       :  3 | ['post_was_edited', 'requester_subreddits_at_request', 'requester_user_flair']


		('object', ['text']) :  3 | ['request_text', 'request_text_edit_aware', 'request_title']


	Types of features in processed data (raw dtype, special dtypes):


		('category', [])                    :    3 | ['post_was_edited', 'requester_subreddits_at_request', 'requester_user_flair']


		('category', ['text_as_category'])  :    3 | ['request_text', 'request_text_edit_aware', 'request_title']


		('float', [])                       :    6 | ['requester_account_age_in_days_at_request', 'requester_account_age_in_days_at_retrieval', 'requester_days_since_first_post_on_raop_at_request', 'requester_days_since_first_post_on_raop_at_retrieval', 'unix_timestamp_of_request', ...]


		('int', [])                         :   16 | ['number_of_downvotes_of_request_at_retrieval', 'number_of_upvotes_of_request_at_retrieval', 'request_number_of_comments_at_retrieval', 'requester_number_of_comments_at_request', 'requester_number_of_comments_at_retrieval', ...]


		('int', ['binned', 'text_special']) :   86 | ['request_text.char_count', 'request_text.word_count', 'request_text.capital_ratio', 'request_text.lower_ratio', 'request_text.digit_ratio', ...]


		('int', ['text_ngram'])             : 2197 | ['__nlp__.10', '__nlp__.100', '__nlp__.11', '__nlp__.12', '__nlp__.15', ...]


	7.6s = Fit runtime


	28 features in original data used to generate 2311 features in processed data.


	Train Data (Processed) Memory Usage: 15.97 MB (0.0% of available memory)


Data preprocessing and feature engineering runtime = 7.89s ...


AutoGluon will gauge predictive performance using evaluation metric: 'roc_auc'


	This metric expects predicted probabilities rather than predicted class labels, so you'll need to use predict_proba() instead of predict()


	To change this, specify the eval_metric parameter of Predictor()


Large model count detected (110 configs) ... Only displaying the first 3 models of each family. To see all, set `verbosity=3`.
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': [{}, {'activation': 'elu', 'dropout_prob': 0.10077639529843717, 'hidden_size': 108, 'learning_rate': 0.002735937344002146, 'num_layers': 4, 'use_batchnorm': True, 'weight_decay': 1.356433327634438e-12, 'ag_args': {'name_suffix': '_r79', 'priority': -2}}, {'activation': 'elu', 'dropout_prob': 0.11897478034205347, 'hidden_size': 213, 'learning_rate': 0.0010474382260641949, 'num_layers': 4, 'use_batchnorm': False, 'weight_decay': 5.594471067786272e-10, 'ag_args': {'name_suffix': '_r22', 'priority': -7}}],
	'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0, 'hyperparameter_tune_kwargs': None}}],
	'CAT': [{}, {'depth': 6, 'grow_policy': 'Symmetric

AutoGluon will fit 2 stack levels (L1 to L2) ...


Fitting 108 L1 models, fit_strategy="sequential" ...


Fitting model: LightGBMXT_BAG_L1 ... Training model for up to 194.69s of the 292.10s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.32%)


	1.0	 = Validation score   (roc_auc)


	6.87s	 = Training   runtime


	0.33s	 = Validation runtime


Fitting model: LightGBM_BAG_L1 ... Training model for up to 177.74s of the 275.15s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.33%)


	1.0	 = Validation score   (roc_auc)


	4.81s	 = Training   runtime


	0.22s	 = Validation runtime


Fitting model: RandomForestGini_BAG_L1 ... Training model for up to 170.18s of the 267.59s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.1 GB


	0.9994	 = Validation score   (roc_auc)


	2.08s	 = Training   runtime


	5.7s	 = Validation runtime


Fitting model: RandomForestEntr_BAG_L1 ... Training model for up to 161.78s of the 259.18s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.1 GB


	0.999	 = Validation score   (roc_auc)


	2.16s	 = Training   runtime


	5.96s	 = Validation runtime


Fitting model: CatBoost_BAG_L1 ... Training model for up to 153.05s of the 250.46s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.55%)


	1.0	 = Validation score   (roc_auc)


	122.36s	 = Training   runtime


	0.95s	 = Validation runtime


Fitting model: ExtraTreesGini_BAG_L1 ... Training model for up to 27.17s of the 124.58s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.2 GB


	1.0	 = Validation score   (roc_auc)


	2.23s	 = Training   runtime


	5.84s	 = Validation runtime


Fitting model: ExtraTreesEntr_BAG_L1 ... Training model for up to 18.50s of the 115.91s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.2 GB


	1.0	 = Validation score   (roc_auc)


	2.35s	 = Training   runtime


	6.08s	 = Validation runtime


Fitting model: NeuralNetFastAI_BAG_L1 ... Training model for up to 9.46s of the 106.87s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.11%)


	1.0	 = Validation score   (roc_auc)


	13.13s	 = Training   runtime


	0.22s	 = Validation runtime


Fitting model: WeightedEnsemble_L2 ... Training model for up to 292.11s of the 90.89s of remaining time.


	Fitting 1 model on all data | Fitting with cpus=12, gpus=0, mem=0.0/158.7 GB


	Ensemble Weights: {'LightGBMXT_BAG_L1': 1.0}


	1.0	 = Validation score   (roc_auc)


	0.18s	 = Training   runtime


	0.0s	 = Validation runtime


Fitting 108 L2 models, fit_strategy="sequential" ...


Fitting model: LightGBMXT_BAG_L2 ... Training model for up to 90.70s of the 90.67s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.33%)


	1.0	 = Validation score   (roc_auc)


	7.16s	 = Training   runtime


	0.32s	 = Validation runtime


Fitting model: LightGBM_BAG_L2 ... Training model for up to 80.05s of the 80.01s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.33%)


	1.0	 = Validation score   (roc_auc)


	5.55s	 = Training   runtime


	0.25s	 = Validation runtime


Fitting model: RandomForestGini_BAG_L2 ... Training model for up to 71.00s of the 70.96s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/159.9 GB


	1.0	 = Validation score   (roc_auc)


	2.24s	 = Training   runtime


	5.86s	 = Validation runtime


Fitting model: RandomForestEntr_BAG_L2 ... Training model for up to 62.31s of the 62.27s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.1 GB


	1.0	 = Validation score   (roc_auc)


	1.88s	 = Training   runtime


	5.73s	 = Validation runtime


Fitting model: CatBoost_BAG_L2 ... Training model for up to 54.11s of the 54.07s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.56%)


	1.0	 = Validation score   (roc_auc)


	43.82s	 = Training   runtime


	1.08s	 = Validation runtime


Fitting model: ExtraTreesGini_BAG_L2 ... Training model for up to 7.14s of the 7.10s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.2 GB


	1.0	 = Validation score   (roc_auc)


	1.88s	 = Training   runtime


	5.53s	 = Validation runtime


Fitting model: WeightedEnsemble_L3 ... Training model for up to 292.11s of the -1.34s of remaining time.


	Fitting 1 model on all data | Fitting with cpus=12, gpus=0, mem=0.0/160.3 GB


	Ensemble Weights: {'ExtraTreesGini_BAG_L2': 1.0}


	1.0	 = Validation score   (roc_auc)


	0.19s	 = Training   runtime


	0.0s	 = Validation runtime


AutoGluon training complete, total runtime = 301.59s ... Best model: WeightedEnsemble_L3 | Estimated inference throughput: 83.8 rows/s (449 batch size)


TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/code/experiments/autogluon_models/ds_sub_fit/sub_fit_ho")


Deleting DyStack predictor artifacts (clean_up_fits=True) ...


Leaderboard on holdout data (DyStack):


                      model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     ExtraTreesEntr_BAG_L1       1.000000   0.999955     roc_auc        0.120793       6.084077    2.348628                 0.120793                6.084077           2.348628            1       True          7
1     ExtraTreesGini_BAG_L1       1.000000   0.999979     roc_auc        0.130002       5.842413    2.225126                 0.130002                5.842413           2.225126            1       True          6
2           LightGBM_BAG_L1       1.000000   1.000000     roc_auc        0.162156       0.223068    4.807650                 0.162156                0.223068           4.807650            1       True          2
3         LightGBMXT_BAG_L1       1.000000   1.000000     roc_auc        0.426362       0.326572    6.867496                 0.426362                0.3

	0	 = Optimal   num_stack_levels (Stacked Overfitting Occurred: True)


	307s	 = DyStack   runtime |	893s	 = Remaining runtime


Starting main fit with num_stack_levels=0.
	For future fit calls on this dataset, you can skip DyStack to save time: `predictor.fit(..., dynamic_stacking=False, num_stack_levels=0)`


Beginning AutoGluon training ... Time limit = 893s


AutoGluon will save models to "/home/code/experiments/autogluon_models"


Train Data Rows:    4040


Train Data Columns: 29


Label Column:       requester_received_pizza


Problem Type:       binary


Preprocessing data ...


Selected class <--> label mapping:  class 1 = True, class 0 = False


Using Feature Generators to preprocess the data ...


Fitting AutoMLPipelineFeatureGenerator...


	Available Memory:                    164069.52 MB


	Train Data (Original)  Memory Usage: 6.37 MB (0.0% of available memory)


	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.


	Stage 1 Generators:


		Fitting AsTypeFeatureGenerator...


	Stage 2 Generators:


		Fitting FillNaFeatureGenerator...


	Stage 3 Generators:


		Fitting IdentityFeatureGenerator...


		Fitting CategoryFeatureGenerator...


			Fitting CategoryMemoryMinimizeFeatureGenerator...


		Fitting TextSpecialFeatureGenerator...


			Fitting BinnedFeatureGenerator...


			Fitting DropDuplicatesFeatureGenerator...


		Fitting TextNgramFeatureGenerator...


			Fitting CountVectorizer for text features: ['request_text', 'request_text_edit_aware', 'request_title']


			CountVectorizer fit with vocabulary size = 2475


	Stage 4 Generators:


		Fitting DropUniqueFeatureGenerator...


	Stage 5 Generators:


		Fitting DropDuplicatesFeatureGenerator...


	Unused Original Features (Count: 1): ['requester_username']


		These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.


		Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.


		These features do not need to be present at inference time.


		('object', []) : 1 | ['requester_username']


	Types of features in original data (raw dtype, special dtypes):


		('float', [])        :  6 | ['requester_account_age_in_days_at_request', 'requester_account_age_in_days_at_retrieval', 'requester_days_since_first_post_on_raop_at_request', 'requester_days_since_first_post_on_raop_at_retrieval', 'unix_timestamp_of_request', ...]


		('int', [])          : 16 | ['number_of_downvotes_of_request_at_retrieval', 'number_of_upvotes_of_request_at_retrieval', 'request_number_of_comments_at_retrieval', 'requester_number_of_comments_at_request', 'requester_number_of_comments_at_retrieval', ...]


		('object', [])       :  3 | ['post_was_edited', 'requester_subreddits_at_request', 'requester_user_flair']


		('object', ['text']) :  3 | ['request_text', 'request_text_edit_aware', 'request_title']


	Types of features in processed data (raw dtype, special dtypes):


		('category', [])                    :    3 | ['post_was_edited', 'requester_subreddits_at_request', 'requester_user_flair']


		('category', ['text_as_category'])  :    3 | ['request_text', 'request_text_edit_aware', 'request_title']


		('float', [])                       :    6 | ['requester_account_age_in_days_at_request', 'requester_account_age_in_days_at_retrieval', 'requester_days_since_first_post_on_raop_at_request', 'requester_days_since_first_post_on_raop_at_retrieval', 'unix_timestamp_of_request', ...]


		('int', [])                         :   16 | ['number_of_downvotes_of_request_at_retrieval', 'number_of_upvotes_of_request_at_retrieval', 'request_number_of_comments_at_retrieval', 'requester_number_of_comments_at_request', 'requester_number_of_comments_at_retrieval', ...]


		('int', ['binned', 'text_special']) :   86 | ['request_text.char_count', 'request_text.word_count', 'request_text.capital_ratio', 'request_text.lower_ratio', 'request_text.digit_ratio', ...]


		('int', ['text_ngram'])             : 2463 | ['__nlp__.10', '__nlp__.100', '__nlp__.11', '__nlp__.12', '__nlp__.15', ...]


	8.7s = Fit runtime


	28 features in original data used to generate 2577 features in processed data.


	Train Data (Processed) Memory Usage: 20.01 MB (0.0% of available memory)


Data preprocessing and feature engineering runtime = 9.4s ...


AutoGluon will gauge predictive performance using evaluation metric: 'roc_auc'


	This metric expects predicted probabilities rather than predicted class labels, so you'll need to use predict_proba() instead of predict()


	To change this, specify the eval_metric parameter of Predictor()


Large model count detected (110 configs) ... Only displaying the first 3 models of each family. To see all, set `verbosity=3`.
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': [{}, {'activation': 'elu', 'dropout_prob': 0.10077639529843717, 'hidden_size': 108, 'learning_rate': 0.002735937344002146, 'num_layers': 4, 'use_batchnorm': True, 'weight_decay': 1.356433327634438e-12, 'ag_args': {'name_suffix': '_r79', 'priority': -2}}, {'activation': 'elu', 'dropout_prob': 0.11897478034205347, 'hidden_size': 213, 'learning_rate': 0.0010474382260641949, 'num_layers': 4, 'use_batchnorm': False, 'weight_decay': 5.594471067786272e-10, 'ag_args': {'name_suffix': '_r22', 'priority': -7}}],
	'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0, 'hyperparameter_tune_kwargs': None}}],
	'CAT': [{}, {'depth': 6, 'grow_policy': 'Symmetric

Fitting 108 L1 models, fit_strategy="sequential" ...


Fitting model: LightGBMXT_BAG_L1 ... Training model for up to 883.60s of the 883.59s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.37%)


	1.0	 = Validation score   (roc_auc)


	9.24s	 = Training   runtime


	0.35s	 = Validation runtime


Fitting model: LightGBM_BAG_L1 ... Training model for up to 871.13s of the 871.12s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.37%)


	1.0	 = Validation score   (roc_auc)


	6.8s	 = Training   runtime


	0.26s	 = Validation runtime


Fitting model: RandomForestGini_BAG_L1 ... Training model for up to 861.31s of the 861.30s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/159.8 GB


	0.9996	 = Validation score   (roc_auc)


	2.2s	 = Training   runtime


	4.25s	 = Validation runtime


Fitting model: RandomForestEntr_BAG_L1 ... Training model for up to 854.11s of the 854.10s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/159.9 GB


	0.9982	 = Validation score   (roc_auc)


	2.21s	 = Training   runtime


	4.55s	 = Validation runtime


Fitting model: CatBoost_BAG_L1 ... Training model for up to 846.64s of the 846.63s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.72%)


	1.0	 = Validation score   (roc_auc)


	677.91s	 = Training   runtime


	2.65s	 = Validation runtime


Fitting model: ExtraTreesGini_BAG_L1 ... Training model for up to 165.09s of the 165.08s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.1 GB


	1.0	 = Validation score   (roc_auc)


	2.97s	 = Training   runtime


	5.33s	 = Validation runtime


Fitting model: ExtraTreesEntr_BAG_L1 ... Training model for up to 156.05s of the 156.04s of remaining time.


	Fitting 1 model on all data (use_child_oof=True) | Fitting with cpus=12, gpus=0, mem=0.0/160.1 GB


	1.0	 = Validation score   (roc_auc)


	2.42s	 = Training   runtime


	5.36s	 = Validation runtime


Fitting model: NeuralNetFastAI_BAG_L1 ... Training model for up to 147.56s of the 147.55s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.13%)


	1.0	 = Validation score   (roc_auc)


	13.54s	 = Training   runtime


	0.18s	 = Validation runtime


Fitting model: XGBoost_BAG_L1 ... Training model for up to 130.78s of the 130.77s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.70%)


	1.0	 = Validation score   (roc_auc)


	32.1s	 = Training   runtime


	0.4s	 = Validation runtime


Fitting model: NeuralNetTorch_BAG_L1 ... Training model for up to 95.26s of the 95.25s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.07%)


	1.0	 = Validation score   (roc_auc)


	26.88s	 = Training   runtime


	0.51s	 = Validation runtime


Fitting model: LightGBMLarge_BAG_L1 ... Training model for up to 65.68s of the 65.67s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.30%)


	1.0	 = Validation score   (roc_auc)


	32.69s	 = Training   runtime


	0.57s	 = Validation runtime


Fitting model: CatBoost_r177_BAG_L1 ... Training model for up to 29.45s of the 29.44s of remaining time.


	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.72%)


	1.0	 = Validation score   (roc_auc)


	24.0s	 = Training   runtime


	2.55s	 = Validation runtime


Fitting model: WeightedEnsemble_L2 ... Training model for up to 360.00s of the 1.58s of remaining time.


	Fitting 1 model on all data | Fitting with cpus=12, gpus=0, mem=0.0/159.2 GB


	Ensemble Weights: {'CatBoost_r177_BAG_L1': 1.0}


	1.0	 = Validation score   (roc_auc)


	0.27s	 = Training   runtime


	0.0s	 = Validation runtime


AutoGluon training complete, total runtime = 891.77s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 198.3 rows/s (505 batch size)


TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/code/experiments/autogluon_models")


In [None]:
# Generate predictions on test set
test_predictions = predictor.predict_proba(test_df[feature_cols])

# Get the probability for the positive class (True)
if isinstance(test_predictions, pd.DataFrame):
    # AutoGluon returns a DataFrame with columns for each class
    positive_class = predictor.positive_class
    test_pred_proba = test_predictions[positive_class]
else:
    # If it's a Series or array
    test_pred_proba = test_predictions

print(f"Test predictions shape: {test_pred_proba.shape}")
print(f"Test predictions sample:\n{test_pred_proba.head()}")

# Get leaderboard to see model performance
leaderboard = predictor.leaderboard(silent=True)
print("\nLeaderboard (top 5 models):")
print(leaderboard.head())

# Get the best model's validation score
best_model_score = leaderboard.iloc[0]['score_val']
print(f"\nBest model validation score: {best_model_score:.4f}")

# Create submission
submission_df = pd.DataFrame({
    'request_id': test_df['request_id'],
    'requester_received_pizza': test_pred_proba
})

print(f"\nSubmission shape: {submission_df.shape}")
print(f"Submission sample:\n{submission_df.head()}")

# Save submission
submission_path = '/home/submission/autogluon_submission.csv'
submission_df.to_csv(submission_path, index=False)
print(f"\nSubmission saved to: {submission_path}")

In [None]:
# Make predictions on test set
test_predictions = predictor.predict_proba(test_df[feature_cols])[[1]]  # Get probability of positive class

# Create submission
submission = pd.DataFrame({
    'request_id': test_df['request_id'],
    'requester_received_pizza': test_predictions
})

print("Submission preview:")
print(submission.head())
print(f"\nSubmission shape: {submission.shape}")

# Check prediction distribution
print(f"\nPrediction distribution:")
print(f"Mean: {submission['requester_received_pizza'].mean():.4f}")
print(f"Std: {submission['requester_received_pizza'].std():.4f}")
print(f"Min: {submission['requester_received_pizza'].min():.4f}")
print(f"Max: {submission['requester_received_pizza'].max():.4f}")

# Save submission
submission_path = '/home/submission/submission_autogluon.csv'
submission.to_csv(submission_path, index=False)
print(f"\nSubmission saved to: {submission_path}")