# Goal: Predict whether a client will subscribe to a bank term deposit.

In [1]:
from pathlib import Path

import pandas as pd
from autogluon.tabular import TabularPredictor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the train and test splits from CSV files in the working directory.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [3]:
# Columns cast to the pandas `category` dtype (includes the target "y").
categorical_cols = [
    "job", "marital", "education", "default", "housing",
    "loan", "contact", "month", "poutcome", "y",
]

# Convert numeric types: explicit integer widths for the numeric columns.
num_type_map = {
    "id": "int32",
    "age": "int16",
    "balance": "int32",
    "day": "int8",
    "duration": "int32",
    "campaign": "int16",
    "pdays": "int32",
    "previous": "int16",
}

# Apply the categorical and numeric casts together in one astype call.
dtype_map = {name: "category" for name in categorical_cols}
dtype_map.update(num_type_map)
train_df = train_df.astype(dtype_map)

train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750000 entries, 0 to 749999
Data columns (total 18 columns):
 #   Column     Non-Null Count   Dtype   
---  ------     --------------   -----   
 0   id         750000 non-null  int32   
 1   age        750000 non-null  int16   
 2   job        750000 non-null  category
 3   marital    750000 non-null  category
 4   education  750000 non-null  category
 5   default    750000 non-null  category
 6   balance    750000 non-null  int32   
 7   housing    750000 non-null  category
 8   loan       750000 non-null  category
 9   contact    750000 non-null  category
 10  day        750000 non-null  int8    
 11  month      750000 non-null  category
 12  duration   750000 non-null  int32   
 13  campaign   750000 non-null  int16   
 14  pdays      750000 non-null  int32   
 15  previous   750000 non-null  int16   
 16  poutcome   750000 non-null  category
 17  y          750000 non-null  category
dtypes: category(10), int16(3), int32(4), int8(1)

In [4]:
# Columns cast to the pandas `category` dtype (the test frame has no "y" column).
categorical_cols = [
    "job", "marital", "education", "default", "housing",
    "loan", "contact", "month", "poutcome",
]

# Convert numeric types: explicit integer widths for the numeric columns.
num_type_map = {
    "id": "int32",
    "age": "int16",
    "balance": "int32",
    "day": "int8",
    "duration": "int32",
    "campaign": "int16",
    "pdays": "int32",
    "previous": "int16",
}

# Apply the categorical and numeric casts together in one astype call.
dtype_map = {name: "category" for name in categorical_cols}
dtype_map.update(num_type_map)
test_df = test_df.astype(dtype_map)

test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250000 entries, 0 to 249999
Data columns (total 17 columns):
 #   Column     Non-Null Count   Dtype   
---  ------     --------------   -----   
 0   id         250000 non-null  int32   
 1   age        250000 non-null  int16   
 2   job        250000 non-null  category
 3   marital    250000 non-null  category
 4   education  250000 non-null  category
 5   default    250000 non-null  category
 6   balance    250000 non-null  int32   
 7   housing    250000 non-null  category
 8   loan       250000 non-null  category
 9   contact    250000 non-null  category
 10  day        250000 non-null  int8    
 11  month      250000 non-null  category
 12  duration   250000 non-null  int32   
 13  campaign   250000 non-null  int16   
 14  pdays      250000 non-null  int32   
 15  previous   250000 non-null  int16   
 16  poutcome   250000 non-null  category
dtypes: category(9), int16(3), int32(4), int8(1)
memory usage: 7.6 MB


# Modeling

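- Experiment 1 (baseline)
  - Models used: XGBoost (XGB), Random Forest (RF), Neural Net Torch Model (NN_TORCH)
  - K-fold: 7, 5 (the run recorded below uses `num_bag_folds=5`)
  - hyperparameter tuning: True (note: the `fit()` call recorded below passes empty per-model dicts and no `hyperparameter_tune_kwargs`)
  - time limit (seconds): 60 * 45, 60 * 25 (the run recorded below uses `60*25`)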

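- Experiment 2
  - Models used: XGBoost (XGB), LightGBM (GBM), CatBoost (CAT), Neural Net Torch Model (NN_TORCH)
  - K-fold: 7, 5 (the run recorded below uses `num_bag_folds=5`)
  - hyperparameter tuning: True (note: the `fit()` call recorded below passes empty per-model dicts and no `hyperparameter_tune_kwargs`)
  - time limit (seconds): 60 * 45, 60 * 25 (the run recorded below uses `60*25`)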

In [5]:
# Name of the target column passed to TabularPredictor.
LABEL = "y"

# Keep the test ids for the submission files, then remove "id" from both
# frames before they are handed to AutoGluon. The guards make the cell safe to
# re-run: once "id" is gone, a second execution leaves everything untouched
# instead of raising KeyError.
if "id" in test_df.columns:
    test_ids = test_df["id"]
    test_df = test_df.drop(columns=["id"])
train_df = train_df.drop(columns=["id"], errors="ignore")

In [6]:
SAVE_PATH = "./trained models/1107_baseline_autogluon_model"

# Experiment 1 (baseline) fit settings: XGBoost, Random Forest and the Torch
# neural net, each given an empty dict (no per-model overrides), with 5-fold
# bagging and one stacking level.
baseline_fit_kwargs = dict(
    presets="best_quality",
    time_limit=60*25,
    hyperparameters={
        "XGB": {},
        "RF": {},
        "NN_TORCH": {},
    },
    num_bag_folds=5,
    num_stack_levels=1,
    refit_full=True,
)

# Binary classifier scored by ROC AUC; artifacts are written under SAVE_PATH.
baseline_predictor = TabularPredictor(
    label=LABEL,
    path=SAVE_PATH,
    problem_type="binary",
    eval_metric="roc_auc",
).fit(train_df, **baseline_fit_kwargs)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.10.19
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 25.0.0: Wed Sep 17 21:42:08 PDT 2025; root:xnu-12377.1.9~141/RELEASE_ARM64_T8132
CPU Count:          10
Memory Avail:       8.42 GB / 24.00 GB (35.1%)
Disk Space Avail:   51.67 GB / 228.27 GB (22.6%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacke

In [7]:
# Leaderboard of the baseline models; no data is passed to the call.
baseline_leaderboard = baseline_predictor.leaderboard()
baseline_leaderboard

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,0.968825,roc_auc,46.190964,880.621063,0.096924,15.345325,3,True,8
1,XGBoost_BAG_L2,0.968646,roc_auc,34.226374,552.193393,2.449841,20.912236,2,True,6
2,NeuralNetTorch_BAG_L2,0.968335,roc_auc,35.770492,797.855365,3.993959,266.574207,2,True,7
3,WeightedEnsemble_L2,0.968255,roc_auc,31.875392,538.89448,0.098859,7.613323,2,True,4
4,RandomForest_BAG_L2,0.968068,roc_auc,39.65024,577.789295,7.873707,46.508138,2,True,5
5,XGBoost_BAG_L1,0.967902,roc_auc,19.201362,110.516884,19.201362,110.516884,1,True,2
6,RandomForest_BAG_L1,0.96352,roc_auc,9.118099,33.806196,9.118099,33.806196,1,True,1
7,NeuralNetTorch_BAG_L1,0.962776,roc_auc,3.457072,386.958077,3.457072,386.958077,1,True,3
8,RandomForest_BAG_L1_FULL,,roc_auc,9.118099,33.806196,9.118099,33.806196,1,True,9
9,XGBoost_BAG_L2_FULL,,roc_auc,,349.245993,,2.960336,2,True,14


In [8]:
# Print the fit summary, then show only its "model_paths" entry.
baseline_summary = baseline_predictor.fit_summary()
baseline_summary.get("model_paths")

*** Summary of fit() ***
Estimated performance of each model:
                         model  score_val eval_metric  pred_time_val    fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0          WeightedEnsemble_L3   0.968825     roc_auc      46.190964  880.621063                0.096924          15.345325            3       True          8
1               XGBoost_BAG_L2   0.968646     roc_auc      34.226374  552.193393                2.449841          20.912236            2       True          6
2        NeuralNetTorch_BAG_L2   0.968335     roc_auc      35.770492  797.855365                3.993959         266.574207            2       True          7
3          WeightedEnsemble_L2   0.968255     roc_auc      31.875392  538.894480                0.098859           7.613323            2       True          4
4          RandomForest_BAG_L2   0.968068     roc_auc      39.650240  577.789295                7.873707          46.508138            2       



{'RandomForest_BAG_L1': ['RandomForest_BAG_L1'],
 'XGBoost_BAG_L1': ['XGBoost_BAG_L1'],
 'NeuralNetTorch_BAG_L1': ['NeuralNetTorch_BAG_L1'],
 'WeightedEnsemble_L2': ['WeightedEnsemble_L2'],
 'RandomForest_BAG_L2': ['RandomForest_BAG_L2'],
 'XGBoost_BAG_L2': ['XGBoost_BAG_L2'],
 'NeuralNetTorch_BAG_L2': ['NeuralNetTorch_BAG_L2'],
 'WeightedEnsemble_L3': ['WeightedEnsemble_L3'],
 'RandomForest_BAG_L1_FULL': ['RandomForest_BAG_L1_FULL'],
 'XGBoost_BAG_L1_FULL': ['XGBoost_BAG_L1_FULL'],
 'NeuralNetTorch_BAG_L1_FULL': ['NeuralNetTorch_BAG_L1_FULL'],
 'WeightedEnsemble_L2_FULL': ['WeightedEnsemble_L2_FULL'],
 'RandomForest_BAG_L2_FULL': ['RandomForest_BAG_L2_FULL'],
 'XGBoost_BAG_L2_FULL': ['XGBoost_BAG_L2_FULL'],
 'NeuralNetTorch_BAG_L2_FULL': ['NeuralNetTorch_BAG_L2_FULL'],
 'WeightedEnsemble_L3_FULL': ['WeightedEnsemble_L3_FULL']}

In [9]:
# Permutation feature importance for the baseline predictor.
# NOTE(review): `train_df` is the same frame that was passed to fit(). AutoGluon's
# documentation advises against computing feature importance on training data
# because the scores are biased by overfitting; a held-out split would be more
# reliable — TODO confirm and switch to held-out data if one is available.
baseline_predictor.feature_importance(train_df)

Computing feature importance via permutation shuffling for 16 features using 5000 rows with 5 shuffle sets...
	104.37s	= Expected runtime (20.87s per shuffle set)
	42.42s	= Actual runtime (Completed 5 of 5 shuffle sets)


Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
duration,0.276188,0.012271,4.664301e-07,5,0.301454,0.250921
month,0.025618,0.00151,1.442665e-06,5,0.028728,0.022508
contact,0.016127,0.002175,3.874311e-05,5,0.020605,0.011649
balance,0.015619,0.000477,1.042624e-07,5,0.016601,0.014636
day,0.014129,0.000711,7.687575e-07,5,0.015594,0.012664
age,0.007275,0.001585,0.0002538145,5,0.010538,0.004012
housing,0.006832,0.000822,2.462062e-05,5,0.008523,0.00514
campaign,0.006199,0.000585,9.403588e-06,5,0.007403,0.004994
job,0.004013,0.000763,0.000149683,5,0.005584,0.002441
pdays,0.003935,0.000412,1.418433e-05,5,0.004783,0.003087


In [13]:
# Score the baseline predictor with ROC AUC plus the auxiliary metrics.
# NOTE(review): `train_df` is the frame the predictor was fitted on, so these
# are in-sample numbers and presumably optimistic; the leaderboard's
# `score_val` column is the validation estimate to compare against.
results = baseline_predictor.evaluate(train_df, auxiliary_metrics=True)
results

{'roc_auc': 0.9843661245233828,
 'accuracy': 0.9576626666666667,
 'balanced_accuracy': 0.8844584437668928,
 'mcc': 0.7947045327416986,
 'f1': 0.8178851436994213,
 'precision': 0.8501591768114559,
 'recall': 0.7879718857749094}

In [14]:
# Print every metric on its own line, formatted to four decimal places.
for metric in results:
    value = results[metric]
    print(f"{metric}: {value:.4f}")

roc_auc: 0.9844
accuracy: 0.9577
balanced_accuracy: 0.8845
mcc: 0.7947
f1: 0.8179
precision: 0.8502
recall: 0.7880


In [15]:
SAVE_PATH = "./trained models/1107_autogluon_model_ex2"

# Experiment 2 fit settings: XGBoost, LightGBM, CatBoost and the Torch neural
# net, each given an empty dict (no per-model overrides), with 5-fold bagging
# and one stacking level.
ex2_fit_kwargs = dict(
    presets="best_quality",
    time_limit=60*25,
    hyperparameters={
        "XGB": {},
        "GBM": {},
        "CAT": {},
        "NN_TORCH": {},
    },
    num_bag_folds=5,
    num_stack_levels=1,
    refit_full=True,
)

# Binary classifier scored by ROC AUC; artifacts are written under SAVE_PATH.
ex2_predictor = TabularPredictor(
    label=LABEL,
    path=SAVE_PATH,
    problem_type="binary",
    eval_metric="roc_auc",
).fit(train_df, **ex2_fit_kwargs)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.10.19
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 25.0.0: Wed Sep 17 21:42:08 PDT 2025; root:xnu-12377.1.9~141/RELEASE_ARM64_T8132
CPU Count:          10
Memory Avail:       7.94 GB / 24.00 GB (33.1%)
Disk Space Avail:   49.00 GB / 228.27 GB (21.5%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=5, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacke

In [16]:
# Leaderboard of the experiment 2 models; no data is passed to the call.
ex2_leaderboard = ex2_predictor.leaderboard()
ex2_leaderboard

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,0.969456,roc_auc,140.523956,947.246926,0.096376,17.960658,3,True,9
1,LightGBM_BAG_L2,0.969381,roc_auc,137.721718,643.902928,2.369484,9.14933,2,True,5
2,XGBoost_BAG_L2,0.969371,roc_auc,137.731912,658.199349,2.379678,23.445752,2,True,7
3,CatBoost_BAG_L2,0.969253,roc_auc,135.678418,896.691187,0.326184,261.937589,2,True,6
4,WeightedEnsemble_L2,0.969068,roc_auc,135.033135,173.538695,0.096088,7.845632,2,True,4
5,LightGBM_BAG_L1,0.968756,roc_auc,106.286152,73.451705,106.286152,73.451705,1,True,1
6,NeuralNetTorch_BAG_L2,0.968158,roc_auc,140.010281,665.234276,4.658047,30.480678,2,True,8
7,XGBoost_BAG_L1,0.967847,roc_auc,28.650896,92.241358,28.650896,92.241358,1,True,3
8,CatBoost_BAG_L1,0.964213,roc_auc,0.415186,469.060535,0.415186,469.060535,1,True,2
9,XGBoost_BAG_L2_FULL,,roc_auc,,144.911649,,3.078259,2,True,16


In [17]:
# Print the fit summary, then show only its "model_paths" entry.
ex2_summary = ex2_predictor.fit_summary()
ex2_summary.get("model_paths")

*** Summary of fit() ***
Estimated performance of each model:
                         model  score_val eval_metric  pred_time_val    fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0          WeightedEnsemble_L3   0.969456     roc_auc     140.523956  947.246926                0.096376          17.960658            3       True          9
1              LightGBM_BAG_L2   0.969381     roc_auc     137.721718  643.902928                2.369484           9.149330            2       True          5
2               XGBoost_BAG_L2   0.969371     roc_auc     137.731912  658.199349                2.379678          23.445752            2       True          7
3              CatBoost_BAG_L2   0.969253     roc_auc     135.678418  896.691187                0.326184         261.937589            2       True          6
4          WeightedEnsemble_L2   0.969068     roc_auc     135.033135  173.538695                0.096088           7.845632            2       



{'LightGBM_BAG_L1': ['LightGBM_BAG_L1'],
 'CatBoost_BAG_L1': ['CatBoost_BAG_L1'],
 'XGBoost_BAG_L1': ['XGBoost_BAG_L1'],
 'WeightedEnsemble_L2': ['WeightedEnsemble_L2'],
 'LightGBM_BAG_L2': ['LightGBM_BAG_L2'],
 'CatBoost_BAG_L2': ['CatBoost_BAG_L2'],
 'XGBoost_BAG_L2': ['XGBoost_BAG_L2'],
 'NeuralNetTorch_BAG_L2': ['NeuralNetTorch_BAG_L2'],
 'WeightedEnsemble_L3': ['WeightedEnsemble_L3'],
 'LightGBM_BAG_L1_FULL': ['LightGBM_BAG_L1_FULL'],
 'CatBoost_BAG_L1_FULL': ['CatBoost_BAG_L1_FULL'],
 'XGBoost_BAG_L1_FULL': ['XGBoost_BAG_L1_FULL'],
 'WeightedEnsemble_L2_FULL': ['WeightedEnsemble_L2_FULL'],
 'LightGBM_BAG_L2_FULL': ['LightGBM_BAG_L2_FULL'],
 'CatBoost_BAG_L2_FULL': ['CatBoost_BAG_L2_FULL'],
 'XGBoost_BAG_L2_FULL': ['XGBoost_BAG_L2_FULL'],
 'NeuralNetTorch_BAG_L2_FULL': ['NeuralNetTorch_BAG_L2_FULL'],
 'WeightedEnsemble_L3_FULL': ['WeightedEnsemble_L3_FULL']}

In [18]:
# Permutation feature importance for the experiment 2 predictor.
# NOTE(review): `train_df` is the same frame that was passed to fit(). AutoGluon's
# documentation advises against computing feature importance on training data
# because the scores are biased by overfitting; a held-out split would be more
# reliable — TODO confirm and switch to held-out data if one is available.
ex2_predictor.feature_importance(train_df)

Computing feature importance via permutation shuffling for 16 features using 5000 rows with 5 shuffle sets...
	71.5s	= Expected runtime (14.3s per shuffle set)
	57.98s	= Actual runtime (Completed 5 of 5 shuffle sets)


Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
duration,0.283252,0.011049,2.772456e-07,5,0.306002,0.260502
month,0.024044,0.001795,3.702733e-06,5,0.02774,0.020347
balance,0.017626,0.001158,2.219826e-06,5,0.020009,0.015242
contact,0.014525,0.001819,2.887844e-05,5,0.01827,0.010781
day,0.011735,0.000866,3.540713e-06,5,0.013519,0.009951
age,0.006278,0.001134,0.0001221518,5,0.008612,0.003944
campaign,0.0052,0.000591,1.97163e-05,5,0.006417,0.003982
housing,0.004026,0.000579,5.00897e-05,5,0.005219,0.002833
pdays,0.003546,0.000482,3.991511e-05,5,0.004539,0.002554
job,0.003211,0.000765,0.0003596475,5,0.004788,0.001635


In [19]:
# Score the experiment 2 predictor with ROC AUC plus the auxiliary metrics.
# The dict is stored as `ex2_metrics` so that `ex2_results` refers to one kind
# of object only (the test-set probabilities assigned in the inference cell).
# NOTE(review): `train_df` is the frame the predictor was fitted on, so these
# are in-sample numbers and presumably optimistic; the leaderboard's
# `score_val` column is the validation estimate to compare against.
ex2_metrics = ex2_predictor.evaluate(train_df, auxiliary_metrics=True)

# Print every metric on its own line, formatted to four decimal places.
for metric, value in ex2_metrics.items():
    print(f"{metric}: {value:.4f}")

roc_auc: 0.9800
accuracy: 0.9490
balanced_accuracy: 0.8623
mcc: 0.7518
f1: 0.7797
precision: 0.8142
recall: 0.7480


# Inference

In [11]:
# Positive-class probability for every test row. `as_multiclass=False` makes
# AutoGluon return only that column, so the code no longer depends on the class
# label being the literal integer 1 (as the previous `[...][1]` lookup did).
baseline_results = baseline_predictor.predict_proba(test_df, as_multiclass=False)

In [20]:
# Positive-class probability for every test row. `as_multiclass=False` makes
# AutoGluon return only that column, so the code no longer depends on the class
# label being the literal integer 1 (as the previous `[...][1]` lookup did).
ex2_results = ex2_predictor.predict_proba(test_df, as_multiclass=False)

# Export results

In [12]:
# Build the baseline submission: test ids paired with the probabilities held in
# `baseline_results`, written without the index column.
submission = pd.DataFrame({
    "id": test_ids,
    "y": baseline_results
})

baseline_submission_path = Path("./submissions/1107_submission_autogluon_baseline.csv")
# to_csv does not create missing folders, so make sure the target directory exists.
baseline_submission_path.parent.mkdir(parents=True, exist_ok=True)
submission.to_csv(baseline_submission_path, index=False)

In [21]:
# Build the experiment 2 submission: test ids paired with the probabilities
# held in `ex2_results`, written without the index column.
submission = pd.DataFrame({
    "id": test_ids,
    "y": ex2_results
})

ex2_submission_path = Path("./submissions/1107_submission_autogluon_ex2.csv")
# to_csv does not create missing folders, so make sure the target directory exists.
ex2_submission_path.parent.mkdir(parents=True, exist_ok=True)
submission.to_csv(ex2_submission_path, index=False)