In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor
import matplotlib.pyplot as plt
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Start every run from a clean slate: delete the whole models folder if it exists.
# WARNING: this discards all previously trained predictors and saved leaderboards.
import shutil

models_dir = "../models"
shutil.rmtree(models_dir, ignore_errors=True)

# ignore_errors=True also hides partial failures (e.g. files still held open by
# another process), so report leftovers instead of silently mixing stale model
# files with the results of this run.
if os.path.exists(models_dir):
    print(f"Warning: could not fully remove {models_dir}; stale model files may remain.")

In [3]:
def prepare_data(true_data, counter_files, max_index):
    """Build one labelled dataset per counter-example DataFrame.

    Each result is the rows of ``true_data`` (labelled ``"true"``) followed by the
    rows of one counter-example DataFrame (labelled ``"false"``), restricted to
    the feature columns ``B1`` .. ``B{max_index}`` plus a ``label`` column.

    The input DataFrames are NOT modified: labelled copies are created instead of
    adding the ``label`` column in place, so re-running the calling cell always
    starts from the same inputs.

    Parameters
    ----------
    true_data : pandas.DataFrame
        Positive samples; must contain columns ``B1`` .. ``B{max_index}``.
    counter_files : dict
        Mapping from a variant key to a DataFrame of counter examples with the
        same ``B`` columns.
    max_index : int
        Highest ``B`` column index to keep.

    Returns
    -------
    dict
        Mapping from each key of ``counter_files`` to the combined DataFrame
        (fresh 0..n-1 index, columns ``B1`` .. ``B{max_index}``, ``label``).

    Raises
    ------
    KeyError
        If any of the expected ``B`` columns is missing from an input DataFrame.
    """
    # Feature columns to keep.
    cols = [f"B{i}" for i in range(1, max_index + 1)]

    # Selecting the columns first yields a new frame, and .assign returns another
    # new frame, so the caller's DataFrame never gains a 'label' column.
    true_labeled = true_data[cols].assign(label="true")

    combined_data = {}
    for key, df_counter in counter_files.items():
        counter_labeled = df_counter[cols].assign(label="false")
        # True rows first, then the counter examples, with a fresh index.
        combined_data[key] = pd.concat([true_labeled, counter_labeled], ignore_index=True)

    return combined_data

In [4]:
# AutoGluon presets passed to every TabularPredictor.fit call below.
presets = ['good_quality', 'optimize_for_deployment']

def generateModels(combined_data, prefix, max_index, time_limit=180):
    """Train and evaluate one AutoGluon predictor per dataset variant.

    For every entry of ``combined_data`` the rows are split 70/30 into train and
    test parts (fixed ``random_state=42``), a ``TabularPredictor`` with
    ``eval_metric='f1'`` is fitted on the train part using the module-level
    ``presets``, and its leaderboard on the test part is printed and written to
    ``../models/{prefix}/results/{key}_leaderboard.csv``.

    Each leaderboard is written as soon as it is available, so results of
    variants that already finished survive a failure in a later fit.

    Parameters
    ----------
    combined_data : dict
        Mapping from variant key to a DataFrame containing the feature columns
        ``B1`` .. ``B{max_index}`` and a ``label`` column.
    prefix : str
        Sub-folder name under ``../models`` used for all outputs of this call.
    max_index : int
        Highest ``B`` feature column index to use.
    time_limit : int, optional
        Value forwarded to ``TabularPredictor.fit(time_limit=...)`` for each
        variant (AutoGluon interprets it as seconds).

    Returns
    -------
    dict
        Mapping from variant key to its leaderboard DataFrame.
    """
    # Feature columns used for training.
    feature_cols = [f"B{i}" for i in range(1, max_index + 1)]

    # Create the results folder up front so each leaderboard can be saved
    # immediately after its model has been evaluated.
    results_folder = f"../models/{prefix}/results"
    os.makedirs(results_folder, exist_ok=True)

    results = {}
    for key, df in combined_data.items():
        # Split data into features and label.
        X = df[feature_cols]
        y = df["label"]

        # Train-test split: 70% train, 30% test.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        # AutoGluon expects features and label in a single DataFrame.
        train_data = pd.concat([X_train, y_train], axis=1)
        test_data = pd.concat([X_test, y_test], axis=1)

        # Output folder for the models of this dataset variant.
        output_folder = f"../models/{prefix}/{key}"
        os.makedirs(output_folder, exist_ok=True)

        # Train the model using AutoGluon.
        predictor = TabularPredictor(label="label", path=output_folder, eval_metric='f1').fit(train_data, presets=presets, time_limit=time_limit)

        # Evaluate the model on the held-out test data.
        leaderboard = predictor.leaderboard(test_data, silent=True)
        results[key] = leaderboard

        # Persist right away instead of after the whole loop has finished.
        leaderboard.to_csv(os.path.join(results_folder, f"{key}_leaderboard.csv"), index=False)

        print(f"Results for {key}:")
        print(leaderboard)

    return results

In [6]:
# Number of feature columns in the EI zone CSV files (B1 .. B12).
# Defined once so data preparation and model training cannot disagree.
EI_MAX_INDEX = 12

# Positive ("true") samples for the EI zone.
true_file = "../data/ei/data_ei.csv"
ei_true_data = pd.read_csv(true_file)

# Counter-example (negative sample) data for the EI zone, keyed by variant name.
counter_files = {
    "combined": pd.read_csv("../data/ei/data_ei_negative_sample.csv")
}

# One labelled true-vs-counter DataFrame per counter-example variant.
combined_data = prepare_data(ei_true_data, counter_files, max_index=EI_MAX_INDEX)

# Train and evaluate one model per variant; outputs are written under ../models/ei.
# time_limit is in seconds (21600 s = 6 h).
generateModels(combined_data, 'ei', EI_MAX_INDEX, time_limit=21600)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.9.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Memory Avail:       6.45 GB / 15.35 GB (42.0%)
Disk Space Avail:   47.75 GB / 100.00 GB (47.7%)
Presets specified: ['good_quality', 'optimize_for_deployment']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stackin

[36m(_ray_fit pid=22400)[0m [1000]	valid_set's binary_logloss: 0.063705	valid_set's f1: 0.957524
[36m(_ray_fit pid=22400)[0m [2000]	valid_set's binary_logloss: 0.0596066	valid_set's f1: 0.962963[32m [repeated 8x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m
[36m(_ray_fit pid=22400)[0m [3000]	valid_set's binary_logloss: 0.058624	valid_set's f1: 0.963412[32m [repeated 8x across cluster][0m
[36m(_ray_fit pid=22400)[0m [4000]	valid_set's binary_logloss: 0.0589464	valid_set's f1: 0.965462[32m [repeated 8x across cluster][0m
[36m(_ray_fit pid=22400)[0m [5000]	valid_set's binary_logloss: 0.0598116	valid_set's f1: 0.965489[32m [repeated 7x across cluster][0m
[36m(_ray_fit pid=22400)[0m [6000]	valid_set's binary_logloss: 0.0608523	valid_set's f1: 0.966505[32m [repeated 7x across cluster][0m

[36m(_dystack pid=14208)[0m 	0.9675	 = Validation score   (f1)
[36m(_dystack pid=14208)[0m 	132.8s	 = Training   runtime
[36m(_dystack pid=14208)[0m 	51.79s	 = Validation runtime
[36m(_dystack pid=14208)[0m Fitting model: LightGBM_BAG_L1 ... Training model for up to 3442.75s of the 5240.25s of remaining time.
[36m(_dystack pid=14208)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=2, gpus=0, memory=0.23%)


[36m(_ray_fit pid=8328)[0m [1000]	valid_set's binary_logloss: 0.0611877	valid_set's f1: 0.960582
[36m(_ray_fit pid=24548)[0m [1000]	valid_set's binary_logloss: 0.0657267	valid_set's f1: 0.955569
[36m(_ray_fit pid=8328)[0m [2000]	valid_set's binary_logloss: 0.0575243	valid_set's f1: 0.965253[32m [repeated 7x across cluster][0m
[36m(_ray_fit pid=8328)[0m [3000]	valid_set's binary_logloss: 0.0570503	valid_set's f1: 0.96631[32m [repeated 7x across cluster][0m
[36m(_ray_fit pid=8328)[0m [4000]	valid_set's binary_logloss: 0.0580491	valid_set's f1: 0.96713[32m [repeated 7x across cluster][0m
[36m(_ray_fit pid=8328)[0m [5000]	valid_set's binary_logloss: 0.059545	valid_set's f1: 0.966895[32m [repeated 7x across cluster][0m
[36m(_ray_fit pid=20476)[0m [6000]	valid_set's binary_logloss: 0.0584109	valid_set's f1: 0.966465[32m [repeated 7x across cluster][0m
[36m(_ray_fit pid=24256)[0m [7000]	valid_set's binary_logloss: 0.061186	valid_set's f1: 0.969855[32m [repeated 6x a

[36m(_dystack pid=14208)[0m 	0.9675	 = Validation score   (f1)
[36m(_dystack pid=14208)[0m 	135.31s	 = Training   runtime
[36m(_dystack pid=14208)[0m 	45.56s	 = Validation runtime
[36m(_dystack pid=14208)[0m Fitting model: RandomForestGini_BAG_L1 ... Training model for up to 3294.71s of the 5092.22s of remaining time.
[36m(_dystack pid=14208)[0m 	0.9627	 = Validation score   (f1)
[36m(_dystack pid=14208)[0m 	2.53s	 = Training   runtime
[36m(_dystack pid=14208)[0m 	1.67s	 = Validation runtime
[36m(_dystack pid=14208)[0m Fitting model: RandomForestEntr_BAG_L1 ... Training model for up to 3290.31s of the 5087.81s of remaining time.
[36m(_dystack pid=14208)[0m 	0.9614	 = Validation score   (f1)
[36m(_dystack pid=14208)[0m 	2.03s	 = Training   runtime
[36m(_dystack pid=14208)[0m 	1.68s	 = Validation runtime
[36m(_dystack pid=14208)[0m Fitting model: CatBoost_BAG_L1 ... Training model for up to 3286.40s of the 5083.90s of remaining time.
[36m(_dystack pid=14208)[0m 

[36m(_ray_fit pid=16784)[0m [1000]	valid_set's binary_logloss: 0.0566452	valid_set's f1: 0.962888
[36m(_ray_fit pid=16784)[0m [2000]	valid_set's binary_logloss: 0.0558062	valid_set's f1: 0.965337[32m [repeated 8x across cluster][0m
[36m(_ray_fit pid=16784)[0m [3000]	valid_set's binary_logloss: 0.0568761	valid_set's f1: 0.967104[32m [repeated 7x across cluster][0m


[36m(_ray_fit pid=16784)[0m 	Ran out of time, early stopping on iteration 3371. Best iteration is:
[36m(_ray_fit pid=16784)[0m 	[3366]	valid_set's binary_logloss: 0.0576004	valid_set's f1: 0.968094
[36m(_ray_fit pid=23988)[0m 	Ran out of time, stopping training early. (Stopping on epoch 78)[32m [repeated 7x across cluster][0m
[36m(_dystack pid=14208)[0m 	0.9685	 = Validation score   (f1)
[36m(_dystack pid=14208)[0m 	61.51s	 = Training   runtime
[36m(_dystack pid=14208)[0m 	107.11s	 = Validation runtime
[36m(_ray_fit pid=13232)[0m 	Ran out of time, early stopping on iteration 3303. Best iteration is:[32m [repeated 6x across cluster][0m
[36m(_ray_fit pid=13232)[0m 	[2885]	valid_set's binary_logloss: 0.0579046	valid_set's f1: 0.965852[32m [repeated 6x across cluster][0m
[36m(_dystack pid=14208)[0m Fitting model: WeightedEnsemble_L2 ... Training model for up to 360.00s of the 1790.53s of remaining time.
[36m(_dystack pid=14208)[0m 	Ensemble Weights: {'NeuralNetTor

Results for combined:
                           model  score_test score_val eval_metric  \
0   RandomForestEntr_BAG_L2_FULL    0.973865      None          f1   
1      LightGBMLarge_BAG_L1_FULL    0.972153      None          f1   
2            XGBoost_BAG_L1_FULL    0.971640      None          f1   
3         LightGBMXT_BAG_L1_FULL    0.970114      None          f1   
4           LightGBM_BAG_L1_FULL    0.969751      None          f1   
5     NeuralNetTorch_BAG_L1_FULL    0.968885      None          f1   
6   RandomForestGini_BAG_L1_FULL    0.964867      None          f1   
7           CatBoost_BAG_L1_FULL    0.964247      None          f1   
8   RandomForestEntr_BAG_L1_FULL    0.962901      None          f1   
9     ExtraTreesGini_BAG_L1_FULL    0.956632      None          f1   
10    ExtraTreesEntr_BAG_L1_FULL    0.954728      None          f1   
11   NeuralNetFastAI_BAG_L1_FULL    0.915713      None          f1   

    pred_time_test  pred_time_val     fit_time  pred_time_test_marg

In [6]:
# Number of feature columns in the IE zone CSV files (B1 .. B105).
# Defined once so data preparation and model training cannot disagree.
IE_MAX_INDEX = 105

# Positive ("true") samples for the IE zone.
true_file = "../data/ie/data_ie.csv"
ie_true_data = pd.read_csv(true_file)

# Counter-example (negative sample) data for the IE zone, keyed by variant name.
counter_files = {
    "combined": pd.read_csv("../data/ie/data_ie_negative_sample.csv")
}

# One labelled true-vs-counter DataFrame per counter-example variant.
combined_data = prepare_data(ie_true_data, counter_files, max_index=IE_MAX_INDEX)

# Train and evaluate one model per variant; outputs are written under ../models/ie.
# time_limit is in seconds (21000 s = 5 h 50 min).
# NOTE(review): the EI cell uses 21600 (6 h) - confirm 21000 is intended here.
generateModels(combined_data, 'ie', IE_MAX_INDEX, time_limit=21000)

  ie_true_data = pd.read_csv(true_file)
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.9.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Memory Avail:       5.91 GB / 15.35 GB (38.5%)
Disk Space Avail:   44.11 GB / 100.00 GB (44.1%)
Presets specified: ['good_quality', 'optimize_for_deployment']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked o

Results for combined:
                           model  score_test score_val eval_metric  \
0   RandomForestEntr_BAG_L2_FULL    0.985571      None          f1   
1   RandomForestGini_BAG_L2_FULL    0.985450      None          f1   
2       WeightedEnsemble_L3_FULL    0.985384      None          f1   
3    NeuralNetFastAI_BAG_L2_FULL    0.985351      None          f1   
4           LightGBM_BAG_L1_FULL    0.982848      None          f1   
5         LightGBMXT_BAG_L1_FULL    0.982693      None          f1   
6            XGBoost_BAG_L1_FULL    0.975840      None          f1   
7     ExtraTreesGini_BAG_L1_FULL    0.974455      None          f1   
8   RandomForestGini_BAG_L1_FULL    0.973444      None          f1   
9   RandomForestEntr_BAG_L1_FULL    0.973047      None          f1   
10    ExtraTreesEntr_BAG_L1_FULL    0.972725      None          f1   
11          CatBoost_BAG_L1_FULL    0.970850      None          f1   
12    NeuralNetTorch_BAG_L1_FULL    0.964076      None          f1  

In [9]:
# Number of feature columns in the EZ zone CSV files (B1 .. B550).
# Defined once so data preparation and model training cannot disagree.
EZ_MAX_INDEX = 550

# Positive ("true") samples for the EZ zone.
true_file = "../data/ez/data_ez.csv"
ez_true_data = pd.read_csv(true_file)

# Counter-example (negative sample) data for the EZ zone, keyed by variant name.
counter_files = {
    "combined": pd.read_csv('../data/ez/data_ez_negative_sample.csv')
}

# One labelled true-vs-counter DataFrame per counter-example variant.
combined_data = prepare_data(ez_true_data, counter_files, max_index=EZ_MAX_INDEX)

# Train and evaluate one model per variant; outputs are written under ../models/ez.
# time_limit is in seconds (21000 s = 5 h 50 min).
generateModels(combined_data, 'ez', EZ_MAX_INDEX, time_limit=21000)

  ez_true_data = pd.read_csv(true_file)
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.9.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Memory Avail:       5.08 GB / 15.35 GB (33.1%)
Disk Space Avail:   45.24 GB / 100.00 GB (45.2%)
Presets specified: ['good_quality', 'optimize_for_deployment']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked o

[36m(_ray_fit pid=30228)[0m [1000]	valid_set's binary_logloss: 0.240281	valid_set's f1: 0.906331
[36m(_ray_fit pid=13476)[0m [1000]	valid_set's binary_logloss: 0.22908	valid_set's f1: 0.912018[32m [repeated 4x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m


[36m(_dystack pid=16424)[0m 	0.9114	 = Validation score   (f1)
[36m(_dystack pid=16424)[0m 	116.78s	 = Training   runtime
[36m(_dystack pid=16424)[0m 	4.86s	 = Validation runtime
[36m(_dystack pid=16424)[0m Fitting model: LightGBM_BAG_L1 ... Training model for up to 3347.16s of the 5087.03s of remaining time.
[36m(_dystack pid=16424)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=2, gpus=0, memory=3.21%)


[36m(_ray_fit pid=27592)[0m [1000]	valid_set's binary_logloss: 0.225228	valid_set's f1: 0.913907
[36m(_ray_fit pid=20084)[0m [1000]	valid_set's binary_logloss: 0.243158	valid_set's f1: 0.909091


[36m(_dystack pid=16424)[0m 	0.9086	 = Validation score   (f1)
[36m(_dystack pid=16424)[0m 	155.85s	 = Training   runtime
[36m(_dystack pid=16424)[0m 	4.5s	 = Validation runtime
[36m(_dystack pid=16424)[0m Fitting model: RandomForestGini_BAG_L1 ... Training model for up to 3183.49s of the 4923.37s of remaining time.
[36m(_dystack pid=16424)[0m 	0.8727	 = Validation score   (f1)
[36m(_dystack pid=16424)[0m 	7.79s	 = Training   runtime
[36m(_dystack pid=16424)[0m 	6.29s	 = Validation runtime
[36m(_dystack pid=16424)[0m Fitting model: RandomForestEntr_BAG_L1 ... Training model for up to 3168.66s of the 4908.54s of remaining time.
[36m(_dystack pid=16424)[0m 	0.8768	 = Validation score   (f1)
[36m(_dystack pid=16424)[0m 	7.4s	 = Training   runtime
[36m(_dystack pid=16424)[0m 	6.26s	 = Validation runtime
[36m(_dystack pid=16424)[0m Fitting model: CatBoost_BAG_L1 ... Training model for up to 3154.29s of the 4894.16s of remaining time.
[36m(_dystack pid=16424)[0m 	Fi

Results for combined:
                           model  score_test score_val eval_metric  \
0       WeightedEnsemble_L3_FULL    0.929105      None          f1   
1           CatBoost_BAG_L2_FULL    0.928802      None          f1   
2      LightGBMLarge_BAG_L2_FULL    0.922367      None          f1   
3         LightGBMXT_BAG_L1_FULL    0.913837      None          f1   
4            XGBoost_BAG_L1_FULL    0.912855      None          f1   
5           LightGBM_BAG_L1_FULL    0.910082      None          f1   
6      LightGBMLarge_BAG_L1_FULL    0.909850      None          f1   
7           CatBoost_BAG_L1_FULL    0.909310      None          f1   
8   RandomForestEntr_BAG_L1_FULL    0.882910      None          f1   
9   RandomForestGini_BAG_L1_FULL    0.879931      None          f1   
10    ExtraTreesEntr_BAG_L1_FULL    0.875536      None          f1   
11   NeuralNetFastAI_BAG_L1_FULL    0.875286      None          f1   
12    ExtraTreesGini_BAG_L1_FULL    0.873179      None          f1  

In [5]:
# Number of feature columns in the ZE zone CSV files (B1 .. B550).
# Defined once so data preparation and model training cannot disagree.
ZE_MAX_INDEX = 550

# Positive ("true") samples for the ZE zone.
true_file = "../data/ze/data_ze.csv"
ze_true_data = pd.read_csv(true_file)

# Counter-example (negative sample) data for the ZE zone, keyed by variant name.
counter_files = {
    "combined": pd.read_csv('../data/ze/data_ze_negative_sample.csv')
}

# One labelled true-vs-counter DataFrame per counter-example variant.
combined_data = prepare_data(ze_true_data, counter_files, max_index=ZE_MAX_INDEX)

# Train and evaluate one model per variant; outputs are written under ../models/ze.
# time_limit is in seconds (21000 s = 5 h 50 min).
generateModels(combined_data, 'ze', ZE_MAX_INDEX, time_limit=21000)

  ze_true_data = pd.read_csv(true_file)
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.9.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Memory Avail:       4.46 GB / 15.35 GB (29.0%)
Disk Space Avail:   44.66 GB / 100.00 GB (44.7%)
Presets specified: ['good_quality', 'optimize_for_deployment']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked o

[36m(_ray_fit pid=14628)[0m [1000]	valid_set's binary_logloss: 0.244408	valid_set's f1: 0.911004


[36m(_dystack pid=16408)[0m 	0.9076	 = Validation score   (f1)
[36m(_dystack pid=16408)[0m 	87.82s	 = Training   runtime
[36m(_dystack pid=16408)[0m 	4.35s	 = Validation runtime
[36m(_dystack pid=16408)[0m Fitting model: LightGBM_BAG_L1 ... Training model for up to 3379.84s of the 5120.55s of remaining time.
[36m(_dystack pid=16408)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=2, gpus=0, memory=3.86%)


[36m(_ray_fit pid=27960)[0m [1000]	valid_set's binary_logloss: 0.237465	valid_set's f1: 0.9121[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m


[36m(_dystack pid=16408)[0m 	0.908	 = Validation score   (f1)
[36m(_dystack pid=16408)[0m 	104.13s	 = Training   runtime
[36m(_dystack pid=16408)[0m 	4.4s	 = Validation runtime
[36m(_dystack pid=16408)[0m Fitting model: RandomForestGini_BAG_L1 ... Training model for up to 3267.84s of the 5008.55s of remaining time.
[36m(_dystack pid=16408)[0m 	0.8762	 = Validation score   (f1)
[36m(_dystack pid=16408)[0m 	6.96s	 = Training   runtime
[36m(_dystack pid=16408)[0m 	6.27s	 = Validation runtime
[36m(_dystack pid=16408)[0m Fitting model: RandomForestEntr_BAG_L1 ... Training model for up to 3253.88s of the 4994.58s of remaining time.
[36m(_dystack pid=16408)[0m 	0.8799	 = Validation score   (f1)
[36m(_dystack pid=16408)[0m 	7.08s	 = Training   runtime
[36m(_dystack pid=16408)[0m 	6.17s	 = Validation runtime
[36m(_dystack pid=16408)[0m Fitting model: CatBoost_BAG_L1 ... Training model for up to 3239.91s of the 4980.62s of remaining time.
[36m(_dystack pid=16408)[0m 	Fi

Results for combined:
                           model  score_test score_val eval_metric  \
0         LightGBMXT_BAG_L1_FULL    0.911691      None          f1   
1         LightGBMXT_BAG_L2_FULL    0.911343      None          f1   
2      LightGBMLarge_BAG_L1_FULL    0.911308      None          f1   
3            XGBoost_BAG_L1_FULL    0.910693      None          f1   
4           LightGBM_BAG_L1_FULL    0.909899      None          f1   
5           CatBoost_BAG_L1_FULL    0.901141      None          f1   
6    NeuralNetFastAI_BAG_L1_FULL    0.896934      None          f1   
7   RandomForestEntr_BAG_L1_FULL    0.884800      None          f1   
8   RandomForestGini_BAG_L1_FULL    0.884798      None          f1   
9     NeuralNetTorch_BAG_L1_FULL    0.880514      None          f1   
10    ExtraTreesEntr_BAG_L1_FULL    0.876652      None          f1   
11    ExtraTreesGini_BAG_L1_FULL    0.874442      None          f1   

    pred_time_test  pred_time_val     fit_time  pred_time_test_marg