In [1]:
# 1. Import library
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split
import pandas as pd
import warnings
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Suppress warnings whose message matches the load_learner / insecure-pickle
# pattern (presumably raised by the FastAI-based models — confirm).
warnings.filterwarnings("ignore", message=".*load_learner.*insecure pickle.*")

# 2. Load the data
# Training frame: read the raw CSV and strip the identifier column in one step.
train_df = pd.read_csv('../Data/original/train_2025.csv').drop(columns="claim_number")

# Test frame: keep the identifiers aside in `test_id` before removing them,
# together with the "fraud" column, from the frame handed to the model.
test_df = pd.read_csv('../Data/original/test_2025.csv')
test_id = test_df['claim_number']
test_df = test_df.drop(columns=["claim_number", "fraud"])

In [None]:
# 3. Train the model
# The output directory is suffixed with the launch time (month-day_hour-minute)
# so that separate runs are written to separate folders.
timestamp = datetime.now().strftime("%m%d_%H%M")
model_dir = f"../AutogluonModels/Model_{timestamp}"

# Configure a binary classifier for the "fraud" label, scored with F1.
predictor = TabularPredictor(
    label="fraud",
    problem_type="binary",
    eval_metric="f1",
    path=model_dir,
)

# Fit on the full training frame using the "experimental" preset.
predictor = predictor.fit(
    train_data=train_df,
    presets="experimental",
    verbosity=2,
)

Preset alias specified: 'experimental' maps to 'experimental_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.11.12
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Memory Avail:       11.82 GB / 31.93 GB (37.0%)
Disk Space Avail:   293.75 GB / 935.97 GB (31.4%)
Presets specified: ['experimental']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Runn

In [8]:
# Display the leaderboard of the models held by the fitted predictor
# (one row per model, with validation score and timing columns).
predictor.leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L3,0.373548,f1,36.478376,1407.055413,0.004996,4.449182,3,True,181
1,NeuralNetFastAI_r111_BAG_L2,0.370380,f1,34.194842,1240.653982,0.262485,14.064494,2,True,180
2,NeuralNetFastAI_r11_BAG_L2,0.365030,f1,34.801233,1307.620676,0.868876,81.031188,2,True,155
3,NeuralNetFastAI_r134_BAG_L2,0.362901,f1,34.401792,1265.479524,0.469435,38.890035,2,True,175
4,NeuralNetFastAI_r102_BAG_L2,0.362052,f1,34.366167,1244.828349,0.433811,18.238861,2,True,146
...,...,...,...,...,...,...,...,...,...,...
176,XGBoost_r98_BAG_L1,0.000000,f1,0.098057,7.134554,0.098057,7.134554,1,True,58
177,NeuralNetTorch_r121_BAG_L1,0.000000,f1,0.486609,43.042774,0.486609,43.042774,1,True,93
178,XGBoost_r31_BAG_L1,0.000000,f1,1.265317,4.208216,1.265317,4.208216,1,True,80
179,LightGBM_r196_BAG_L2,0.000000,f1,33.987560,1230.864747,0.055203,4.275259,2,True,163


In [11]:
# 4. Predict on the test set
# `test_df` is the prepared feature frame (identifier column already removed).
# It must not be reassigned in this cell: leaving it untouched keeps the cell
# safe to re-run, since every run predicts on the same prepared features.
predictions = predictor.predict(test_df)

# Fail loudly if identifiers and predictions do not line up row for row,
# rather than writing a submission with missing or shifted values.
assert len(predictions) == len(test_id), (
    f"got {len(predictions)} predictions for {len(test_id)} claim numbers"
)

# 5. Save predictions to CSV
submission = pd.DataFrame({
    "claim_number": test_id,  # Important: use the original claim_number
    "fraud": predictions,     # Predicted fraud labels (0 or 1)
})
submission.to_csv("../Submit/submissions/submission.csv", index=False)