# Necessary library installations

In [1]:
# Installation of AutoML SOTA library and dask
!pip install autogluon
!pip install dask



# Some simple Feature Engineering and Data Cleaning

In [None]:
import numpy as np
import pandas as pd
import os
from autogluon.tabular  import TabularDataset, TabularPredictor


X_train = pd.read_csv('X_train_Hi5.csv')


In [31]:
X_test = pd.read_csv('X_test_Hi5.csv',low_memory=False)

In [32]:
Final_predi=pd.read_csv("y_test_Hi5.csv", delimiter=',')
 # This is the file that contains the predictions of the models that we have trained in the previous steps

In [33]:
y_test=Final_predi['piezo_groundwater_level_category'].to_numpy()

In [34]:
# Remove columns with a missing rate higher than 50%
missing_rates = X_train.isnull().mean()
columns_to_keep = missing_rates[missing_rates <= 0.5].index

X_train=X_train[columns_to_keep[1:]]
X_test = X_test[columns_to_keep[1:-1]]

In [35]:
# Convert INSEE columns to float64 data type
insee_columns = X_train.filter(regex='^insee_').columns
X_train[insee_columns] = X_train[insee_columns].apply(pd.to_numeric, errors='coerce')
X_test[insee_columns] = X_test[insee_columns].apply(pd.to_numeric, errors='coerce')
print(X_train[insee_columns].dtypes)

insee_%_agri              float64
insee_pop_commune         float64
insee_med_living_level    float64
insee_%_ind               float64
insee_%_const             float64
dtype: object


In [36]:
non_numeric_cols = X_test.select_dtypes(include=['object', 'category']).columns
time_cols = [col for col in X_train.columns if 'date' in col.lower()]
categorical_cols = [col for col in non_numeric_cols if col not in time_cols]

In [37]:
# Define some preprocessing functions
def date(df):
    df['piezo_measurement_date'] = pd.to_datetime(df['piezo_measurement_date'])
    df['year'] = df['piezo_measurement_date'].dt.year
    df['month'] = df['piezo_measurement_date'].dt.month
    df['day'] = df['piezo_measurement_date'].dt.day
    df = df.drop(columns=['piezo_measurement_date'])
    return df

def engineer_features(df):

    for window in [7, 14]:
        df[f'temp_avg_{window}d'] = df.groupby('piezo_station_bss_code')['meteo_temperature_avg'] \
            .rolling(window=window, min_periods=1) \
            .mean() \
            .reset_index(0, drop=True)

        df[f'rain_sum_{window}d'] = df.groupby('piezo_station_bss_code')['meteo_rain_height'] \
            .rolling(window=window, min_periods=1) \
            .sum() \
            .reset_index(0, drop=True)
    df['rain_temp_interaction'] = df['meteo_rain_height'] * df['meteo_temperature_avg']
    return df

In [38]:
columns_to_drop = ['piezo_station_update_date', 'meteo_date', 'hydro_observation_date_elab']
X_train = X_train.drop(columns=columns_to_drop)
X_test = X_test.drop(columns=columns_to_drop)

# DateTime preprocessing
X_train=date(X_train)
X_test=date(X_test)

# Preliminary Feature Engineering
X_train=engineer_features(X_train)
X_test=engineer_features(X_test)

In [39]:
non_numeric_cols = X_test.select_dtypes(include=['object', 'category']).columns
numeric_cols=X_train.select_dtypes(include=['number']).columns
time_cols = [col for col in X_train.columns if 'date' in col.lower()]
categorical_cols = [col for col in non_numeric_cols if col not in time_cols]

In [40]:
for col in numeric_cols:
    X_train[col].fillna(X_train[col].median(), inplace=True)
    X_test[col].fillna(X_test[col].median(), inplace=True)
for col in categorical_cols:
    X_train[col].fillna('nan', inplace=True)
    X_test[col].fillna('nan', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train[col].fillna(X_train[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_test[col].fillna(X_test[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we 

# Model Training

In [44]:
print(X_train.dtypes)


piezo_station_department_code         object
piezo_station_investigation_depth    float64
piezo_station_department_name         object
piezo_station_commune_code_insee      object
piezo_station_pe_label                object
                                      ...   
temp_avg_7d                          float64
rain_sum_7d                          float64
temp_avg_14d                         float64
rain_sum_14d                         float64
rain_temp_interaction                float64
Length: 95, dtype: object


In [45]:
import optuna
from autogluon.tabular import TabularDataset, TabularPredictor

label_column ='piezo_groundwater_level_category'

def objective(trial):
    # Suggest hyperparameters for XGBoost
    hyperparameters = {
        
        'GBM': {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'early_stopping_rounds': trial.suggest_int('early_stopping_rounds', 10, 50),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        # Add additional hyperparameters if needed, based on GBM's supported parameters
            },
        'XGB': {
            'n_estimators': trial.suggest_int('n_estimators', 50, 200),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        },
    }

    # Train TabularPredictor
    predictor = TabularPredictor(label=label_column, eval_metric='f1_weighted')
    try:
        predictor.fit(train_data=X_train, hyperparameters=hyperparameters, verbosity=3)
        # Evaluate predictions
        predictions = predictor.predict(X_test)
        score = predictor.evaluate_predictions(y_true=X_test[label_column], y_pred=predictions)
        return -score['f1_weighted']  # Optuna minimizes, so return negative metric
    except Exception as e:
        print(f"Trial failed with exception: {e}")
        return float('inf')  # Return a high value for failed trials


# Create the Optuna study
study = optuna.create_study(direction='maximize')  # Maximize f1_weighted
study.optimize(objective, n_trials=10)



[I 2025-01-01 20:14:43,035] A new study created in memory with name: no-name-32a4e645-9630-472c-9db4-833669e472c0
No path specified. Models will be saved in: "AutogluonModels\ag-20250101_191443"
Verbosity: 3 (Detailed Logging)
AutoGluon Version:  1.2
Python Version:     3.11.7
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22631
CPU Count:          12
GPU Count:          0
Memory Avail:       13.15 GB / 39.32 GB (33.4%)
Disk Space Avail:   180.19 GB / 449.47 GB (40.1%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Rec

[0]	validation_0-mlogloss:1.56643	validation_0-_f1_weighted:-0.33302
[50]	validation_0-mlogloss:1.20327	validation_0-_f1_weighted:-0.54159
[100]	validation_0-mlogloss:1.10023	validation_0-_f1_weighted:-0.59996
[150]	validation_0-mlogloss:1.02983	validation_0-_f1_weighted:-0.63421
[177]	validation_0-mlogloss:0.99427	validation_0-_f1_weighted:-0.65219


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_191443\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_191443\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.6522	 = Validation score   (f1_weighted)
	401.44s	 = Training   runtime
	0.49s	 = Validation runtime
	57225.6	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_191443\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_191443\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_192545\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_192545\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_192545"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.57540	validation_0-_f1_weighted:-0.42503
[50]	validation_0-mlogloss:0.99715	validation_0-_f1_weighted:-0.71147
[100]	validation_0-mlogloss:0.86415	validation_0-_f1_weighted:-0.75259
[109]	validation_0-mlogloss:0.85171	validation_0-_f1_weighted:-0.75648


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_192545\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_192545\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.7565	 = Validation score   (f1_weighted)
	406.87s	 = Training   runtime
	0.73s	 = Validation runtime
	38626.9	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_192545\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_192545\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_193711\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_193711\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_193711"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60597	validation_0-_f1_weighted:-0.36357
[50]	validation_0-mlogloss:1.48378	validation_0-_f1_weighted:-0.47441
[100]	validation_0-mlogloss:1.40605	validation_0-_f1_weighted:-0.50004
[141]	validation_0-mlogloss:1.36038	validation_0-_f1_weighted:-0.51125


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_193711\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_193711\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.5113	 = Validation score   (f1_weighted)
	376.17s	 = Training   runtime
	0.4s	 = Validation runtime
	71588.5	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_193711\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_193711\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sav

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_194809\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_194809\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_194809"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.54742	validation_0-_f1_weighted:-0.41228
[50]	validation_0-mlogloss:0.97479	validation_0-_f1_weighted:-0.68662
[100]	validation_0-mlogloss:0.87250	validation_0-_f1_weighted:-0.72950
[104]	validation_0-mlogloss:0.86401	validation_0-_f1_weighted:-0.73338


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_194809\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_194809\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.7334	 = Validation score   (f1_weighted)
	226.68s	 = Training   runtime
	0.4s	 = Validation runtime
	70193.1	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_194809\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_194809\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sav

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_195511\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_195511\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_195511"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60122	validation_0-_f1_weighted:-0.28396
[50]	validation_0-mlogloss:1.46330	validation_0-_f1_weighted:-0.36249
[93]	validation_0-mlogloss:1.42857	validation_0-_f1_weighted:-0.38255


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_195511\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_195511\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.3826	 = Validation score   (f1_weighted)
	263.76s	 = Training   runtime
	0.35s	 = Validation runtime
	80336.3	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_195511\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_195511\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_200300\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_200300\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_200300"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60708	validation_0-_f1_weighted:-0.36692
[50]	validation_0-mlogloss:1.51380	validation_0-_f1_weighted:-0.43772
[100]	validation_0-mlogloss:1.45017	validation_0-_f1_weighted:-0.45462
[116]	validation_0-mlogloss:1.43345	validation_0-_f1_weighted:-0.45945


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_200300\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_200300\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.4595	 = Validation score   (f1_weighted)
	451.63s	 = Training   runtime
	0.51s	 = Validation runtime
	55977.9	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_200300\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_200300\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_201531\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_201531\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_201531"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60745	validation_0-_f1_weighted:-0.34307
[50]	validation_0-mlogloss:1.52882	validation_0-_f1_weighted:-0.40021
[100]	validation_0-mlogloss:1.47616	validation_0-_f1_weighted:-0.41887
[150]	validation_0-mlogloss:1.43665	validation_0-_f1_weighted:-0.43212
[189]	validation_0-mlogloss:1.41250	validation_0-_f1_weighted:-0.44037


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_201531\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_201531\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.4404	 = Validation score   (f1_weighted)
	467.75s	 = Training   runtime
	0.43s	 = Validation runtime
	66219.2	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_201531\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_201531\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_202803\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_202803\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_202803"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60798	validation_0-_f1_weighted:-0.30512
[50]	validation_0-mlogloss:1.54940	validation_0-_f1_weighted:-0.34338
[72]	validation_0-mlogloss:1.53164	validation_0-_f1_weighted:-0.34795


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_202803\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_202803\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.3479	 = Validation score   (f1_weighted)
	227.19s	 = Training   runtime
	0.32s	 = Validation runtime
	88535.0	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_202803\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_202803\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_203629\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_203629\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_203629"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60187	validation_0-_f1_weighted:-0.36514
[50]	validation_0-mlogloss:1.38382	validation_0-_f1_weighted:-0.48957
[100]	validation_0-mlogloss:1.28702	validation_0-_f1_weighted:-0.52939
[150]	validation_0-mlogloss:1.23165	validation_0-_f1_weighted:-0.55481
[154]	validation_0-mlogloss:1.22818	validation_0-_f1_weighted:-0.55593


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_203629\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_203629\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.5559	 = Validation score   (f1_weighted)
	480.49s	 = Training   runtime
	0.58s	 = Validation runtime
	49211.2	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_203629\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_203629\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_204927\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_204927\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_204927"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60376	validation_0-_f1_weighted:-0.28329
[50]	validation_0-mlogloss:1.46727	validation_0-_f1_weighted:-0.38011
[100]	validation_0-mlogloss:1.41502	validation_0-_f1_weighted:-0.40392
[150]	validation_0-mlogloss:1.38257	validation_0-_f1_weighted:-0.42479
[195]	validation_0-mlogloss:1.36197	validation_0-_f1_weighted:-0.43859


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_204927\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_204927\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.4386	 = Validation score   (f1_weighted)
	463.49s	 = Training   runtime
	0.46s	 = Validation runtime
	62191.5	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_204927\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_204927\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_210206\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_210206\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_210206"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.57120	validation_0-_f1_weighted:-0.31393
[50]	validation_0-mlogloss:1.28608	validation_0-_f1_weighted:-0.48602
[100]	validation_0-mlogloss:1.20377	validation_0-_f1_weighted:-0.53575
[150]	validation_0-mlogloss:1.14739	validation_0-_f1_weighted:-0.56410
[162]	validation_0-mlogloss:1.13323	validation_0-_f1_weighted:-0.57116


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_210206\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_210206\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.5712	 = Validation score   (f1_weighted)
	388.68s	 = Training   runtime
	0.49s	 = Validation runtime
	58175.8	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_210206\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_210206\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_211326\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_211326\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_211326"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.57021	validation_0-_f1_weighted:-0.43683
[50]	validation_0-mlogloss:0.96461	validation_0-_f1_weighted:-0.71544
[54]	validation_0-mlogloss:0.94838	validation_0-_f1_weighted:-0.72102


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_211326\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_211326\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.721	 = Validation score   (f1_weighted)
	254.36s	 = Training   runtime
	0.44s	 = Validation runtime
	64811.1	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_211326\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_211326\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sav

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_212239\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_212239\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_212239"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.56092	validation_0-_f1_weighted:-0.41161
[50]	validation_0-mlogloss:1.00973	validation_0-_f1_weighted:-0.67579
[100]	validation_0-mlogloss:0.91671	validation_0-_f1_weighted:-0.71338
[132]	validation_0-mlogloss:0.87007	validation_0-_f1_weighted:-0.73114


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_212239\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_212239\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.7311	 = Validation score   (f1_weighted)
	397.57s	 = Training   runtime
	0.59s	 = Validation runtime
	47766.4	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_212239\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_212239\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_213408\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_213408\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_213408"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.59245	validation_0-_f1_weighted:-0.38967
[50]	validation_0-mlogloss:1.24468	validation_0-_f1_weighted:-0.56547
[100]	validation_0-mlogloss:1.14372	validation_0-_f1_weighted:-0.60898
[150]	validation_0-mlogloss:1.09370	validation_0-_f1_weighted:-0.63214
[174]	validation_0-mlogloss:1.07674	validation_0-_f1_weighted:-0.63928


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_213408\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_213408\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.6393	 = Validation score   (f1_weighted)
	512.65s	 = Training   runtime
	0.66s	 = Validation runtime
	43150.6	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_213408\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_213408\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_214731\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_214731\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_214731"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.59864	validation_0-_f1_weighted:-0.43692
[50]	validation_0-mlogloss:1.26265	validation_0-_f1_weighted:-0.64664
[90]	validation_0-mlogloss:1.13096	validation_0-_f1_weighted:-0.68061


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_214731\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_214731\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.6806	 = Validation score   (f1_weighted)
	389.61s	 = Training   runtime
	0.55s	 = Validation runtime
	51057.3	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_214731\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_214731\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_215858\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_215858\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_215858"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.58916	validation_0-_f1_weighted:-0.34496
[50]	validation_0-mlogloss:1.29227	validation_0-_f1_weighted:-0.49763
[100]	validation_0-mlogloss:1.22060	validation_0-_f1_weighted:-0.53825
[120]	validation_0-mlogloss:1.19964	validation_0-_f1_weighted:-0.55122


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_215858\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_215858\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.5512	 = Validation score   (f1_weighted)
	351.59s	 = Training   runtime
	0.4s	 = Validation runtime
	71447.4	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_215858\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_215858\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sav

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_220934\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_220934\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_220934"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.54272	validation_0-_f1_weighted:-0.39108
[50]	validation_0-mlogloss:1.01566	validation_0-_f1_weighted:-0.65381
[100]	validation_0-mlogloss:0.90116	validation_0-_f1_weighted:-0.70696
[143]	validation_0-mlogloss:0.82919	validation_0-_f1_weighted:-0.73735


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_220934\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_220934\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.7373	 = Validation score   (f1_weighted)
	417.91s	 = Training   runtime
	0.55s	 = Validation runtime
	51887.0	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_220934\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_220934\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_222115\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_222115\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_222115"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.59867	validation_0-_f1_weighted:-0.32626
[50]	validation_0-mlogloss:1.39389	validation_0-_f1_weighted:-0.42757
[100]	validation_0-mlogloss:1.32825	validation_0-_f1_weighted:-0.46467
[150]	validation_0-mlogloss:1.29256	validation_0-_f1_weighted:-0.48691
[173]	validation_0-mlogloss:1.27953	validation_0-_f1_weighted:-0.49673


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_222115\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_222115\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.4967	 = Validation score   (f1_weighted)
	352.29s	 = Training   runtime
	0.53s	 = Validation runtime
	53288.9	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_222115\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_222115\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223127\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223127\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223127"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.56190	validation_0-_f1_weighted:-0.38918
[50]	validation_0-mlogloss:1.09378	validation_0-_f1_weighted:-0.62391
[73]	validation_0-mlogloss:1.04061	validation_0-_f1_weighted:-0.64981


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223127\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223127\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.6498	 = Validation score   (f1_weighted)
	223.89s	 = Training   runtime
	0.32s	 = Validation runtime
	87203.9	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223127\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223127\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223949\learner.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223949\predictor.pkl
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223949"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quant

[0]	validation_0-mlogloss:1.60476	validation_0-_f1_weighted:-0.32547
[50]	validation_0-mlogloss:1.46532	validation_0-_f1_weighted:-0.39349
[100]	validation_0-mlogloss:1.40555	validation_0-_f1_weighted:-0.42005
[105]	validation_0-mlogloss:1.40098	validation_0-_f1_weighted:-0.42307


Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223949\models\XGBoost\model.pkl
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223949\utils\attr\XGBoost\y_pred_proba_val.pkl
	0.4231	 = Validation score   (f1_weighted)
	246.54s	 = Training   runtime
	0.33s	 = Validation runtime
	85413.4	 = Inference  throughput (rows/s | 28304 batch size)
Saving c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223949\models\trainer.pkl
Loading: c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_223949\utils\attr\XGBoost\y_pred_proba_val.pkl
Model configs that will be trained (in order):
	WeightedEnsemble_L2: 	{'ag_args': {'valid_base': False, 'name_bag_suffix': '', 'model_type': <class 'autogluon.core.models.greedy_ensemble.greedy_weighted_ensemble_model.GreedyWeightedEnsembleModel'>, 'priority': 0}, 'ag_args_ensemble': {'save_bag_folds': True}}
Fitting model: WeightedEnsemble_L2 ...
	Fitting WeightedEnsemble_L2 with 'num_gpus': 0, 'num_cpus': 12
Sa

Trial failed with exception: 'piezo_groundwater_level_category'
Best hyperparameters: {'n_estimators': 178, 'max_depth': 6, 'learning_rate': 0.28333206115755916, 'subsample': 0.5198366253240017, 'colsample_bytree': 0.8522255121629868}
Best score: inf


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "c:\Users\JLASSI\Downloads\HAck\AutogluonModels\ag-20250101_224716"
Train Data Rows:    2830316
Train Data Columns: 94
Label Column:       piezo_groundwater_level_category
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == object).
	5 unique label values:  ['High', 'Very High', 'Very Low', 'Low', 'Average']
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quantile'])
Problem Type:       multiclass
Preprocessing data ...
Train Data Class Count: 5
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGene

AssertionError: Unknown model type specified in hyperparameters: 'XGBoost'. Valid model types: ['RF', 'XT', 'KNN', 'GBM', 'CAT', 'XGB', 'NN_TORCH', 'LR', 'FASTAI', 'TRANSF', 'AG_TEXT_NN', 'AG_IMAGE_NN', 'AG_AUTOMM', 'FT_TRANSFORMER', 'TABPFN', 'TABPFNMIX', 'FASTTEXT', 'ENS_WEIGHTED', 'SIMPLE_ENS_WEIGHTED', 'IM_RULEFIT', 'IM_GREEDYTREE', 'IM_FIGS', 'IM_HSTREE', 'IM_BOOSTEDRULES', 'VW', 'DUMMY']

In [47]:
# Print the best parameters and score
print("Best hyperparameters:", study.best_params)
print("Best score:", study.best_value)



Best hyperparameters: {'n_estimators': 178, 'max_depth': 6, 'learning_rate': 0.28333206115755916, 'subsample': 0.5198366253240017, 'colsample_bytree': 0.8522255121629868}
Best score: inf


In [48]:
# Extract best hyperparameters for each model type
best_hyperparameters = {
    'XGB': {
        'n_estimators': study.best_params.get('n_estimators', 100),
        'max_depth': study.best_params.get('max_depth', 6),
        'learning_rate': study.best_params.get('learning_rate', 0.1),
        'subsample': study.best_params.get('subsample', 1.0),
        'colsample_bytree': study.best_params.get('colsample_bytree', 1.0),
    },
    'GBM': {
        'n_estimators': study.best_params.get('n_estimators', 100),
        'early_stopping_rounds': study.best_params.get('early_stopping_rounds', 20),
        'learning_rate': study.best_params.get('learning_rate', 0.1),
    },
}

# Train the final model with the best hyperparameters
final_predictor = TabularPredictor(label=label_column, eval_metric='f1_weighted')
final_predictor.fit(train_data=X_train, hyperparameters=best_hyperparameters, verbosity=3)


No path specified. Models will be saved in: "AutogluonModels\ag-20250101_225659"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.11.7
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22631
CPU Count:          12
Memory Avail:       14.42 GB / 39.32 GB (36.7%)
Disk Space Avail:   148.42 GB / 449.47 GB (33.0%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.
	presets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.
	presets='high'         : Strong accuracy with fast inference speed.
	prese

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x25b23f62c50>

In [49]:
predictions = final_predictor.predict(X_test)

In [52]:
accuracy_score = np.mean(y_test==predictions)

In [53]:
accuracy_score

0.5222886480543448