# TabNet




In [None]:
pip install pandas scikit-learn openml

Collecting openml
  Downloading openml-0.15.0-py3-none-any.whl.metadata (9.9 kB)
Collecting liac-arff>=2.4.0 (from openml)
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting xmltodict (from openml)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting minio (from openml)
  Downloading minio-7.2.10-py3-none-any.whl.metadata (6.5 kB)
Collecting pycryptodome (from minio->openml)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading openml-0.15.0-py3-none-any.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.0/158.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading minio-7.2.10-py3-none-any.whl (93 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.9/93.9 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Downloading pyc

In [None]:
pip install torch pytorch_tabnet

Collecting pytorch_tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.5/44.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytorch_tabnet
Successfully installed pytorch_tabnet-4.1.0


In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": trial.suggest_int("cat_emb_dim", 1, 3),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": trial.suggest_categorical("mask_type", ["sparsemax", "entmax"]),
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": rs.randint(1, 4),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": rs.choice(["sparsemax", "entmax"]),
    }
    return params

# Load data from OpenML, remove classes, and encode categorical features
def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Remove class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify categorical columns and encode them
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_dims = []  # Track max values to set embedding dimensions correctly

    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])  # Ensure all categorical columns are integer-encoded
        cat_dims.append(len(uniques))  # Track unique counts as embedding dimensions

    # Replace any remaining non-numeric entries with NaN and drop them or fill appropriately
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Or replace with another valid category ID if needed

    # Encode target variable
    y, _ = pd.factorize(y)  # Converts labels to numeric format

    return X, y, cat_dims

# Example configuration and arguments
class Args:
    objective = "classification"  # Or "regression" based on task
    batch_size = 32
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, X, cat_dims):
        # Get categorical column indices based on factorized object columns
        self.cat_idx = [X.columns.get_loc(col) for col in X.select_dtypes(include=['object']).columns]
        self.cat_dims = cat_dims  # Use dimensions from factorization for embedding sizes

# Task ID1: 14965

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14965)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.3465  | eval_logloss: 0.89848 |  0:01:38s
epoch 1  | loss: 0.28701 | eval_logloss: 0.42651 |  0:03:17s
epoch 2  | loss: 0.28254 | eval_logloss: 0.29386 |  0:04:55s
epoch 3  | loss: 0.28017 | eval_logloss: 0.30636 |  0:06:34s
epoch 4  | loss: 0.28278 | eval_logloss: 0.31493 |  0:08:13s
epoch 5  | loss: 0.28208 | eval_logloss: 0.39377 |  0:09:54s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.29386




Validation Accuracy: 0.8836


# Task ID2: 9977

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9977)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.3963  | eval_logloss: 1.45557 |  0:01:18s
epoch 1  | loss: 0.17881 | eval_logloss: 1.02047 |  0:02:49s
epoch 2  | loss: 0.1493  | eval_logloss: 1.04512 |  0:04:06s
epoch 3  | loss: 0.1381  | eval_logloss: 0.92759 |  0:05:23s
epoch 4  | loss: 0.12986 | eval_logloss: 0.53905 |  0:06:40s
epoch 5  | loss: 0.12656 | eval_logloss: 0.87485 |  0:07:58s
epoch 6  | loss: 0.12038 | eval_logloss: 0.46256 |  0:09:20s
epoch 7  | loss: 0.11966 | eval_logloss: 0.25007 |  0:10:38s
epoch 8  | loss: 0.11339 | eval_logloss: 1.29685 |  0:11:56s
epoch 9  | loss: 0.11283 | eval_logloss: 0.92155 |  0:13:13s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.25007




Validation Accuracy: 0.9367


# Task ID3: 34539

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=34539)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.26029 | eval_logloss: 0.68683 |  0:01:12s
epoch 1  | loss: 0.22643 | eval_logloss: 0.25801 |  0:02:26s
epoch 2  | loss: 0.22433 | eval_logloss: 0.24328 |  0:03:38s
epoch 3  | loss: 0.22227 | eval_logloss: 0.21793 |  0:04:51s
epoch 4  | loss: 0.22344 | eval_logloss: 0.21722 |  0:06:04s
epoch 5  | loss: 0.2225  | eval_logloss: 0.23177 |  0:07:18s
epoch 6  | loss: 0.2221  | eval_logloss: 0.2354  |  0:08:30s
epoch 7  | loss: 0.22352 | eval_logloss: 0.28985 |  0:09:42s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.21722




Validation Accuracy: 0.9435


# Task ID4: 146606

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146606)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.71186 | eval_logloss: 0.7747  |  0:03:32s
epoch 1  | loss: 0.67884 | eval_logloss: 0.708   |  0:07:51s
epoch 2  | loss: 0.67799 | eval_logloss: 0.69193 |  0:11:32s
epoch 3  | loss: 0.67771 | eval_logloss: 0.71725 |  0:15:06s
epoch 4  | loss: 0.67498 | eval_logloss: 0.67667 |  0:18:39s
epoch 5  | loss: 0.67353 | eval_logloss: 0.7062  |  0:22:16s
epoch 6  | loss: 0.66881 | eval_logloss: 0.72076 |  0:25:55s
epoch 7  | loss: 0.66763 | eval_logloss: 0.67615 |  0:29:32s
epoch 8  | loss: 0.66067 | eval_logloss: 0.65701 |  0:33:05s
epoch 9  | loss: 0.66065 | eval_logloss: 0.65639 |  0:36:40s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.65639




Validation Accuracy: 0.6150


# Task ID5: 7592



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=7592)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.51344 | eval_logloss: 0.8332  |  0:01:46s
epoch 1  | loss: 0.41641 | eval_logloss: 0.57792 |  0:03:35s
epoch 2  | loss: 0.40977 | eval_logloss: 0.51473 |  0:05:23s
epoch 3  | loss: 0.41555 | eval_logloss: 0.91587 |  0:07:14s
epoch 4  | loss: 0.41224 | eval_logloss: 1.37989 |  0:09:06s
epoch 5  | loss: 0.41125 | eval_logloss: 0.81665 |  0:10:55s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.51473




Validation Accuracy: 0.7848


# Task ID6: 146195



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146195)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.896   | eval_logloss: 2.01412 |  0:02:33s
epoch 1  | loss: 0.8415  | eval_logloss: 0.84725 |  0:05:04s
epoch 2  | loss: 0.83912 | eval_logloss: 6.05471 |  0:07:37s
epoch 3  | loss: 0.83894 | eval_logloss: 0.84646 |  0:10:08s
epoch 4  | loss: 0.83915 | eval_logloss: 1.60132 |  0:12:42s
epoch 5  | loss: 0.83728 | eval_logloss: 1.00331 |  0:15:13s
epoch 6  | loss: 0.83939 | eval_logloss: 0.88437 |  0:17:49s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 0.84646




Validation Accuracy: 0.6569


# Task ID7: 167119



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167119)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.77246 | eval_logloss: 1.57999 |  0:01:38s
epoch 1  | loss: 0.64025 | eval_logloss: 1.18152 |  0:03:27s
epoch 2  | loss: 0.60071 | eval_logloss: 0.81802 |  0:05:05s
epoch 3  | loss: 0.55812 | eval_logloss: 0.7299  |  0:06:43s
epoch 4  | loss: 0.54603 | eval_logloss: 1.11812 |  0:08:21s
epoch 5  | loss: 0.53139 | eval_logloss: 0.81699 |  0:09:59s
epoch 6  | loss: 0.52363 | eval_logloss: 0.4834  |  0:11:38s
epoch 7  | loss: 0.49867 | eval_logloss: 0.4671  |  0:13:18s
epoch 8  | loss: 0.48945 | eval_logloss: 0.47089 |  0:14:57s
epoch 9  | loss: 0.48366 | eval_logloss: 0.46573 |  0:16:37s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.46573




Validation Accuracy: 0.7952


# Task ID8: 167120



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167120)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.71209 | eval_logloss: 0.69413 |  0:03:37s
epoch 1  | loss: 0.69438 | eval_logloss: 0.69306 |  0:07:14s
epoch 2  | loss: 0.69331 | eval_logloss: 0.69404 |  0:10:53s
epoch 3  | loss: 0.69327 | eval_logloss: 0.69329 |  0:14:28s
epoch 4  | loss: 0.69302 | eval_logloss: 0.69295 |  0:18:02s
epoch 5  | loss: 0.69318 | eval_logloss: 0.71189 |  0:21:38s
epoch 6  | loss: 0.69329 | eval_logloss: 0.69334 |  0:25:12s
epoch 7  | loss: 0.69327 | eval_logloss: 0.70784 |  0:28:50s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.69295




Validation Accuracy: 0.5071


# Task ID11: 	168331



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168331)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.6942  | eval_logloss: 3.93489 |  0:02:32s
epoch 1  | loss: 1.38955 | eval_logloss: 2.20657 |  0:04:52s
epoch 2  | loss: 1.31262 | eval_logloss: 3.18054 |  0:07:12s
epoch 3  | loss: 1.2644  | eval_logloss: 1.50697 |  0:09:35s
epoch 4  | loss: 1.27018 | eval_logloss: 1.88719 |  0:11:54s
epoch 5  | loss: 1.21719 | eval_logloss: 1.46682 |  0:14:13s
epoch 6  | loss: 1.1868  | eval_logloss: 1.45254 |  0:16:30s
epoch 7  | loss: 1.17425 | eval_logloss: 1.47863 |  0:18:50s
epoch 8  | loss: 1.16154 | eval_logloss: 1.43581 |  0:21:39s
epoch 9  | loss: 1.14858 | eval_logloss: 1.45584 |  0:24:00s
Stop training because you reached max_epochs = 10 with best_epoch = 8 and best_eval_logloss = 1.43581




Validation Accuracy: 0.5475


# Task ID12: 	168330



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168330)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.99482 | eval_logloss: 1.24022 |  0:03:05s
epoch 1  | loss: 0.84874 | eval_logloss: 1.46023 |  0:06:13s
epoch 2  | loss: 0.80891 | eval_logloss: 0.87615 |  0:09:22s
epoch 3  | loss: 0.77896 | eval_logloss: 0.85331 |  0:12:31s
epoch 4  | loss: 0.76773 | eval_logloss: 1.10859 |  0:15:41s
epoch 5  | loss: 0.75482 | eval_logloss: 0.79825 |  0:18:58s
epoch 6  | loss: 0.75109 | eval_logloss: 0.96151 |  0:22:10s
epoch 7  | loss: 0.74612 | eval_logloss: 0.72795 |  0:25:19s
epoch 8  | loss: 0.73993 | eval_logloss: 0.77799 |  0:28:33s
epoch 9  | loss: 0.73496 | eval_logloss: 0.82595 |  0:31:47s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.72795




Validation Accuracy: 0.6983


# Task ID13: 	168335



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168335)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.31082 | eval_logloss: 0.3181  |  0:04:08s
epoch 1  | loss: 0.2099  | eval_logloss: 0.41314 |  0:08:21s
epoch 2  | loss: 0.19644 | eval_logloss: 1.19324 |  0:12:31s
epoch 3  | loss: 0.20571 | eval_logloss: 0.36119 |  0:16:54s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 0.3181




Validation Accuracy: 0.8663



# Task ID16: 	146212



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146212)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.16955 | eval_logloss: 2.33135 |  0:02:24s
epoch 1  | loss: 0.05515 | eval_logloss: 0.78685 |  0:04:48s
epoch 2  | loss: 0.0538  | eval_logloss: 0.26912 |  0:07:12s
epoch 3  | loss: 0.04524 | eval_logloss: 0.13735 |  0:09:38s
epoch 4  | loss: 0.02799 | eval_logloss: 0.10543 |  0:12:03s
epoch 5  | loss: 0.02399 | eval_logloss: 0.20159 |  0:14:27s
epoch 6  | loss: 0.03056 | eval_logloss: 0.25784 |  0:16:49s
epoch 7  | loss: 0.0208  | eval_logloss: 0.11338 |  0:19:08s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.10543




Validation Accuracy: 0.9764


# Task ID19: 	168868



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168868)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.10852 | eval_logloss: 0.09223 |  0:02:37s
epoch 1  | loss: 0.0514  | eval_logloss: 0.13615 |  0:05:18s
epoch 2  | loss: 0.04712 | eval_logloss: 0.06024 |  0:08:08s
epoch 3  | loss: 0.0495  | eval_logloss: 0.08095 |  0:11:00s
epoch 4  | loss: 0.05274 | eval_logloss: 0.05522 |  0:13:52s
epoch 5  | loss: 0.05017 | eval_logloss: 0.06555 |  0:16:45s
epoch 6  | loss: 0.04313 | eval_logloss: 0.06066 |  0:19:26s
epoch 7  | loss: 0.04361 | eval_logloss: 0.05076 |  0:22:05s
epoch 8  | loss: 0.04403 | eval_logloss: 0.59572 |  0:24:43s
epoch 9  | loss: 0.04406 | eval_logloss: 0.05768 |  0:27:22s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.05076




Validation Accuracy: 0.9809


# Task ID20: 31

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=31)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.34689 | eval_logloss: 9.24658 |  0:00:05s
epoch 1  | loss: 1.18631 | eval_logloss: 4.94214 |  0:00:08s
epoch 2  | loss: 0.83039 | eval_logloss: 6.37695 |  0:00:10s
epoch 3  | loss: 0.66874 | eval_logloss: 11.23938|  0:00:12s
epoch 4  | loss: 0.63382 | eval_logloss: 9.88428 |  0:00:14s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 4.94214




Validation Accuracy: 0.6900


# Task ID21: 10101

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=10101)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.3636  | eval_logloss: 4.46387 |  0:00:01s
epoch 1  | loss: 0.9386  | eval_logloss: 6.05811 |  0:00:03s
epoch 2  | loss: 0.60565 | eval_logloss: 4.67643 |  0:00:04s
epoch 3  | loss: 0.55462 | eval_logloss: 5.46162 |  0:00:06s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 4.46387




Validation Accuracy: 0.7200


# Task ID22: 	3913

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3913)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.92756 | eval_logloss: 8.50322 |  0:00:01s
epoch 1  | loss: 0.93712 | eval_logloss: 8.19894 |  0:00:02s
epoch 2  | loss: 1.14424 | eval_logloss: 10.02093|  0:00:03s
epoch 3  | loss: 0.73191 | eval_logloss: 11.84291|  0:00:04s
epoch 4  | loss: 1.05149 | eval_logloss: 6.07329 |  0:00:05s
epoch 5  | loss: 0.67271 | eval_logloss: 9.71726 |  0:00:06s
epoch 6  | loss: 0.6837  | eval_logloss: 5.92657 |  0:00:07s
epoch 7  | loss: 0.53478 | eval_logloss: 12.29841|  0:00:09s
epoch 8  | loss: 0.44473 | eval_logloss: 10.40043|  0:00:10s
epoch 9  | loss: 0.38226 | eval_logloss: 3.59684 |  0:00:11s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 3.59684




Validation Accuracy: 0.7714


# Task ID23: 3

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69673 | eval_logloss: 7.47299 |  0:00:28s
epoch 1  | loss: 0.69673 | eval_logloss: 7.47299 |  0:00:48s
epoch 2  | loss: 0.69673 | eval_logloss: 7.47299 |  0:01:05s
epoch 3  | loss: 0.69673 | eval_logloss: 7.47299 |  0:01:22s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 7.47299




Validation Accuracy: 0.5312


# Task ID24: 3917

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3917)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.07655 | eval_logloss: 9.02898 |  0:00:05s
epoch 1  | loss: 0.77614 | eval_logloss: 2.7201  |  0:00:10s
epoch 2  | loss: 0.49718 | eval_logloss: 3.00605 |  0:00:14s
epoch 3  | loss: 0.42752 | eval_logloss: 3.83157 |  0:00:20s
epoch 4  | loss: 0.37198 | eval_logloss: 2.68079 |  0:00:24s
epoch 5  | loss: 0.37401 | eval_logloss: 1.75649 |  0:00:29s
epoch 6  | loss: 0.3726  | eval_logloss: 1.96631 |  0:00:35s
epoch 7  | loss: 0.37644 | eval_logloss: 2.03858 |  0:00:39s
epoch 8  | loss: 0.3669  | eval_logloss: 1.95649 |  0:00:43s

Early stopping occurred at epoch 8 with best_epoch = 5 and best_eval_logloss = 1.75649




Validation Accuracy: 0.8270


# Task ID25: 9957

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9957)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.42965 | eval_logloss: 5.81784 |  0:00:02s
epoch 1  | loss: 1.3272  | eval_logloss: 5.41684 |  0:00:05s
epoch 2  | loss: 1.25499 | eval_logloss: 6.04451 |  0:00:08s
epoch 3  | loss: 0.69689 | eval_logloss: 5.74228 |  0:00:10s
epoch 4  | loss: 0.61616 | eval_logloss: 5.31628 |  0:00:12s
epoch 5  | loss: 0.56039 | eval_logloss: 6.27827 |  0:00:15s
epoch 6  | loss: 0.55947 | eval_logloss: 5.6358  |  0:00:17s
epoch 7  | loss: 0.53881 | eval_logloss: 5.55946 |  0:00:20s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 5.31628




Validation Accuracy: 0.6635


# Task ID26: 9946

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9946)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.9977  | eval_logloss: 6.53087 |  0:00:02s
epoch 1  | loss: 0.92732 | eval_logloss: 8.67054 |  0:00:05s
epoch 2  | loss: 0.86623 | eval_logloss: 9.2298  |  0:00:06s
epoch 3  | loss: 0.46641 | eval_logloss: 9.50949 |  0:00:07s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 6.53087




Validation Accuracy: 0.5877


# Task ID27: 3918

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3918)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.04891 | eval_logloss: 4.18315 |  0:00:07s
epoch 1  | loss: 1.19057 | eval_logloss: 8.25844 |  0:00:12s
epoch 2  | loss: 0.46498 | eval_logloss: 4.0222  |  0:00:18s
epoch 3  | loss: 0.29515 | eval_logloss: 12.44818|  0:00:22s
epoch 4  | loss: 0.25252 | eval_logloss: 2.88959 |  0:00:24s
epoch 5  | loss: 0.31126 | eval_logloss: 9.04523 |  0:00:27s
epoch 6  | loss: 0.26592 | eval_logloss: 1.36444 |  0:00:29s
epoch 7  | loss: 0.27141 | eval_logloss: 1.44562 |  0:00:33s
epoch 8  | loss: 0.25278 | eval_logloss: 3.28764 |  0:00:36s
epoch 9  | loss: 0.24221 | eval_logloss: 3.89003 |  0:00:39s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 1.36444




Validation Accuracy: 0.9144


# Task ID28: 3903

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3903)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.71368 | eval_logloss: 2.03737 |  0:00:04s
epoch 1  | loss: 0.52013 | eval_logloss: 1.83363 |  0:00:08s
epoch 2  | loss: 0.41295 | eval_logloss: 1.73176 |  0:00:11s
epoch 3  | loss: 0.37941 | eval_logloss: 3.11145 |  0:00:15s
epoch 4  | loss: 0.34096 | eval_logloss: 1.68083 |  0:00:20s
epoch 5  | loss: 0.33321 | eval_logloss: 1.62989 |  0:00:24s
epoch 6  | loss: 0.29487 | eval_logloss: 1.76104 |  0:00:28s
epoch 7  | loss: 0.30232 | eval_logloss: 1.59179 |  0:00:32s
epoch 8  | loss: 0.29718 | eval_logloss: 8.36491 |  0:00:36s
epoch 9  | loss: 0.2992  | eval_logloss: 1.24993 |  0:00:40s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 1.24993




Validation Accuracy: 0.8882


# Task ID29: 37

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=37)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.17244 | eval_logloss: 6.83245 |  0:00:01s
epoch 1  | loss: 0.95167 | eval_logloss: 7.76415 |  0:00:03s
epoch 2  | loss: 0.61299 | eval_logloss: 7.86767 |  0:00:05s
epoch 3  | loss: 0.61923 | eval_logloss: 10.14676|  0:00:07s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 6.83245




Validation Accuracy: 0.5714


# Task ID30: 9971

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9971)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.31847 | eval_logloss: 6.13169 |  0:00:01s
epoch 1  | loss: 1.14787 | eval_logloss: 5.58665 |  0:00:02s
epoch 2  | loss: 0.93563 | eval_logloss: 7.78689 |  0:00:04s
epoch 3  | loss: 0.76167 | eval_logloss: 6.13169 |  0:00:05s
epoch 4  | loss: 0.63493 | eval_logloss: 6.67673 |  0:00:06s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 5.58665




Validation Accuracy: 0.6496


# Task ID31: 9952

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9952)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69858 | eval_logloss: 4.06578 |  0:00:13s
epoch 1  | loss: 0.50012 | eval_logloss: 5.10602 |  0:00:27s
epoch 2  | loss: 0.46375 | eval_logloss: 3.98746 |  0:00:40s
epoch 3  | loss: 0.45129 | eval_logloss: 2.53219 |  0:00:54s
epoch 4  | loss: 0.43466 | eval_logloss: 1.06911 |  0:01:07s
epoch 5  | loss: 0.43551 | eval_logloss: 0.88662 |  0:01:21s
epoch 6  | loss: 0.42693 | eval_logloss: 0.6498  |  0:01:35s
epoch 7  | loss: 0.42591 | eval_logloss: 0.49176 |  0:01:48s
epoch 8  | loss: 0.43433 | eval_logloss: 0.62525 |  0:02:02s
epoch 9  | loss: 0.424   | eval_logloss: 0.56668 |  0:02:15s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.49176




Validation Accuracy: 0.7900


# Task ID32: 3902

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3902)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.84881 | eval_logloss: 5.9511  |  0:00:04s
epoch 1  | loss: 0.73736 | eval_logloss: 4.53803 |  0:00:08s
epoch 2  | loss: 0.46176 | eval_logloss: 2.29308 |  0:00:11s
epoch 3  | loss: 0.35295 | eval_logloss: 2.62067 |  0:00:16s
epoch 4  | loss: 0.32215 | eval_logloss: 2.29308 |  0:00:20s
epoch 5  | loss: 0.29483 | eval_logloss: 2.29308 |  0:00:24s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 2.29308




Validation Accuracy: 0.8562


# Task ID33: 49

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 4  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=49)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69534 | eval_logloss: 0.69315 |  0:00:29s
epoch 1  | loss: 0.69534 | eval_logloss: 0.69315 |  0:01:05s
epoch 2  | loss: 0.69534 | eval_logloss: 0.69315 |  0:01:24s
epoch 3  | loss: 0.69534 | eval_logloss: 0.69315 |  0:01:43s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 0.69315


  feature_importances_ = sum_explain / np.sum(sum_explain)


Validation Accuracy: 0.6510


# Task ID34: 43

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=43)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.89573 | eval_logloss: 3.83524 |  0:00:51s
epoch 1  | loss: 0.48869 | eval_logloss: 1.58229 |  0:01:35s
epoch 2  | loss: 0.3461  | eval_logloss: 1.10953 |  0:02:19s
epoch 3  | loss: 0.30336 | eval_logloss: 1.20901 |  0:03:03s
epoch 4  | loss: 0.31972 | eval_logloss: 0.9515  |  0:03:46s
epoch 5  | loss: 0.29299 | eval_logloss: 0.55348 |  0:04:31s
epoch 6  | loss: 0.2637  | eval_logloss: 0.53266 |  0:05:14s
epoch 7  | loss: 0.26325 | eval_logloss: 0.61412 |  0:05:57s
epoch 8  | loss: 0.25624 | eval_logloss: 1.01671 |  0:06:39s
epoch 9  | loss: 0.23249 | eval_logloss: 0.65936 |  0:07:22s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 0.53266




Validation Accuracy: 0.8208


# Task ID35: 9978

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9978)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.67168 | eval_logloss: 1.03767 |  0:00:23s
epoch 1  | loss: 0.25097 | eval_logloss: 1.03496 |  0:00:47s
epoch 2  | loss: 0.25197 | eval_logloss: 0.74863 |  0:01:13s
epoch 3  | loss: 0.22699 | eval_logloss: 1.88177 |  0:01:37s
epoch 4  | loss: 0.23745 | eval_logloss: 0.97193 |  0:02:01s
epoch 5  | loss: 0.23823 | eval_logloss: 0.96236 |  0:02:25s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.74863




Validation Accuracy: 0.8955


# Task ID36: 10093

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=10093)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.97136 | eval_logloss: 4.34792 |  0:00:13s
epoch 1  | loss: 0.43185 | eval_logloss: 3.86977 |  0:00:25s
epoch 2  | loss: 0.36574 | eval_logloss: 4.06272 |  0:00:38s
epoch 3  | loss: 0.32282 | eval_logloss: 3.34202 |  0:00:52s
epoch 4  | loss: 0.29568 | eval_logloss: 2.94016 |  0:01:05s
epoch 5  | loss: 0.26915 | eval_logloss: 5.25189 |  0:01:18s
epoch 6  | loss: 0.29462 | eval_logloss: 2.68352 |  0:01:31s
epoch 7  | loss: 0.23989 | eval_logloss: 1.66723 |  0:01:44s
epoch 8  | loss: 0.23303 | eval_logloss: 1.09113 |  0:01:57s
epoch 9  | loss: 0.17572 | eval_logloss: 0.81969 |  0:02:10s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.81969




Validation Accuracy: 0.9164


# Task ID37: 219

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=219)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.55812 | eval_logloss: 1.24437 |  0:02:12s
epoch 1  | loss: 0.50988 | eval_logloss: 1.71687 |  0:04:11s
epoch 2  | loss: 0.51261 | eval_logloss: 0.64444 |  0:06:08s
epoch 3  | loss: 0.50404 | eval_logloss: 0.51315 |  0:08:07s
epoch 4  | loss: 0.50175 | eval_logloss: 1.15444 |  0:10:07s
epoch 5  | loss: 0.49845 | eval_logloss: 0.54822 |  0:12:06s
epoch 6  | loss: 0.50422 | eval_logloss: 1.59942 |  0:14:03s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 0.51315




Validation Accuracy: 0.7526


# Task ID38: 9976

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9976)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.35332 | eval_logloss: 7.78724 |  0:00:09s
epoch 1  | loss: 0.99687 | eval_logloss: 8.04319 |  0:00:19s
epoch 2  | loss: 0.797   | eval_logloss: 8.06317 |  0:00:27s
epoch 3  | loss: 0.72739 | eval_logloss: 7.84856 |  0:00:36s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 7.78724




Validation Accuracy: 0.5115


# Task ID39: 6

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=6)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.52516 | eval_logloss: 13.27656|  0:00:51s
epoch 1  | loss: 1.43021 | eval_logloss: 10.25438|  0:01:43s
epoch 2  | loss: 1.20465 | eval_logloss: 5.88078 |  0:02:35s
epoch 3  | loss: 1.10063 | eval_logloss: 4.37772 |  0:03:26s
epoch 4  | loss: 1.02321 | eval_logloss: 5.01252 |  0:04:19s
epoch 5  | loss: 0.97172 | eval_logloss: 2.61749 |  0:05:11s
epoch 6  | loss: 0.94548 | eval_logloss: 2.53675 |  0:06:04s
epoch 7  | loss: 0.89046 | eval_logloss: 2.87962 |  0:06:56s
epoch 8  | loss: 0.89461 | eval_logloss: 3.54118 |  0:07:48s
epoch 9  | loss: 0.85949 | eval_logloss: 2.82158 |  0:08:41s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 2.53675




Validation Accuracy: 0.5430


# Task ID40: 53

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=53)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.35915 | eval_logloss: 11.53478|  0:00:02s
epoch 1  | loss: 2.02485 | eval_logloss: 11.53478|  0:00:04s
epoch 2  | loss: 2.17864 | eval_logloss: 12.19124|  0:00:07s
epoch 3  | loss: 1.42329 | eval_logloss: 12.00368|  0:00:10s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 11.53478




Validation Accuracy: 0.2765


# Task ID41: 11

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=11)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.14849 | eval_logloss: 9.31035 |  0:00:01s
epoch 1  | loss: 1.11661 | eval_logloss: 7.77988 |  0:00:04s
epoch 2  | loss: 0.83861 | eval_logloss: 6.76513 |  0:00:06s
epoch 3  | loss: 0.6374  | eval_logloss: 4.97402 |  0:00:07s
epoch 4  | loss: 0.54761 | eval_logloss: 4.71895 |  0:00:09s
epoch 5  | loss: 0.46219 | eval_logloss: 9.18281 |  0:00:11s
epoch 6  | loss: 0.47785 | eval_logloss: 8.29004 |  0:00:12s
epoch 7  | loss: 0.47396 | eval_logloss: 7.26973 |  0:00:14s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 4.71895




Validation Accuracy: 0.7040


# Task ID42: 15

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=15)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.51653 | eval_logloss: 10.93192|  0:00:02s
epoch 1  | loss: 0.41641 | eval_logloss: 8.65444 |  0:00:04s
epoch 2  | loss: 0.32008 | eval_logloss: 10.81805|  0:00:06s
epoch 3  | loss: 0.27131 | eval_logloss: 7.74344 |  0:00:08s
epoch 4  | loss: 0.21542 | eval_logloss: 9.22381 |  0:00:10s
epoch 5  | loss: 0.13686 | eval_logloss: 10.5903 |  0:00:12s
epoch 6  | loss: 0.23555 | eval_logloss: 8.14845 |  0:00:13s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 7.74344




Validation Accuracy: 0.5143


# Task ID43: 16

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=16)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.68524 | eval_logloss: 13.91963|  0:00:06s
epoch 1  | loss: 2.71258 | eval_logloss: 13.45409|  0:00:11s
epoch 2  | loss: 2.07086 | eval_logloss: 13.51117|  0:00:16s
epoch 3  | loss: 1.58317 | eval_logloss: 13.21697|  0:00:23s
epoch 4  | loss: 1.37061 | eval_logloss: 13.72293|  0:00:28s
epoch 5  | loss: 1.15076 | eval_logloss: 12.34968|  0:00:34s
epoch 6  | loss: 0.99943 | eval_logloss: 10.6877 |  0:00:39s
epoch 7  | loss: 0.92738 | eval_logloss: 9.86958 |  0:00:45s
epoch 8  | loss: 0.86861 | eval_logloss: 10.69403|  0:00:51s
epoch 9  | loss: 0.8236  | eval_logloss: 9.75459 |  0:00:56s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 9.75459




Validation Accuracy: 0.3375


# Task ID44: 14

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.63054 | eval_logloss: 14.54598|  0:00:05s
epoch 1  | loss: 2.79112 | eval_logloss: 13.8594 |  0:00:10s
epoch 2  | loss: 2.19693 | eval_logloss: 14.39966|  0:00:15s
epoch 3  | loss: 1.57906 | eval_logloss: 14.03264|  0:00:21s
epoch 4  | loss: 1.32264 | eval_logloss: 13.58695|  0:00:26s
epoch 5  | loss: 1.18568 | eval_logloss: 13.43258|  0:00:32s
epoch 6  | loss: 0.96883 | eval_logloss: 9.66555 |  0:00:37s
epoch 7  | loss: 0.92166 | eval_logloss: 9.94002 |  0:00:43s
epoch 8  | loss: 0.92694 | eval_logloss: 10.09771|  0:00:48s
epoch 9  | loss: 0.8296  | eval_logloss: 8.79039 |  0:00:54s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 8.79039




Validation Accuracy: 0.3400


# Task ID45: 32

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=32)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.62185 | eval_logloss: 12.88854|  0:00:28s
epoch 1  | loss: 0.65008 | eval_logloss: 11.81195|  0:00:57s
epoch 2  | loss: 0.46543 | eval_logloss: 7.9902  |  0:01:25s
epoch 3  | loss: 0.36477 | eval_logloss: 4.86628 |  0:01:55s
epoch 4  | loss: 0.28835 | eval_logloss: 4.84269 |  0:02:24s
epoch 5  | loss: 0.28201 | eval_logloss: 3.60895 |  0:02:53s
epoch 6  | loss: 0.23222 | eval_logloss: 3.13824 |  0:03:22s
epoch 7  | loss: 0.21231 | eval_logloss: 2.6245  |  0:03:51s
epoch 8  | loss: 0.18237 | eval_logloss: 1.74174 |  0:04:19s
epoch 9  | loss: 0.16706 | eval_logloss: 2.71475 |  0:04:47s
Stop training because you reached max_epochs = 10 with best_epoch = 8 and best_eval_logloss = 1.74174




Validation Accuracy: 0.7563


# Task ID46: 3549

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3549)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.10331 | eval_logloss: 11.98037|  0:00:02s
epoch 1  | loss: 2.22585 | eval_logloss: 9.8107  |  0:00:04s
epoch 2  | loss: 1.68331 | eval_logloss: 8.96228 |  0:00:06s
epoch 3  | loss: 1.15805 | eval_logloss: 10.3767 |  0:00:08s
epoch 4  | loss: 1.02533 | eval_logloss: 9.91327 |  0:00:11s
epoch 5  | loss: 0.8891  | eval_logloss: 9.2447  |  0:00:13s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 8.96228




Validation Accuracy: 0.4379


# Task ID47: 12

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=12)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.78448 | eval_logloss: 13.0329 |  0:00:16s
epoch 1  | loss: 2.02954 | eval_logloss: 13.48574|  0:00:28s
epoch 2  | loss: 1.35196 | eval_logloss: 11.50661|  0:00:37s
epoch 3  | loss: 1.13319 | eval_logloss: 11.61802|  0:00:48s
epoch 4  | loss: 1.09804 | eval_logloss: 11.83884|  0:00:58s
epoch 5  | loss: 1.00785 | eval_logloss: 8.07718 |  0:01:09s
epoch 6  | loss: 0.90997 | eval_logloss: 7.63674 |  0:01:19s
epoch 7  | loss: 0.83216 | eval_logloss: 8.50177 |  0:01:29s
epoch 8  | loss: 0.78182 | eval_logloss: 5.64768 |  0:01:40s
epoch 9  | loss: 0.68053 | eval_logloss: 4.96465 |  0:01:50s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 4.96465




Validation Accuracy: 0.4800


# Task ID48: 9981

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9981)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 4.18252 | eval_logloss: 14.61385|  0:00:13s
epoch 1  | loss: 3.98198 | eval_logloss: 13.72817|  0:00:24s
epoch 2  | loss: 2.66812 | eval_logloss: 14.61564|  0:00:30s
epoch 3  | loss: 2.2368  | eval_logloss: 13.7897 |  0:00:38s
epoch 4  | loss: 1.97772 | eval_logloss: 10.91763|  0:00:45s
epoch 5  | loss: 1.70386 | eval_logloss: 8.8964  |  0:00:53s
epoch 6  | loss: 1.5747  | eval_logloss: 9.02813 |  0:01:00s
epoch 7  | loss: 1.36443 | eval_logloss: 6.73812 |  0:01:08s
epoch 8  | loss: 1.17264 | eval_logloss: 8.93185 |  0:01:15s
epoch 9  | loss: 1.16976 | eval_logloss: 4.89783 |  0:01:22s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 4.89783




Validation Accuracy: 0.5324


# Task ID49: 18

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=18)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.21757 | eval_logloss: 12.87348|  0:00:08s
epoch 1  | loss: 1.34322 | eval_logloss: 14.82642|  0:00:19s
epoch 2  | loss: 1.0787  | eval_logloss: 12.65845|  0:00:29s
epoch 3  | loss: 1.02473 | eval_logloss: 9.31076 |  0:00:38s
epoch 4  | loss: 0.94744 | eval_logloss: 13.22309|  0:00:47s
epoch 5  | loss: 0.91763 | eval_logloss: 11.69855|  0:00:57s
epoch 6  | loss: 0.91225 | eval_logloss: 12.40859|  0:01:07s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 9.31076




Validation Accuracy: 0.3725


# Task ID50: 28

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=28)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.51465 | eval_logloss: 10.40119|  0:00:28s
epoch 1  | loss: 1.215   | eval_logloss: 9.918   |  0:00:56s
epoch 2  | loss: 0.86968 | eval_logloss: 8.05036 |  0:01:23s
epoch 3  | loss: 0.74891 | eval_logloss: 5.88487 |  0:01:51s
epoch 4  | loss: 0.62198 | eval_logloss: 2.33301 |  0:02:19s
epoch 5  | loss: 0.57125 | eval_logloss: 2.11888 |  0:02:47s
epoch 6  | loss: 0.49438 | eval_logloss: 3.0191  |  0:03:15s
epoch 7  | loss: 0.44801 | eval_logloss: 2.85334 |  0:03:43s
epoch 8  | loss: 0.4076  | eval_logloss: 0.75577 |  0:04:10s
epoch 9  | loss: 0.36435 | eval_logloss: 0.71362 |  0:04:38s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.71362




Validation Accuracy: 0.8025


# Task ID51: 2074

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=2074)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.51129 | eval_logloss: 12.15818|  0:00:32s
epoch 1  | loss: 0.75999 | eval_logloss: 6.77683 |  0:01:03s
epoch 2  | loss: 0.68029 | eval_logloss: 4.79691 |  0:01:33s
epoch 3  | loss: 0.66508 | eval_logloss: 5.62337 |  0:02:05s
epoch 4  | loss: 0.6356  | eval_logloss: 1.38191 |  0:02:37s
epoch 5  | loss: 0.58669 | eval_logloss: 1.71945 |  0:03:08s
epoch 6  | loss: 0.53544 | eval_logloss: 1.31969 |  0:03:39s
epoch 7  | loss: 0.54407 | eval_logloss: 0.66251 |  0:04:11s
epoch 8  | loss: 0.53615 | eval_logloss: 0.71303 |  0:04:41s
epoch 9  | loss: 0.54719 | eval_logloss: 1.65011 |  0:05:12s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.66251




Validation Accuracy: 0.7893


# Task ID52: 29

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=29)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.48005 | eval_logloss: 8.08672 |  0:00:04s
epoch 1  | loss: 1.37652 | eval_logloss: 6.00728 |  0:00:07s
epoch 2  | loss: 0.99528 | eval_logloss: 7.62462 |  0:00:10s
epoch 3  | loss: 0.86614 | eval_logloss: 7.77241 |  0:00:13s
epoch 4  | loss: 0.6973  | eval_logloss: 8.59213 |  0:00:17s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 6.00728




Validation Accuracy: 0.6232


# Task ID53: 45

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=45)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.10271 | eval_logloss: 11.46952|  0:00:15s
epoch 1  | loss: 1.10271 | eval_logloss: 7.97119 |  0:00:32s
epoch 2  | loss: 1.10271 | eval_logloss: 11.46952|  0:00:49s
epoch 3  | loss: 1.10271 | eval_logloss: 7.97119 |  0:01:06s
epoch 4  | loss: 1.10271 | eval_logloss: 7.97119 |  0:01:23s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 7.97119




Validation Accuracy: 0.5000


# Task ID54: 125922

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=125922)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.25871 | eval_logloss: 13.14887|  0:00:25s
epoch 1  | loss: 1.11513 | eval_logloss: 9.44277 |  0:00:52s
epoch 2  | loss: 0.94684 | eval_logloss: 5.33629 |  0:01:17s
epoch 3  | loss: 0.71211 | eval_logloss: 5.09027 |  0:01:44s
epoch 4  | loss: 0.64502 | eval_logloss: 6.74302 |  0:02:10s
epoch 5  | loss: 0.63858 | eval_logloss: 6.14965 |  0:02:37s
epoch 6  | loss: 0.58242 | eval_logloss: 3.33172 |  0:03:05s
epoch 7  | loss: 0.52517 | eval_logloss: 2.09625 |  0:03:32s
epoch 8  | loss: 0.48328 | eval_logloss: 1.80142 |  0:04:00s
epoch 9  | loss: 0.46713 | eval_logloss: 1.96301 |  0:04:26s
Stop training because you reached max_epochs = 10 with best_epoch = 8 and best_eval_logloss = 1.80142




Validation Accuracy: 0.7155


# Task ID55: 9960

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9960)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.54984 | eval_logloss: 9.47903 |  0:00:26s
epoch 1  | loss: 0.82261 | eval_logloss: 9.10091 |  0:00:52s
epoch 2  | loss: 0.65935 | eval_logloss: 8.04586 |  0:01:20s
epoch 3  | loss: 0.56649 | eval_logloss: 7.88895 |  0:01:47s
epoch 4  | loss: 0.5372  | eval_logloss: 6.3676  |  0:02:14s
epoch 5  | loss: 0.52447 | eval_logloss: 4.94183 |  0:02:41s
epoch 6  | loss: 0.53178 | eval_logloss: 3.8235  |  0:03:08s
epoch 7  | loss: 0.48892 | eval_logloss: 2.3727  |  0:03:36s
epoch 8  | loss: 0.49948 | eval_logloss: 3.48711 |  0:04:02s
epoch 9  | loss: 0.48963 | eval_logloss: 3.17181 |  0:04:30s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 2.3727




Validation Accuracy: 0.6081


# Task ID56: 9964

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9964)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.68528 | eval_logloss: 14.05106|  0:00:10s
epoch 1  | loss: 2.98703 | eval_logloss: 14.24641|  0:00:19s
epoch 2  | loss: 2.43497 | eval_logloss: 14.04329|  0:00:26s
epoch 3  | loss: 2.15597 | eval_logloss: 14.29339|  0:00:34s
epoch 4  | loss: 1.93001 | eval_logloss: 13.65193|  0:00:38s
epoch 5  | loss: 1.58914 | eval_logloss: 13.45126|  0:00:43s
epoch 6  | loss: 1.50048 | eval_logloss: 12.56901|  0:00:48s
epoch 7  | loss: 1.27909 | eval_logloss: 11.61492|  0:00:53s
epoch 8  | loss: 1.21407 | eval_logloss: 10.66038|  0:00:58s
epoch 9  | loss: 1.15783 | eval_logloss: 8.9243  |  0:01:03s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 8.9243




Validation Accuracy: 0.4263


# Task ID57: 22

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=22)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.62943 | eval_logloss: 14.41203|  0:00:05s
epoch 1  | loss: 2.63318 | eval_logloss: 14.388  |  0:00:10s
epoch 2  | loss: 2.01606 | eval_logloss: 12.73724|  0:00:15s
epoch 3  | loss: 1.66916 | eval_logloss: 12.03187|  0:00:21s
epoch 4  | loss: 1.40582 | eval_logloss: 12.62031|  0:00:26s
epoch 5  | loss: 1.29474 | eval_logloss: 9.84051 |  0:00:32s
epoch 6  | loss: 1.17072 | eval_logloss: 10.6358 |  0:00:37s
epoch 7  | loss: 1.08807 | eval_logloss: 7.63036 |  0:00:42s
epoch 8  | loss: 1.01745 | eval_logloss: 9.25853 |  0:00:48s
epoch 9  | loss: 1.00206 | eval_logloss: 8.77457 |  0:00:53s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 7.63036




Validation Accuracy: 0.4625


# Task ID58: 2079

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=2079)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.33494 | eval_logloss: 12.81854|  0:00:05s
epoch 1  | loss: 2.76566 | eval_logloss: 13.57257|  0:00:12s
epoch 2  | loss: 1.84611 | eval_logloss: 13.62319|  0:00:16s
epoch 3  | loss: 1.27614 | eval_logloss: 12.17223|  0:00:21s
epoch 4  | loss: 1.24854 | eval_logloss: 10.01785|  0:00:28s
epoch 5  | loss: 1.16541 | eval_logloss: 12.5888 |  0:00:32s
epoch 6  | loss: 1.20899 | eval_logloss: 12.61539|  0:00:35s
epoch 7  | loss: 1.04628 | eval_logloss: 12.37821|  0:00:39s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 10.01785




Validation Accuracy: 0.3716


# Task ID59: 14969

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14969)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.78831 | eval_logloss: 9.95755 |  0:00:49s
epoch 1  | loss: 1.33728 | eval_logloss: 1.69743 |  0:01:38s
epoch 2  | loss: 1.31625 | eval_logloss: 2.08021 |  0:02:27s
epoch 3  | loss: 1.32187 | eval_logloss: 3.15533 |  0:03:17s
epoch 4  | loss: 1.31574 | eval_logloss: 1.50279 |  0:04:08s
epoch 5  | loss: 1.29438 | eval_logloss: 1.73998 |  0:04:57s
epoch 6  | loss: 1.31121 | eval_logloss: 3.35892 |  0:05:47s
epoch 7  | loss: 1.29905 | eval_logloss: 3.95843 |  0:06:35s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 1.50279




Validation Accuracy: 0.4203


# Task ID60: 3560

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3560)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.45934 | eval_logloss: 13.67008|  0:00:04s
epoch 1  | loss: 2.43579 | eval_logloss: 11.94223|  0:00:08s
epoch 2  | loss: 1.89646 | eval_logloss: 13.75031|  0:00:12s
epoch 3  | loss: 1.84084 | eval_logloss: 13.39133|  0:00:16s
epoch 4  | loss: 1.79361 | eval_logloss: 12.15877|  0:00:20s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 11.94223




Validation Accuracy: 0.2500


# Task ID61: 14952

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14952)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.60541 | eval_logloss: 2.28642 |  0:00:54s
epoch 1  | loss: 0.27768 | eval_logloss: 1.92621 |  0:01:50s
epoch 2  | loss: 0.28006 | eval_logloss: 0.48772 |  0:02:46s
epoch 3  | loss: 0.25894 | eval_logloss: 0.27911 |  0:03:41s
epoch 4  | loss: 0.22448 | eval_logloss: 0.42473 |  0:04:36s
epoch 5  | loss: 0.20491 | eval_logloss: 0.89658 |  0:05:30s
epoch 6  | loss: 0.18057 | eval_logloss: 1.24414 |  0:06:25s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 0.27911




Validation Accuracy: 0.8960


# Task ID62: 125920

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 6  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=125920)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.31841 | eval_logloss: 8.44946 |  0:00:10s
epoch 1  | loss: 1.1383  | eval_logloss: 7.77588 |  0:00:19s
epoch 2  | loss: 0.74923 | eval_logloss: 9.24658 |  0:00:29s
epoch 3  | loss: 0.70811 | eval_logloss: 4.90499 |  0:00:41s
epoch 4  | loss: 0.69134 | eval_logloss: 7.42831 |  0:00:49s
epoch 5  | loss: 0.71446 | eval_logloss: 2.69128 |  0:00:59s
epoch 6  | loss: 0.71875 | eval_logloss: 3.57783 |  0:01:05s
epoch 7  | loss: 0.69537 | eval_logloss: 4.63117 |  0:01:12s
epoch 8  | loss: 0.70034 | eval_logloss: 0.9218  |  0:01:18s
epoch 9  | loss: 0.69606 | eval_logloss: 0.73823 |  0:01:25s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.73823




Validation Accuracy: 0.4200


# Task ID63: 23

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=23)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.3021  | eval_logloss: 9.88968 |  0:00:04s
epoch 1  | loss: 1.4079  | eval_logloss: 9.56543 |  0:00:08s
epoch 2  | loss: 1.11022 | eval_logloss: 9.85276 |  0:00:11s
epoch 3  | loss: 1.06278 | eval_logloss: 9.04951 |  0:00:15s
epoch 4  | loss: 1.01892 | eval_logloss: 9.56996 |  0:00:22s
epoch 5  | loss: 1.04256 | eval_logloss: 9.00102 |  0:00:27s
epoch 6  | loss: 0.99035 | eval_logloss: 7.67257 |  0:00:35s
epoch 7  | loss: 0.98769 | eval_logloss: 8.34191 |  0:00:40s
epoch 8  | loss: 0.99289 | eval_logloss: 7.85675 |  0:00:49s
epoch 9  | loss: 0.98539 | eval_logloss: 8.16619 |  0:00:52s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 7.67257




Validation Accuracy: 0.4746


# Task ID64: 3904

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3904)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69085 | eval_logloss: 3.02211 |  0:00:29s
epoch 1  | loss: 0.47165 | eval_logloss: 0.64944 |  0:00:57s
epoch 2  | loss: 0.47162 | eval_logloss: 1.28842 |  0:01:26s
epoch 3  | loss: 0.47037 | eval_logloss: 0.85295 |  0:01:55s
epoch 4  | loss: 0.46402 | eval_logloss: 0.47787 |  0:02:25s
epoch 5  | loss: 0.46248 | eval_logloss: 0.47846 |  0:02:52s
epoch 6  | loss: 0.46463 | eval_logloss: 0.49844 |  0:03:21s
epoch 7  | loss: 0.46658 | eval_logloss: 0.55034 |  0:03:50s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.47787




Validation Accuracy: 0.8075


# Task ID65: 3022

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3022)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.83868 | eval_logloss: 14.33204|  0:00:02s
epoch 1  | loss: 3.06355 | eval_logloss: 13.44636|  0:00:05s
epoch 2  | loss: 2.74071 | eval_logloss: 14.25153|  0:00:07s
epoch 3  | loss: 2.4456  | eval_logloss: 13.26899|  0:00:10s
epoch 4  | loss: 2.03694 | eval_logloss: 13.60739|  0:00:13s
epoch 5  | loss: 1.82103 | eval_logloss: 12.95499|  0:00:16s
epoch 6  | loss: 1.62725 | eval_logloss: 12.83425|  0:00:18s
epoch 7  | loss: 1.4455  | eval_logloss: 13.4795 |  0:00:20s
epoch 8  | loss: 1.34725 | eval_logloss: 13.1243 |  0:00:23s
epoch 9  | loss: 1.39547 | eval_logloss: 12.96336|  0:00:25s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 12.83425




Validation Accuracy: 0.1919


# Task ID66: 9985

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9985)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.42217 | eval_logloss: 10.8249 |  0:00:16s
epoch 1  | loss: 1.60615 | eval_logloss: 10.70007|  0:00:32s
epoch 2  | loss: 1.53296 | eval_logloss: 10.67791|  0:00:49s
epoch 3  | loss: 1.50847 | eval_logloss: 7.18164 |  0:01:06s
epoch 4  | loss: 1.49401 | eval_logloss: 3.21664 |  0:01:22s
epoch 5  | loss: 1.48047 | eval_logloss: 3.2488  |  0:01:38s
epoch 6  | loss: 1.49348 | eval_logloss: 4.74702 |  0:01:53s
epoch 7  | loss: 1.49008 | eval_logloss: 5.15324 |  0:02:09s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 3.21664




Validation Accuracy: 0.3595


# Task ID67: 9910

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9910)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.29178 | eval_logloss: 9.00076 |  0:00:25s
epoch 1  | loss: 0.89262 | eval_logloss: 9.06445 |  0:00:51s
epoch 2  | loss: 0.71167 | eval_logloss: 5.5455  |  0:01:18s
epoch 3  | loss: 0.70148 | eval_logloss: 5.43918 |  0:01:43s
epoch 4  | loss: 0.68886 | eval_logloss: 2.96545 |  0:02:08s
epoch 5  | loss: 0.68351 | eval_logloss: 1.76203 |  0:02:35s
epoch 6  | loss: 0.66777 | eval_logloss: 1.22934 |  0:03:00s
epoch 7  | loss: 0.65735 | eval_logloss: 1.32232 |  0:03:26s
epoch 8  | loss: 0.62609 | eval_logloss: 3.20282 |  0:03:52s
epoch 9  | loss: 0.58995 | eval_logloss: 3.42794 |  0:04:19s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 1.22934




Validation Accuracy: 0.5593


# Task ID68: 14970

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14970)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.64427 | eval_logloss: 11.77286|  0:00:37s
epoch 1  | loss: 0.89124 | eval_logloss: 9.11257 |  0:01:13s
epoch 2  | loss: 0.45323 | eval_logloss: 3.97269 |  0:01:49s
epoch 3  | loss: 0.2724  | eval_logloss: 2.01379 |  0:02:27s
epoch 4  | loss: 0.1953  | eval_logloss: 3.0766  |  0:03:04s
epoch 5  | loss: 0.19732 | eval_logloss: 4.46687 |  0:03:40s
epoch 6  | loss: 0.13282 | eval_logloss: 6.04901 |  0:04:17s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 2.01379




Validation Accuracy: 0.7568


# Task ID69: 3021

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 64  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=3021)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.39779 | eval_logloss: 0.9362  |  0:00:08s
epoch 1  | loss: 0.30459 | eval_logloss: 2.02867 |  0:00:18s
epoch 2  | loss: 0.17925 | eval_logloss: 0.95467 |  0:00:32s
epoch 3  | loss: 0.12473 | eval_logloss: 1.06857 |  0:00:39s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 0.9362




Validation Accuracy: 0.9404


# Task ID70: 3481

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3481)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.64021 | eval_logloss: 15.17622|  0:00:33s
epoch 1  | loss: 1.94732 | eval_logloss: 14.67334|  0:01:25s
epoch 2  | loss: 1.36603 | eval_logloss: 12.75947|  0:01:56s
epoch 3  | loss: 1.0548  | eval_logloss: 10.73966|  0:02:24s
epoch 4  | loss: 0.82436 | eval_logloss: 10.94935|  0:02:53s
epoch 5  | loss: 0.77931 | eval_logloss: 9.17639 |  0:03:21s
epoch 6  | loss: 0.6501  | eval_logloss: 6.36518 |  0:03:49s
epoch 7  | loss: 0.58232 | eval_logloss: 6.5604  |  0:04:18s
epoch 8  | loss: 0.52974 | eval_logloss: 8.6821  |  0:04:47s
epoch 9  | loss: 0.47891 | eval_logloss: 7.94506 |  0:05:16s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 6.36518




Validation Accuracy: 0.3782


# Task ID71: 3573

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3573)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')


# Task ID72: 146824

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146824)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.47736 | eval_logloss: 13.63074|  0:00:06s
epoch 1  | loss: 2.97089 | eval_logloss: 13.98944|  0:00:12s
epoch 2  | loss: 2.20004 | eval_logloss: 13.34332|  0:00:18s
epoch 3  | loss: 1.59475 | eval_logloss: 11.96605|  0:00:24s
epoch 4  | loss: 1.31504 | eval_logloss: 10.83907|  0:00:29s
epoch 5  | loss: 0.97346 | eval_logloss: 10.56946|  0:00:35s
epoch 6  | loss: 0.83513 | eval_logloss: 8.05052 |  0:00:41s
epoch 7  | loss: 0.72657 | eval_logloss: 9.93368 |  0:00:46s
epoch 8  | loss: 0.64297 | eval_logloss: 9.11678 |  0:00:53s
epoch 9  | loss: 0.58012 | eval_logloss: 5.32921 |  0:00:58s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 5.32921




Validation Accuracy: 0.6300


# Task ID73: 146820

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146820)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.25688 | eval_logloss: 0.85641 |  0:00:12s
epoch 1  | loss: 0.14395 | eval_logloss: 1.88568 |  0:00:24s
epoch 2  | loss: 0.13104 | eval_logloss: 0.80956 |  0:00:36s
epoch 3  | loss: 0.13393 | eval_logloss: 1.69857 |  0:00:48s
epoch 4  | loss: 0.11677 | eval_logloss: 1.0874  |  0:01:00s
epoch 5  | loss: 0.07932 | eval_logloss: 2.48067 |  0:01:13s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.80956




Validation Accuracy: 0.9432


# Task ID74: 146822

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146822)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.11134 | eval_logloss: 11.90503|  0:00:05s
epoch 1  | loss: 1.22501 | eval_logloss: 13.21631|  0:00:12s
epoch 2  | loss: 1.03007 | eval_logloss: 11.3184 |  0:00:17s
epoch 3  | loss: 0.82236 | eval_logloss: 11.73652|  0:00:24s
epoch 4  | loss: 0.83585 | eval_logloss: 11.02817|  0:00:30s
epoch 5  | loss: 0.69213 | eval_logloss: 11.18421|  0:00:36s
epoch 6  | loss: 0.6192  | eval_logloss: 10.03292|  0:00:42s
epoch 7  | loss: 0.66025 | eval_logloss: 9.87935 |  0:00:47s
epoch 8  | loss: 0.60797 | eval_logloss: 10.61304|  0:00:54s
epoch 9  | loss: 0.52615 | eval_logloss: 9.25311 |  0:00:59s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 9.25311




Validation Accuracy: 0.3831


# Task ID75: 146195

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 64  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=146195)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.88735 | eval_logloss: 1.85102 |  0:01:56s
epoch 1  | loss: 0.83127 | eval_logloss: 1.09154 |  0:03:34s
epoch 2  | loss: 0.79927 | eval_logloss: 0.96378 |  0:05:13s
epoch 3  | loss: 0.79278 | eval_logloss: 1.04925 |  0:06:52s
epoch 4  | loss: 0.79345 | eval_logloss: 0.8551  |  0:08:32s
epoch 5  | loss: 0.75154 | eval_logloss: 0.92834 |  0:10:11s
epoch 6  | loss: 0.71754 | eval_logloss: 1.10864 |  0:11:51s
epoch 7  | loss: 0.7025  | eval_logloss: 0.80745 |  0:13:31s
epoch 8  | loss: 0.69249 | eval_logloss: 0.79891 |  0:15:08s
epoch 9  | loss: 0.66537 | eval_logloss: 0.73321 |  0:16:47s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.73321




Validation Accuracy: 0.7370


# Task ID76: 146800

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 16  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=146800)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.64069 | eval_logloss: 13.72817|  0:00:08s
epoch 1  | loss: 3.06553 | eval_logloss: 13.65436|  0:00:18s
epoch 2  | loss: 2.12764 | eval_logloss: 14.17106|  0:00:25s
epoch 3  | loss: 1.75403 | eval_logloss: 12.99573|  0:00:36s
epoch 4  | loss: 1.51718 | eval_logloss: 13.03454|  0:00:45s
epoch 5  | loss: 1.49023 | eval_logloss: 12.67445|  0:00:54s
epoch 6  | loss: 1.43927 | eval_logloss: 10.23553|  0:01:02s
epoch 7  | loss: 1.3487  | eval_logloss: 12.07331|  0:01:07s
epoch 8  | loss: 1.31162 | eval_logloss: 10.41501|  0:01:12s
epoch 9  | loss: 1.23692 | eval_logloss: 10.73824|  0:01:17s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 10.23553




Validation Accuracy: 0.2731


# Task ID77: 146817

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146817)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.68165 | eval_logloss: 11.99211|  0:00:08s
epoch 1  | loss: 1.74952 | eval_logloss: 12.22625|  0:00:18s
epoch 2  | loss: 1.43355 | eval_logloss: 11.65258|  0:00:24s
epoch 3  | loss: 1.29062 | eval_logloss: 10.65536|  0:00:30s
epoch 4  | loss: 1.18507 | eval_logloss: 9.73483 |  0:00:34s
epoch 5  | loss: 1.14264 | eval_logloss: 10.2273 |  0:00:39s
epoch 6  | loss: 1.09982 | eval_logloss: 9.07718 |  0:00:45s
epoch 7  | loss: 1.14554 | eval_logloss: 9.05169 |  0:00:49s
epoch 8  | loss: 1.06585 | eval_logloss: 11.81899|  0:00:55s
epoch 9  | loss: 1.07118 | eval_logloss: 8.62176 |  0:01:00s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 8.62176




Validation Accuracy: 0.3445


# Task ID78: 146819

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146819)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.78554 | eval_logloss: 2.65706 |  0:00:01s
epoch 1  | loss: 1.10755 | eval_logloss: 7.23312 |  0:00:03s
epoch 2  | loss: 0.82675 | eval_logloss: 1.62376 |  0:00:05s
epoch 3  | loss: 0.47711 | eval_logloss: 2.65706 |  0:00:06s
epoch 4  | loss: 0.60157 | eval_logloss: 2.93957 |  0:00:07s
epoch 5  | loss: 0.36914 | eval_logloss: 1.62376 |  0:00:09s
epoch 6  | loss: 0.36235 | eval_logloss: 2.25002 |  0:00:10s
epoch 7  | loss: 0.27615 | eval_logloss: 4.65629 |  0:00:11s
epoch 8  | loss: 0.29527 | eval_logloss: 7.1624  |  0:00:13s

Early stopping occurred at epoch 8 with best_epoch = 5 and best_eval_logloss = 1.62376




Validation Accuracy: 0.8981


# Task ID79: 146821

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146821)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.84776 | eval_logloss: 11.79552|  0:00:04s
epoch 1  | loss: 1.05541 | eval_logloss: 8.20157 |  0:00:08s
epoch 2  | loss: 0.85181 | eval_logloss: 5.11446 |  0:00:13s
epoch 3  | loss: 0.75032 | eval_logloss: 5.11446 |  0:00:17s
epoch 4  | loss: 0.80587 | eval_logloss: 5.11446 |  0:00:22s
epoch 5  | loss: 0.79653 | eval_logloss: 5.11446 |  0:00:27s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 5.11446




Validation Accuracy: 0.6792


# Task ID80: 14954

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14954)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.13078 | eval_logloss: 9.44734 |  0:00:01s
epoch 1  | loss: 1.13367 | eval_logloss: 7.97119 |  0:00:02s
epoch 2  | loss: 1.37395 | eval_logloss: 7.4926  |  0:00:04s
epoch 3  | loss: 0.83519 | eval_logloss: 6.79028 |  0:00:06s
epoch 4  | loss: 0.70683 | eval_logloss: 7.0855  |  0:00:08s
epoch 5  | loss: 0.77105 | eval_logloss: 6.64266 |  0:00:09s
epoch 6  | loss: 0.72537 | eval_logloss: 7.0855  |  0:00:10s
epoch 7  | loss: 0.63287 | eval_logloss: 8.41404 |  0:00:12s
epoch 8  | loss: 0.67329 | eval_logloss: 7.23312 |  0:00:13s

Early stopping occurred at epoch 8 with best_epoch = 5 and best_eval_logloss = 6.64266




Validation Accuracy: 0.5833


# Task ID81: 167141

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167141)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.761   | eval_logloss: 6.77967 |  0:00:13s
epoch 1  | loss: 0.41278 | eval_logloss: 4.91764 |  0:00:27s
epoch 2  | loss: 0.38843 | eval_logloss: 11.72693|  0:00:43s
epoch 3  | loss: 0.34273 | eval_logloss: 4.4863  |  0:00:56s
epoch 4  | loss: 0.33398 | eval_logloss: 2.83512 |  0:01:10s
epoch 5  | loss: 0.33208 | eval_logloss: 2.75758 |  0:01:23s
epoch 6  | loss: 0.31043 | eval_logloss: 2.91404 |  0:01:36s
epoch 7  | loss: 0.30053 | eval_logloss: 2.2786  |  0:01:49s
epoch 8  | loss: 0.29898 | eval_logloss: 6.11773 |  0:02:03s
epoch 9  | loss: 0.27517 | eval_logloss: 1.32208 |  0:02:17s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 1.32208




Validation Accuracy: 0.7270


# Task ID82: 167140

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167140)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.91516 | eval_logloss: 9.66    |  0:00:09s
epoch 1  | loss: 1.24728 | eval_logloss: 8.02218 |  0:00:18s
epoch 2  | loss: 1.03928 | eval_logloss: 8.29849 |  0:00:27s
epoch 3  | loss: 0.89406 | eval_logloss: 7.43972 |  0:00:37s
epoch 4  | loss: 0.72824 | eval_logloss: 8.38782 |  0:00:45s
epoch 5  | loss: 0.62032 | eval_logloss: 4.78904 |  0:00:55s
epoch 6  | loss: 0.55244 | eval_logloss: 2.50983 |  0:01:04s
epoch 7  | loss: 0.46628 | eval_logloss: 3.89368 |  0:01:13s
epoch 8  | loss: 0.35348 | eval_logloss: 2.46433 |  0:01:22s
epoch 9  | loss: 0.30849 | eval_logloss: 2.10161 |  0:01:31s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 2.10161




Validation Accuracy: 0.8182


# Task ID83: 167125

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167125)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.42006 | eval_logloss: 6.82898 |  0:00:19s
epoch 1  | loss: 0.63548 | eval_logloss: 2.79478 |  0:00:41s
epoch 2  | loss: 0.37057 | eval_logloss: 2.72187 |  0:01:02s
epoch 3  | loss: 0.30258 | eval_logloss: 1.99227 |  0:01:23s
epoch 4  | loss: 0.20639 | eval_logloss: 2.03623 |  0:01:44s
epoch 5  | loss: 0.1757  | eval_logloss: 1.50746 |  0:02:05s
epoch 6  | loss: 0.1877  | eval_logloss: 2.17526 |  0:02:25s
epoch 7  | loss: 0.16209 | eval_logloss: 0.72361 |  0:02:45s
epoch 8  | loss: 0.14063 | eval_logloss: 1.62026 |  0:03:06s
epoch 9  | loss: 0.12295 | eval_logloss: 2.25064 |  0:03:28s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.72361




Validation Accuracy: 0.5960


# Task ID84: 167124

# Task ID85: 167121

In [None]:
pip freeze > requirements.txt