In [None]:
!pip install --no-deps -r requirements.txt

Collecting catboost==1.2.7 (from -r requirements.txt (line 40))
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting dask-expr==1.1.16 (from -r requirements.txt (line 70))
  Downloading dask_expr-1.1.16-py3-none-any.whl.metadata (2.5 kB)
Collecting liac-arff==2.5.0 (from -r requirements.txt (line 245))
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting minio==7.2.12 (from -r requirements.txt (line 265))
  Downloading minio-7.2.12-py3-none-any.whl.metadata (6.5 kB)
Collecting openml==0.15.0 (from -r requirements.txt (line 316))
  Downloading openml-0.15.0-py3-none-any.whl.metadata (9.9 kB)
Collecting pycryptodome==3.21.0 (from -r requirements.txt (line 375))
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting pytorch-ranger==0.1.1 (from -r requirements.txt (line 412))
  Downloading pytorch_ranger-0.1.1-py3-none-any.whl.me

# Baselines: LR, KNN, SVM, DT, RF

In [None]:
import os
import numpy as np
import pandas as pd
import openml
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold


# Set OpenML configuration directory to a temporary location
os.environ["OPENML_CONFIG"] = "/tmp/openml_config"



# Preprocessing

In [None]:
def load_preprocess_task(task_id, task_type="classification", target_encode=None, cat_feature_encode=True):
    """
    Load and preprocess dataset from OpenML based on task type (classification or regression).
    Args:
        task_id (int): OpenML task ID
        task_type (str): Either 'classification' or 'regression'
        target_encode (bool or None): Encode target if classification task (default: True if not regression)
        cat_feature_encode (bool): Whether to one-hot encode categorical features (default: True)
    Returns:
        X_preprocessed (ndarray): Preprocessed feature set
        y (ndarray): Target values (encoded for classification tasks)
    """
    # Load task from OpenML using the task ID
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=task.target_name)

    print(f"Dataset shape: {X.shape}")

    # Infer and apply target encoding based on task type and target_encode flag
    is_regression = (task_type == "regression")
    if (target_encode is None and not is_regression) or target_encode:
        le = LabelEncoder()
        y = le.fit_transform(y)

    # Detect and preprocess categorical features
    categorical_cols = X.select_dtypes(include=['object', 'category']).columns
    numeric_cols = X.select_dtypes(include=['number']).columns

    # Preprocessing pipelines for numeric and categorical features
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())])

    if cat_feature_encode:
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_cols),
                ('cat', categorical_transformer, categorical_cols)])
    else:
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_cols)])

    # Apply transformations
    X_preprocessed = preprocessor.fit_transform(X)

    return X_preprocessed, y



# Cross Validation

In [None]:
def cross_validate_model(model, X, y, task_type="classification", n_folds=10):
    """
    Cross-validate model based on task type (classification or regression).
    Args:
        model: Machine learning model to train and evaluate
        X (ndarray): Feature matrix
        y (ndarray): Target vector
        task_type (str): 'classification' or 'regression'
        n_folds (int): Number of cross-validation folds
    Returns:
        avg_score (float): Average cross-validated score
    """
    if task_type == "classification":
        cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = accuracy_score
    elif task_type == "regression":
        cv = KFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = mean_squared_error
    else:
        raise ValueError("Invalid task type. Use 'classification' or 'regression'.")

    scores = []
    for train_index, test_index in cv.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate accuracy for classification or RMSE for regression
        score = scoring_func(y_test, y_pred)
        if task_type == "regression":
            score = np.sqrt(score)  # RMSE

        scores.append(score)

    avg_score = np.mean(scores)
    metric_name = "Accuracy" if task_type == "classification" else "RMSE"
    print(f"Average {metric_name}: {avg_score:.4f}")

    return avg_score



# Logistic Regression

In [None]:
def linear_model_trial(trial, task_type="classification"):
    if task_type == "classification":
        model = LogisticRegression(n_jobs=-1)
    else:
        raise NotImplementedError("Linear regression not implemented.")
    return model

def run_linear_model(X, y, task_type="classification"):
    model = linear_model_trial(None, task_type)
    scoring = 'accuracy' if task_type == "classification" else 'neg_root_mean_squared_error'
    scores = cross_val_score(model, X, y, cv=10, scoring=scoring)
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")


# KNN

In [None]:
def get_random_knn_parameters(seed, task_type="classification"):
    rs = np.random.RandomState(seed)
    params = {
        "n_neighbors": 1 + 2 * rs.randint(1, 21),
        "knn_alg": rs.choice(["kd_tree", "ball_tree"]),
        "leaf_size": rs.choice([30, 50, 70, 100, 300]),
    }
    return params

def run_knn(X, y, seed=42, task_type="classification"):
    params = get_random_knn_parameters(seed, task_type)
    if task_type == "classification":
        knn = KNeighborsClassifier(n_neighbors=params["n_neighbors"],
                                   algorithm=params["knn_alg"],
                                   leaf_size=params["leaf_size"],
                                   n_jobs=-1)
        scoring = 'accuracy'
    else:
        knn = KNeighborsRegressor(n_neighbors=params["n_neighbors"],
                                  algorithm=params["knn_alg"],
                                  leaf_size=params["leaf_size"],
                                  n_jobs=-1)
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(knn, X, y, cv=10, scoring=scoring)
    print(f"KNN with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")

# SVM

In [None]:
def get_random_svm_parameters(seed: int):
    # Generate random parameters for SVM
    rs = np.random.RandomState(seed)
    params = {"C": np.power(10, rs.uniform(-10, 10))}
    return params


def run_svm(X, y, seed=42, task_type="classification"):
    params = get_random_svm_parameters(seed)

    if task_type == "classification":
        svm_model = SVC(C=params["C"], probability=True)
        scoring = 'accuracy'
    else:
        svm_model = SVR(C=params["C"])
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(svm_model, X, y, cv=10, scoring=scoring)
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")

# Decision Tree

In [None]:
def get_random_decision_tree_parameters(seed, task_type="classification"):
    rs = np.random.RandomState(seed)
    params = {"max_depth": int(np.round(np.power(2, rs.uniform(1, np.log2(12)))))}
    return params

def run_decision_tree(X, y, seed=42, task_type="classification"):
    params = get_random_decision_tree_parameters(seed, task_type)
    if task_type == "classification":
        dt = DecisionTreeClassifier(max_depth=params["max_depth"], random_state=seed)
        scoring = 'accuracy'
    else:
        dt = DecisionTreeRegressor(max_depth=params["max_depth"], random_state=seed)
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(dt, X, y, cv=10, scoring=scoring)
    print(f"Decision Tree with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")


# Random Forest

In [None]:
def get_random_forest_parameters(seed, task_type="classification"):
    rs = np.random.RandomState(seed)
    params = {
        "max_depth": int(np.round(np.power(2, rs.uniform(1, np.log2(12))))),
        "n_estimators": int(np.round(np.power(5, rs.uniform(1, np.log2(100) / np.log2(5)))))
    }
    return params

def run_random_forest(X, y, seed=42, task_type="classification"):
    params = get_random_forest_parameters(seed, task_type)
    if task_type == "classification":
        rf = RandomForestClassifier(n_estimators=params["n_estimators"],
                                    max_depth=params["max_depth"],
                                    random_state=seed,
                                    n_jobs=-1)
        scoring = 'accuracy'
    else:
        rf = RandomForestRegressor(n_estimators=params["n_estimators"],
                                   max_depth=params["max_depth"],
                                   random_state=seed,
                                   n_jobs=-1)
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(rf, X, y, cv=10, scoring=scoring)
    print(f"Random Forest with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")

# Task ID1: 14965

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14965)  # Replace with an actual task ID


Dataset shape: (45211, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8142 ± 0.1492


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8398 ± 0.0424


In [None]:
run_svm(X, y, seed=42)


10-fold CV accuracy: 0.7432 ± 0.1386


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8235 ± 0.1123


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8769 ± 0.0159


# Task ID2: 9977

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9977)  # Replace with an actual task ID


Dataset shape: (34465, 118)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.9493


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.9274 ± 0.0276


In [None]:
run_svm(X, y, seed=42)


10-fold CV Accuracy: 0.9408 ± 0.0296


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.8235 ± 0.1123


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.8769 ± 0.0159


# Task ID3: 34539

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(34539)  # Replace with an actual task ID


Dataset shape: (32769, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.9472


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.9460 ± 0.0018


In [None]:
run_svm(X, y, seed=42)


10-fold CV Accuracy: 0.9482 ± 0.0011


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.9437 ± 0.0009


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.9421 ± 0.0001


# Task ID4: 146606

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146606)  # Replace with an actual task ID


Dataset shape: (98050, 28)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.6410


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6434 ± 0.0053


In [None]:
run_svm(X, y, seed=42)


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.6586 ± 0.0030


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.6645 ± 0.0038


# Task ID5: 7592



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(7592)  # Replace with an actual task ID


Dataset shape: (48842, 14)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.8516


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.8434 ± 0.0033


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.8432 ± 0.0032


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.8257 ± 0.0040


# Task ID6: 146195



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146195)  # Replace with an actual task ID


Dataset shape: (67557, 42)


In [None]:
# Choose and run classification models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.7571


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6227 ± 0.0459


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.5900 ± 0.1039


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.6601 ± 0.0034


# Task ID7: 167119



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167119)  # Replace with an actual task ID


Dataset shape: (44819, 6)


In [None]:
# Choose and run classification models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6539 ± 0.0666


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7219 ± 0.0975


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6412 ± 0.0753


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7051 ± 0.0600


# Task ID8: 167120



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167120)  # Replace with an actual task ID


Dataset shape: (96320, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.5234


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.5088 ± 0.0045


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.5177 ± 0.0036


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.5193 ± 0.0034


# Task ID10: 	3945



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3945)  # Replace with an actual task ID


Dataset shape: (50000, 230)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.9806


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.9822 ± 0.0000


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.9821 ± 0.0004


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.9822 ± 0.0000


# Task ID11: 	168331



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168331)  # Replace with an actual task ID


Dataset shape: (58310, 180)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.5827


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6704 ± 0.0050


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.4678 ± 0.0045


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.4994 ± 0.0040


# Task ID12: 	168330



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168330)  # Replace with an actual task ID


Dataset shape: (83733, 54)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.6436


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6266 ± 0.0041


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.6169 ± 0.0041


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.6377 ± 0.0032





# Task ID13: 	168335



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168335)  # Replace with an actual task ID


Dataset shape: (130064, 50)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8829 ± 0.0028


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8966 ± 0.0025


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8779 ± 0.0030


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8846 ± 0.0039


# Task ID16: 	146212



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146212)  # Replace with an actual task ID


Dataset shape: (58000, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9665 ± 0.0021


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9981 ± 0.0004


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9979 ± 0.0004


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9971 ± 0.0003


# Task ID19: 	168868



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168868)  # Replace with an actual task ID


Dataset shape: (76000, 170)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9910 ± 0.0014


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9892 ± 0.0010


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9888 ± 0.0011


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9896 ± 0.0008


#  OpenML-CC18 Curated Classification benchmark

# Task ID20: 31

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(31)  # Replace with an actual task ID


Dataset shape: (1000, 20)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7500 ± 0.0498


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7320 ± 0.0232


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7080 ± 0.0199


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7190 ± 0.0255


# Task ID21: 10101

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(10101)  # Replace with an actual task ID


Dataset shape: (748, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7687 ± 0.0119


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7301 ± 0.0881


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7488 ± 0.1405


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7674 ± 0.0698


# Task ID22: 	3913

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3913)  # Replace with an actual task ID


Dataset shape: (522, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8406 ± 0.0733


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8216 ± 0.0625


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7851 ± 0.0811


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8272 ± 0.0782


# Task ID23: 3

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3)  # Replace with an actual task ID


Dataset shape: (3196, 36)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9393 ± 0.0471


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7844 ± 0.0701


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9409 ± 0.0568


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9324 ± 0.0642


# Task ID24: 3917

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3917)  # Replace with an actual task ID


Dataset shape: (2109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8521 ± 0.0241


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8379 ± 0.0212


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8312 ± 0.0342


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8492 ± 0.0202


# Task ID25: 9957

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9957)  # Replace with an actual task ID


Dataset shape: (1055, 41)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8606 ± 0.0387


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8454 ± 0.0580


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7809 ± 0.0668


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8170 ± 0.0715


# Task ID26: 9946

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9946)  # Replace with an actual task ID


Dataset shape: (569, 30)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9807 ± 0.0146


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9613 ± 0.0233


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9158 ± 0.0449


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9596 ± 0.0342


# Task ID27: 3918

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3918)  # Replace with an actual task ID


Dataset shape: (1109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9297 ± 0.0184


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9270 ± 0.0102


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9315 ± 0.0128


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9297 ± 0.0053


# Task ID28: 3903

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3903)  # Replace with an actual task ID


Dataset shape: (1563, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9021 ± 0.0112


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8938 ± 0.0109


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8925 ± 0.0080


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8957 ± 0.0042


# Task ID29: 37

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(37)  # Replace with an actual task ID


Dataset shape: (768, 8)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7722 ± 0.0362


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7474 ± 0.0413


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7226 ± 0.0444


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7643 ± 0.0380


# Task ID30: 9971

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9971)  # Replace with an actual task ID


Dataset shape: (583, 10)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7222 ± 0.0191


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6724 ± 0.0427


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6810 ± 0.0272


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7084 ± 0.0337


# Task ID31: 9952

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9952)  # Replace with an actual task ID


Dataset shape: (5404, 5)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7513 ± 0.0092


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8618 ± 0.0150


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7966 ± 0.0105


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8168 ± 0.0105


# Task ID32: 3902

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3902)  # Replace with an actual task ID


Dataset shape: (1458, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9095 ± 0.0147


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8937 ± 0.0140


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8944 ± 0.0192


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8923 ± 0.0061


# Task ID33: 49

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(49)  # Replace with an actual task ID


Dataset shape: (958, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9759 ± 0.0492


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8613 ± 0.0866


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6870 ± 0.1060


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7464 ± 0.0594


# Task ID34: 43

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(43)  # Replace with an actual task ID


Dataset shape: (4601, 57)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9170 ± 0.0287


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8935 ± 0.0367


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8907 ± 0.0374


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9178 ± 0.0314


# Task ID35: 9978

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9978)  # Replace with an actual task ID


Dataset shape: (2534, 72)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9357 ± 0.0217


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9369 ± 0.0058


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9037 ± 0.0376


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9317 ± 0.0168


# Task ID36: 10093

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(10093)  # Replace with an actual task ID


Dataset shape: (1372, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9810 ± 0.0099


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9993 ± 0.0022


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9519 ± 0.0199


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9723 ± 0.0112


# Task ID37: 219

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(219)  # Replace with an actual task ID


Dataset shape: (45312, 8)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7427 ± 0.0722


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7250 ± 0.0611


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7227 ± 0.0958


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7346 ± 0.0932


# Task ID38: 9976

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9976)  # Replace with an actual task ID


Dataset shape: (2600, 500)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.5519 ± 0.0359


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5723 ± 0.0309


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7327 ± 0.0382


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6446 ± 0.0194


# Task ID39: 6

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(6)  # Replace with an actual task ID


Dataset shape: (20000, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7728 ± 0.0109


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9352 ± 0.0066


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.2533 ± 0.0039


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6069 ± 0.0110


# Task ID40: 53

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(53)  # Replace with an actual task ID


Dataset shape: (846, 18)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7991 ± 0.0267


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7011 ± 0.0452


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6631 ± 0.0390


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7116 ± 0.0266


# Task ID41: 11

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(11)  # Replace with an actual task ID


Dataset shape: (625, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8497 ± 0.0556


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8369 ± 0.0542


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6932 ± 0.0861


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6800 ± 0.1023


# Task ID42: 15

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(15)  # Replace with an actual task ID


Dataset shape: (699, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9671 ± 0.0300


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9671 ± 0.0293


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9428 ± 0.0325


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9686 ± 0.0246


# Task ID43: 16

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(16)  # Replace with an actual task ID


Dataset shape: (2000, 64)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9490 ± 0.0118


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9420 ± 0.0121


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5880 ± 0.0308


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9135 ± 0.0204


# Task ID44: 14

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14)  # Replace with an actual task ID


Dataset shape: (2000, 76)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8210 ± 0.0143


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8040 ± 0.0219


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5405 ± 0.0430


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7790 ± 0.0258


# Task ID45: 32

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(32)  # Replace with an actual task ID


Dataset shape: (10992, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9503 ± 0.0105


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9881 ± 0.0039


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7196 ± 0.0143


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8659 ± 0.0170


# Task ID46: 3549

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3549)  # Replace with an actual task ID


Dataset shape: (841, 70)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9964 ± 0.0054


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9941 ± 0.0079


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9133 ± 0.0492


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9834 ± 0.0109


# Task ID47: 12

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(12)  # Replace with an actual task ID


Dataset shape: (2000, 216)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9785 ± 0.0078


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9600 ± 0.0105


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5235 ± 0.0166


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9255 ± 0.0175


# Task ID48: 9981

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9981)  # Replace with an actual task ID


Dataset shape: (1080, 856)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9509 ± 0.0227


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8343 ± 0.0322


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4278 ± 0.0301


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8806 ± 0.0232


# Task ID49: 18

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(18)  # Replace with an actual task ID


Dataset shape: (2000, 6)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7360 ± 0.0206


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7170 ± 0.0125


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6355 ± 0.0584


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7040 ± 0.0211


# Task ID50: 28

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(28)  # Replace with an actual task ID


Dataset shape: (5620, 64)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9635 ± 0.0065


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9676 ± 0.0054


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5235 ± 0.0325


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9034 ± 0.0192


# Task ID51: 2074

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(2074)  # Replace with an actual task ID


Dataset shape: (6430, 36)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8575 ± 0.0111


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8994 ± 0.0089


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7876 ± 0.0145


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8375 ± 0.0076


# Task ID52: 29

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(29)  # Replace with an actual task ID


Dataset shape: (690, 15)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8420 ± 0.1574


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8406 ± 0.1126


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8377 ± 0.1391


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8536 ± 0.1334


# Task ID53: 45

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(45)  # Replace with an actual task ID


Dataset shape: (3190, 60)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9411 ± 0.0174


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8379 ± 0.0260


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9210 ± 0.0135


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9088 ± 0.0332


# Task ID54: 125922

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(125922)  # Replace with an actual task ID


Dataset shape: (5500, 40)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9955 ± 0.0017


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9735 ± 0.0068


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5316 ± 0.0084


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8138 ± 0.0123


# Task ID55: 9960

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9960)  # Replace with an actual task ID


Dataset shape: (5456, 24)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6692 ± 0.0426


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7572 ± 0.0626


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9062 ± 0.0403


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9256 ± 0.0394


# Task ID56: 9964

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9964)  # Replace with an actual task ID


Dataset shape: (1593, 256)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9178 ± 0.0191


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9021 ± 0.0295


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5776 ± 0.0403


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8173 ± 0.0345


# Task ID57: 22

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(22)  # Replace with an actual task ID


Dataset shape: (2000, 47)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8245 ± 0.0199


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8040 ± 0.0237


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4935 ± 0.0535


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6935 ± 0.0265


# Task ID58: 2079

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(2079)  # Replace with an actual task ID


Dataset shape: (736, 19)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.4828 ± 0.0909


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.4245 ± 0.1145




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5655 ± 0.0622


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.4557 ± 0.0925


# Task ID59: 14969

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14969)  # Replace with an actual task ID


Dataset shape: (9873, 32)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.4598 ± 0.0498


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.4234 ± 0.0623


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4444 ± 0.0496


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.4620 ± 0.0466


# Task ID60: 3560

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3560)  # Replace with an actual task ID


Dataset shape: (797, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.2034 ± 0.0436


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.1895 ± 0.0194




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.1969 ± 0.0485


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.1997 ± 0.0395


# Task ID61: 14952

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14952)  # Replace with an actual task ID


Dataset shape: (11055, 30)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9372 ± 0.0068


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9438 ± 0.0119


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9174 ± 0.0077


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9233 ± 0.0099


# Task ID62: 125920

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(125920)  # Replace with an actual task ID


Dataset shape: (500, 12)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6060 ± 0.0664


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5900 ± 0.0694




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5840 ± 0.0578


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6120 ± 0.0421


# Task ID63: 23

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(23)  # Replace with an actual task ID


Dataset shape: (1473, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.5098 ± 0.0369


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5153 ± 0.0181


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5520 ± 0.0307


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.5268 ± 0.0260


# Task ID64: 3904

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3904)  # Replace with an actual task ID


Dataset shape: (10885, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8100 ± 0.0211


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7984 ± 0.0167


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7962 ± 0.0164


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8084 ± 0.0126


# Task ID65: 3022

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3022)  # Replace with an actual task ID


Dataset shape: (990, 12)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.5495 ± 0.0663


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6071 ± 0.0481


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4222 ± 0.0952


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.5808 ± 0.0713


# Task ID66: 9985

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9985)  # Replace with an actual task ID


Dataset shape: (6118, 51)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.4580 ± 0.0622


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5031 ± 0.0568


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4333 ± 0.0421


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.4706 ± 0.0492


# Task ID67: 9910

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9910)  # Replace with an actual task ID


Dataset shape: (3751, 1776)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7337 ± 0.0228


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7438 ± 0.0349


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7643 ± 0.0167


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7254 ± 0.0181


# Task ID68: 14970

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14970)  # Replace with an actual task ID


Dataset shape: (10299, 561)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9581 ± 0.0266


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9054 ± 0.0171


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8490 ± 0.0343


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8851 ± 0.0166


# Task ID69: 3021

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3021)  # Replace with an actual task ID


Dataset shape: (3772, 29)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9669 ± 0.0057


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9642 ± 0.0101


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9849 ± 0.0063


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9443 ± 0.0060


# Task ID70: 3481

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3481)  # Replace with an actual task ID


Dataset shape: (7797, 617)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9556 ± 0.0138


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9151 ± 0.0279


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.3926 ± 0.0204


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7816 ± 0.0239


# Task ID71: 3573

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3573)  # Replace with an actual task ID


Dataset shape: (70000, 784)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9173 ± 0.0069


In [None]:
run_knn(X, y, seed=42)


In [None]:
run_decision_tree(X, y, seed=42)


In [None]:
run_random_forest(X, y, seed=42)

# Task ID72: 146824

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146824)  # Replace with an actual task ID


Dataset shape: (2000, 240)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9640 ± 0.0080


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9695 ± 0.0123


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7735 ± 0.0281


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9405 ± 0.0149


# Task ID73: 146820

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146820)  # Replace with an actual task ID


Dataset shape: (4839, 5)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9465 ± 0.0321


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9599 ± 0.0096


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9777 ± 0.0107


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9702 ± 0.0088


# Task ID74: 146822

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146822)  # Replace with an actual task ID


Dataset shape: (2310, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8745 ± 0.0099


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8831 ± 0.0190


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7558 ± 0.0116


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8485 ± 0.0130


# Task ID75: 146195

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146195)  # Replace with an actual task ID


Dataset shape: (67557, 42)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7244 ± 0.0440


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6227 ± 0.0459


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5900 ± 0.1039


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6601 ± 0.0034


# Task ID76: 146800

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146800)  # Replace with an actual task ID


Dataset shape: (1080, 77)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8407 ± 0.0767


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5741 ± 0.1253


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5139 ± 0.1086


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6426 ± 0.1076


# Task ID77: 146817

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146817)  # Replace with an actual task ID


Dataset shape: (1941, 27)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6322 ± 0.0939


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6132 ± 0.1021


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5534 ± 0.0802


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.5931 ± 0.0807


# Task ID78: 146819

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146819)  # Replace with an actual task ID


Dataset shape: (540, 18)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9611 ± 0.0175


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9148 ± 0.0091


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9259 ± 0.0166


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9148 ± 0.0091


# Task ID79: 146821

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146821)  # Replace with an actual task ID


Dataset shape: (1728, 6)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8327 ± 0.0780


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7617 ± 0.0899




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7326 ± 0.0674


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7344 ± 0.0587


# Task ID80: 14954

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14954)  # Replace with an actual task ID


Dataset shape: (540, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6019 ± 0.1229


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6278 ± 0.0770




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6111 ± 0.1356


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6444 ± 0.1239


# Task ID81: 167141

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167141)  # Replace with an actual task ID


Dataset shape: (5000, 20)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8546 ± 0.0073


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8778 ± 0.0050


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9264 ± 0.0088


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8892 ± 0.0077


# Task ID82: 167140

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167140)  # Replace with an actual task ID


Dataset shape: (3186, 180)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9466 ± 0.0115


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8528 ± 0.0167


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8939 ± 0.0155


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9005 ± 0.0119


# Task ID83: 167125

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167125)  # Replace with an actual task ID


Dataset shape: (3279, 1558)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9634 ± 0.0248


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9341 ± 0.0289


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9558 ± 0.0231


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8990 ± 0.0204


# Task ID84: 167124

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167124)  # Replace with an actual task ID


Dataset shape: (60000, 3072)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

In [None]:
run_knn(X, y, seed=42)


In [None]:
run_decision_tree(X, y, seed=42)


In [None]:
run_random_forest(X, y, seed=42)

# Task ID85: 167121

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167121)  # Replace with an actual task ID


Dataset shape: (92000, 1024)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

In [None]:
run_knn(X, y, seed=42)


In [None]:
run_decision_tree(X, y, seed=42)


In [None]:
run_random_forest(X, y, seed=42)

# GBDTs: XGBoost, CatBoost and LightGBM

In [None]:
import numpy as np
import pandas as pd
import openml
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, mean_squared_error
from catboost import CatBoostClassifier, CatBoostRegressor
from xgboost import XGBClassifier, XGBRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.model_selection import StratifiedKFold, KFold, cross_val_score



# Preprocessing

In [None]:
def load_preprocess_task(task_id, model_type="xgboost", task_type="classification", target_encode=None):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=task.target_name)

    print(f"Dataset shape: {X.shape}")

    # Encode target variable if classification
    if target_encode or (target_encode is None and task_type == "classification"):
        le = LabelEncoder()
        y = le.fit_transform(y)

    # Define feature columns
    categorical_cols = X.select_dtypes(include=['object', 'category']).columns
    numeric_cols = X.select_dtypes(include=['number']).columns
    cat_features = []

    # Define transformers for numeric and categorical columns
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    # Configure encoders based on model type
    if model_type == "xgboost":
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ])
    elif model_type == "catboost":
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('ordinal', OrdinalEncoder())
        ])
        cat_features = [X.columns.get_loc(col) for col in categorical_cols]
    elif model_type == "lightgbm":
        # Ensure one-hot encoding for LightGBM as it doesn't handle categorical strings natively
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ])
        cat_features = [X.columns.get_loc(col) for col in categorical_cols]

    # Create preprocessor and preprocess data
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_cols),
            ('cat', categorical_transformer, categorical_cols)
        ])
    X_preprocessed = preprocessor.fit_transform(X)

    return X_preprocessed, y, cat_features if model_type in ["catboost", "lightgbm"] else None




# Cross Validation

In [None]:
def cross_validate_model(model, X, y, task_type="classification", n_folds=10):
    if task_type == "classification":
        cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = accuracy_score
    elif task_type == "regression":
        cv = KFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = mean_squared_error
    else:
        raise ValueError("Invalid task type. Use 'classification' or 'regression'.")

    scores = []
    for train_index, test_index in cv.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        score = scoring_func(y_test, y_pred)
        if task_type == "regression":
            score = np.sqrt(score)  # RMSE

        scores.append(score)

    avg_score = np.mean(scores)
    metric_name = "Accuracy" if task_type == "classification" else "RMSE"
    print(f"Average {metric_name}: {avg_score:.4f}")

    return avg_score





# XGBoost

In [None]:
def get_random_xgboost_parameters(seed):
    rs = np.random.RandomState(seed)
    params = {
        "max_depth": int(np.round(np.power(2, rs.uniform(1, np.log2(12))))),
        "alpha": np.power(10, rs.uniform(-8, 0)),
        "lambda": np.power(10, rs.uniform(-8, 0)),
        "eta": 3.0 * np.power(10, rs.uniform(-2, -1)),
    }
    return params

def run_xgboost(X, y, seed=42, task_type="classification"):
    params = get_random_xgboost_parameters(seed)
    model = XGBClassifier(**params) if task_type == "classification" else XGBRegressor(**params)
    scoring = 'accuracy' if task_type == "classification" else 'neg_root_mean_squared_error'

    scores = cross_val_score(model, X, y, cv=10, scoring=scoring)
    print(f"XGBoost with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")




# CatBoost

In [None]:
def get_random_catboost_parameters(seed):
    rs = np.random.RandomState(seed)
    params = {
        "learning_rate": 3.0 * np.power(10, rs.uniform(-2, -1)),
        "max_depth": int(np.round(np.power(2, rs.uniform(1, np.log2(12))))),
        "l2_leaf_reg": 0.5 * np.power(60, rs.uniform(0, 1)),
    }
    return params

def run_catboost(X, y, seed=42, task_type="classification", num_classes=None):
    params = get_random_catboost_parameters(seed)  # Removed task_type

    if task_type == "regression":
        model = CatBoostRegressor(iterations=1000, verbose=0, **params)
        scoring = 'neg_root_mean_squared_error'
    elif task_type == "classification":
        model = CatBoostClassifier(iterations=1000, classes_count=num_classes, verbose=0, **params)
        scoring = 'accuracy'

    # Perform cross-validation with suppressed verbosity
    scores = cross_val_score(model, X, y, cv=10, scoring=scoring)
    print(f"CatBoost with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")




# LightGBM

In [None]:
def get_random_lightgbm_parameters(seed):
    rs = np.random.RandomState(seed)
    params = {
        "num_leaves": int(np.round(np.power(2, rs.uniform(1, 12)))),
        "lambda_l1": np.power(10, rs.uniform(-8, 1)),
        "lambda_l2": np.power(10, rs.uniform(-8, 1)),
        "learning_rate": 3.0 * np.power(10, rs.uniform(-2, 1)),
    }
    return params


def run_lightgbm(X, y, seed=42, task_type="classification"):
    params = get_random_lightgbm_parameters(seed)

    # Determine num_classes based on unique target values for LightGBM compatibility
    num_classes = len(np.unique(y))

    if task_type == "regression":
        params["objective"] = "regression"
        params["metric"] = "mse"
        model = LGBMRegressor(**params, verbose=-1)
        scoring = 'neg_root_mean_squared_error'
    elif task_type == "classification":
        params["objective"] = "multiclass" if num_classes > 2 else "binary"
        params["num_class"] = num_classes if num_classes > 2 else None
        model = LGBMClassifier(**params, verbose=-1)
        scoring = 'accuracy'

    # Perform cross-validation
    scores = cross_val_score(model, X, y, cv=10, scoring=scoring)
    print(f"LightGBM with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")


# Task ID1: 14965

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(14965)  # Replace with an actual task ID

Dataset shape: (45211, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7360 ± 0.1495


In [None]:
run_catboost(X, y, seed=42)


CatBoost with random params: {'learning_rate': 0.07106591851092234, 'max_depth': 11, 'l2_leaf_reg': 10.013039911391246}
10-fold CV accuracy: 0.6491 ± 0.1581


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7733 ± 0.1460


# Task ID2: 9977

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9977)  # Replace with an actual task ID


Dataset shape: (34465, 118)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9488 ± 0.0254


In [None]:
run_catboost(X, y, seed=42)


CatBoost with random params: {'learning_rate': 0.07106591851092234, 'max_depth': 11, 'l2_leaf_reg': 10.013039911391246}
10-fold CV accuracy: 0.9531 ± 0.0224


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8318 ± 0.0689


# Task ID3: 34539

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(34539)  # Replace with an actual task ID


Dataset shape: (32769, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9431 ± 0.0006


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9131 ± 0.0140


# Task ID4: 146606

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146606)  # Replace with an actual task ID


Dataset shape: (98050, 28)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7192 ± 0.0041


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6664 ± 0.0094


# Task ID5: 7592



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(7592)  # Replace with an actual task ID


Dataset shape: (48842, 14)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8721 ± 0.0031


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6680 ± 0.1565


# Task ID6: 146195



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146195)  # Replace with an actual task ID


Dataset shape: (67557, 42)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6903 ± 0.0501


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4384 ± 0.1732


# Task ID7: 167119



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(167119)  # Replace with an actual task ID


Dataset shape: (44819, 6)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7006 ± 0.0713


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4532 ± 0.1107


# Task ID8: 167120



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(167120)  # Replace with an actual task ID


Dataset shape: (96320, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5177 ± 0.0038


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5047 ± 0.0052


# Task ID9: 146825



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146825)  # Replace with an actual task ID


Dataset shape: (70000, 784)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5047 ± 0.0052


# Task ID10: 	3945



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3945)  # Replace with an actual task ID


Dataset shape: (50000, 230)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9821 ± 0.0001


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9682 ± 0.0092


# Task ID11: 	168331



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168331)  # Replace with an actual task ID


Dataset shape: (58310, 180)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6448 ± 0.0050


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.1990 ± 0.0474


# Task ID12: 	168330



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168330)  # Replace with an actual task ID


Dataset shape: (83733, 54)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7014 ± 0.0031


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID13: 	168335



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168335)  # Replace with an actual task ID


Dataset shape: (130064, 50)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9355 ± 0.0020


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8926 ± 0.0137


# Task ID14: 	168332



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168332)  # Replace with an actual task ID


Dataset shape: (10000, 7200)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID15: 	168337



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168337)  # Replace with an actual task ID


Dataset shape: (20000, 4296)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

Exception ignored on calling ctypes callback function: <bound method DataIter._next_wrapper of <xgboost.data.SingleBatchInternalIter object at 0x781c108b4400>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 582, in _next_wrapper
    def _next_wrapper(self, this: None) -> int:  # pylint: disable=unused-argument
KeyboardInterrupt: 
Exception ignored on calling ctypes callback function: <bound method DataIter._next_wrapper of <xgboost.data.SingleBatchInternalIter object at 0x781c108b4a00>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 582, in _next_wrapper
    def _next_wrapper(self, this: None) -> int:  # pylint: disable=unused-argument
KeyboardInterrupt: 


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751



# Task ID16: 	146212



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146212)  # Replace with an actual task ID


Dataset shape: (58000, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9999 ± 0.0001


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 7}
10-fold CV accuracy: 0.8166 ± 0.1577


# Task ID17: 	168329



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168329)  # Replace with an actual task ID


Dataset shape: (65196, 27)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID18: 	168338



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168338)  # Replace with an actual task ID


Dataset shape: (20000, 4296)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID19: 	168868



In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(168868)  # Replace with an actual task ID


Dataset shape: (76000, 170)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9944 ± 0.0007


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9757 ± 0.0072


# Task ID20: 31

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(31)  # Replace with an actual task ID


Dataset shape: (1000, 20)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7560 ± 0.0310


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7030 ± 0.0650


# Task ID21: 10101

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(10101)  # Replace with an actual task ID


Dataset shape: (748, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7127 ± 0.1362


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6781 ± 0.1610


# Task ID22: 	3913

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3913)  # Replace with an actual task ID


Dataset shape: (522, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7966 ± 0.0775


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7910 ± 0.1004


# Task ID23: 3

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3)  # Replace with an actual task ID


Dataset shape: (3196, 36)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9687 ± 0.0289


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8393 ± 0.1783


# Task ID24: 3917

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3917)  # Replace with an actual task ID


Dataset shape: (2109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8308 ± 0.0354


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7942 ± 0.0312


# Task ID25: 9957

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9957)  # Replace with an actual task ID


Dataset shape: (1055, 41)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8511 ± 0.0545


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8019 ± 0.0565


# Task ID26: 9946

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9946)  # Replace with an actual task ID


Dataset shape: (569, 30)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9701 ± 0.0208


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9526 ± 0.0272


# Task ID27: 3918

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3918)  # Replace with an actual task ID


Dataset shape: (1109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9297 ± 0.0311


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8765 ± 0.0353


# Task ID28: 3903

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3903)  # Replace with an actual task ID


Dataset shape: (1563, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8925 ± 0.0144


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8421 ± 0.0472


# Task ID29: 37

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(37)  # Replace with an actual task ID


Dataset shape: (768, 8)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7578 ± 0.0416


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7005 ± 0.0261


# Task ID30: 9971

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9971)  # Replace with an actual task ID


Dataset shape: (583, 10)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6793 ± 0.0824


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6211 ± 0.0865


# Task ID31: 9952

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9952)  # Replace with an actual task ID


Dataset shape: (5404, 5)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8769 ± 0.0154


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7089 ± 0.0997


# Task ID32: 3902

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3902)  # Replace with an actual task ID


Dataset shape: (1458, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9108 ± 0.0126


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8560 ± 0.0283


# Task ID33: 49

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(49)  # Replace with an actual task ID


Dataset shape: (958, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8779 ± 0.1370


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8237 ± 0.1105


# Task ID34: 43

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(43)  # Replace with an actual task ID


Dataset shape: (4601, 57)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9422 ± 0.0324


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8111 ± 0.0454


# Task ID35: 9978

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9978)  # Replace with an actual task ID


Dataset shape: (2534, 72)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9325 ± 0.0174


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9069 ± 0.0168


# Task ID36: 10093

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(10093)  # Replace with an actual task ID


Dataset shape: (1372, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9956 ± 0.0048


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9818 ± 0.0188


# Task ID37: 219

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(219)  # Replace with an actual task ID


Dataset shape: (45312, 8)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7299 ± 0.0614


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7130 ± 0.0965


# Task ID38: 9976

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9976)  # Replace with an actual task ID


Dataset shape: (2600, 500)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7835 ± 0.0221


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6935 ± 0.0315


# Task ID39: 6

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(6)  # Replace with an actual task ID


Dataset shape: (20000, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9321 ± 0.0028


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 26}
10-fold CV accuracy: 0.1738 ± 0.0415


# Task ID40: 53

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(53)  # Replace with an actual task ID


Dataset shape: (846, 18)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7696 ± 0.0277


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4054 ± 0.0701


# Task ID41: 11

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(11)  # Replace with an actual task ID


Dataset shape: (625, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7426 ± 0.1043


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.8131 ± 0.0642


# Task ID42: 15

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(15)  # Replace with an actual task ID


Dataset shape: (699, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9586 ± 0.0322


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9529 ± 0.0313


# Task ID43: 16

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(16)  # Replace with an actual task ID


Dataset shape: (2000, 64)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9460 ± 0.0118


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3185 ± 0.1023


# Task ID44: 14

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(14)  # Replace with an actual task ID


Dataset shape: (2000, 76)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8380 ± 0.0176


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.2800 ± 0.0680


# Task ID45: 32

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(32)  # Replace with an actual task ID


Dataset shape: (10992, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9889 ± 0.0031


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3439 ± 0.1041


# Task ID46: 3549

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3549)  # Replace with an actual task ID


Dataset shape: (841, 70)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9857 ± 0.0139


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.9013 ± 0.0547


# Task ID47: 12

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(12)  # Replace with an actual task ID


Dataset shape: (2000, 216)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9600 ± 0.0140


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.4495 ± 0.1933


# Task ID48: 9981

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9981)  # Replace with an actual task ID


Dataset shape: (1080, 856)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9176 ± 0.0308


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 9}
10-fold CV accuracy: 0.1278 ± 0.0522


# Task ID49: 18

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(18)  # Replace with an actual task ID


Dataset shape: (2000, 6)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7065 ± 0.0166


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.2690 ± 0.0521


# Task ID50: 28

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(28)  # Replace with an actual task ID


Dataset shape: (5620, 64)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9733 ± 0.0092


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3534 ± 0.1022


# Task ID51: 2074

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(2074)  # Replace with an actual task ID


Dataset shape: (6430, 36)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9106 ± 0.0123


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.3664 ± 0.0641


# Task ID52: 29

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(29)  # Replace with an actual task ID


Dataset shape: (690, 15)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8391 ± 0.1284


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8087 ± 0.1184


# Task ID53: 45

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(45)  # Replace with an actual task ID


Dataset shape: (3190, 60)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9583 ± 0.0141


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.5639 ± 0.0946


# Task ID54: 125922

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(125922)  # Replace with an actual task ID


Dataset shape: (5500, 40)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9842 ± 0.0056


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 11}
10-fold CV accuracy: 0.4633 ± 0.1052


# Task ID55: 9960

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9960)  # Replace with an actual task ID


Dataset shape: (5456, 24)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9941 ± 0.0086


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.7197 ± 0.2254


# Task ID56: 9964

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9964)  # Replace with an actual task ID


Dataset shape: (1593, 256)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9272 ± 0.0229


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3132 ± 0.0912


# Task ID57: 22

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(22)  # Replace with an actual task ID


Dataset shape: (2000, 47)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7790 ± 0.0221


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3195 ± 0.0914


# Task ID58: 2079

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(2079)  # Replace with an actual task ID


Dataset shape: (736, 19)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5601 ± 0.0555


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 5}
10-fold CV accuracy: 0.2785 ± 0.0715


# Task ID59: 14969

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(14969)  # Replace with an actual task ID


Dataset shape: (9873, 32)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.4783 ± 0.0596


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 5}
10-fold CV accuracy: 0.2969 ± 0.0562


# Task ID60: 3560

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3560)  # Replace with an actual task ID


Dataset shape: (797, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.1896 ± 0.0429


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.1595 ± 0.0293


# Task ID61: 14952

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(14952)  # Replace with an actual task ID


Dataset shape: (11055, 30)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9557 ± 0.0059


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8177 ± 0.1445


# Task ID62: 125920

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(125920)  # Replace with an actual task ID


Dataset shape: (500, 12)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6020 ± 0.0433


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5620 ± 0.0745


# Task ID63: 23

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(23)  # Replace with an actual task ID


Dataset shape: (1473, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5601 ± 0.0400


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4351 ± 0.0485


# Task ID64: 3904

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3904)  # Replace with an actual task ID


Dataset shape: (10885, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8066 ± 0.0187


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7150 ± 0.0319


# Task ID65: 3022

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3022)  # Replace with an actual task ID


Dataset shape: (990, 12)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6545 ± 0.0697


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 11}
10-fold CV accuracy: 0.1808 ± 0.0672


# Task ID66: 9985

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9985)  # Replace with an actual task ID


Dataset shape: (6118, 51)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5414 ± 0.0602


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.2517 ± 0.1198


# Task ID67: 9910

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(9910)  # Replace with an actual task ID


Dataset shape: (3751, 1776)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7974 ± 0.0180


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5692 ± 0.0653


# Task ID68: 14970

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(14970)  # Replace with an actual task ID


Dataset shape: (10299, 561)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9555 ± 0.0293


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.4214 ± 0.1325


# Task ID69: 3021

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3021)  # Replace with an actual task ID


Dataset shape: (3772, 29)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9913 ± 0.0052


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9327 ± 0.0411


# Task ID70: 3481

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3481)  # Replace with an actual task ID


Dataset shape: (7797, 617)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9478 ± 0.0121


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 26}
10-fold CV accuracy: 0.2706 ± 0.0419


# Task ID71: 3573

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(3573)  # Replace with an actual task ID


Dataset shape: (70000, 784)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

In [None]:
run_lightgbm(X, y, seed=42)


# Task ID72: 146824

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146824)  # Replace with an actual task ID


Dataset shape: (2000, 240)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9630 ± 0.0131


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3550 ± 0.0862


# Task ID73: 146820

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146820)  # Replace with an actual task ID


Dataset shape: (4839, 5)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9845 ± 0.0111


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9653 ± 0.0158


# Task ID74: 146822

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146822)  # Replace with an actual task ID


Dataset shape: (2310, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9342 ± 0.0116


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 7}
10-fold CV accuracy: 0.4701 ± 0.1103


# Task ID75: 146195

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146195)  # Replace with an actual task ID


Dataset shape: (67557, 42)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6903 ± 0.0501


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4384 ± 0.1732


# Task ID76: 146800

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146800)  # Replace with an actual task ID


Dataset shape: (1080, 77)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7093 ± 0.1207


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 8}
10-fold CV accuracy: 0.3481 ± 0.1663


# Task ID77: 146817

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146817)  # Replace with an actual task ID


Dataset shape: (1941, 27)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6353 ± 0.0864


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 7}
10-fold CV accuracy: 0.2788 ± 0.0751


# Task ID78: 146819

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146819)  # Replace with an actual task ID


Dataset shape: (540, 18)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9463 ± 0.0255


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9370 ± 0.0343


# Task ID79: 146821

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(146821)  # Replace with an actual task ID


Dataset shape: (1728, 6)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8773 ± 0.0711


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.5679 ± 0.1789


# Task ID80: 14954

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(14954)  # Replace with an actual task ID


Dataset shape: (540, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6241 ± 0.0837


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5648 ± 0.1022


# Task ID81: 167141

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(167141)  # Replace with an actual task ID


Dataset shape: (5000, 20)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9518 ± 0.0099


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7800 ± 0.0865


# Task ID82: 167140

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(167140)  # Replace with an actual task ID


Dataset shape: (3186, 180)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9601 ± 0.0113


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.5577 ± 0.1720


# Task ID83: 167125

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(167125)  # Replace with an actual task ID


Dataset shape: (3279, 1558)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9695 ± 0.0201


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8750 ± 0.0508


# Task ID84: 167124

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(167124)  # Replace with an actual task ID


Dataset shape: (60000, 3072)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

In [None]:
run_lightgbm(X, y, seed=42)


# Task ID85: 167121

In [None]:
# Load and preprocess data using OpenML Task ID
X, y, cat_features = load_preprocess_task(167121)  # Replace with an actual task ID


Dataset shape: (92000, 1024)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

In [None]:
run_lightgbm(X, y, seed=42)


# MLP

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml
import pandas as pd

# Function for random parameter generation
def get_random_parameters(seed):
    rs = np.random.RandomState(seed)
    params = {
        "hidden_dim": rs.randint(10, 101),
        "n_layers": rs.randint(2, 6),
        "learning_rate": rs.uniform(0.00005, 0.0005),  # Adjusted lower learning rate range
    }
    return params

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load data from OpenML using task ID
def load_openml_data(task_id):

    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Convert categorical features to numeric using one-hot encoding
    X = pd.get_dummies(X, drop_first=True)

    # Fill NaN values with column means and Inf with large finite values
    X = X.fillna(X.mean())
    X.replace([np.inf, -np.inf], np.nan, inplace=True)
    X = X.dropna()  # Optionally drop remaining NaNs if any

    # Convert to numpy
    X = X.to_numpy().astype(np.float32)
    y = pd.factorize(y)[0]

    return X, y


# Define MLP model
def initialize_mlp(n_layers, input_dim, hidden_dim, output_dim, objective):
    layers = [nn.Linear(input_dim, hidden_dim), nn.ReLU()]
    layers.extend([nn.Linear(hidden_dim, hidden_dim), nn.ReLU()] * (n_layers - 1))
    layers.append(nn.Linear(hidden_dim, output_dim))
    model = nn.Sequential(*layers)
    return model

# Training function with gradient clipping
def train_mlp(model, X_train, y_train, X_val, y_val, args, params):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=params["learning_rate"])
    loss_func = nn.CrossEntropyLoss()

    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    train_loader = DataLoader(dataset=train_dataset, batch_size=args["batch_size"], shuffle=True)

    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    val_loader = DataLoader(dataset=val_dataset, batch_size=args["val_batch_size"], shuffle=False)

    for epoch in range(args["epochs"]):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            output = model(batch_X)
            loss = loss_func(output, batch_y)
            loss.backward()

            # Apply gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                output = model(batch_X)
                val_loss += loss_func(output, batch_y).item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}, Validation Loss: {val_loss:.4f}")

    return model

# Prediction function
def predict_mlp(model, X, args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
    with torch.no_grad():
        output = model(X_tensor)
        predictions = output.argmax(dim=1).cpu().numpy()

    return predictions

# Set hyperparameters and arguments
args = {
    "objective": "classification",
    "batch_size": 64,
    "val_batch_size": 64,
    "epochs": 10,
    "num_features": None,
    "num_classes": None,
}

seed = 42
params = get_random_parameters(seed)



Using device: cpu


# Task ID1: 14965

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=14965)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.2617
Epoch 2, Validation Loss: 0.2339
Epoch 3, Validation Loss: 0.2280
Epoch 4, Validation Loss: 0.2242
Epoch 5, Validation Loss: 0.2222
Epoch 6, Validation Loss: 0.2206
Epoch 7, Validation Loss: 0.2194
Epoch 8, Validation Loss: 0.2180
Epoch 9, Validation Loss: 0.2175
Epoch 10, Validation Loss: 0.2166
Validation Accuracy: 0.9035


# Task ID2: 9977

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9977)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.1591
Epoch 2, Validation Loss: 0.1377
Epoch 3, Validation Loss: 0.1296
Epoch 4, Validation Loss: 0.1253
Epoch 5, Validation Loss: 0.1221
Epoch 6, Validation Loss: 0.1208
Epoch 7, Validation Loss: 0.1183
Epoch 8, Validation Loss: 0.1173
Epoch 9, Validation Loss: 0.1144
Epoch 10, Validation Loss: 0.1163
Validation Accuracy: 0.9542


# Task ID3: 34539

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=34539)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.2228
Epoch 2, Validation Loss: 0.1846
Epoch 3, Validation Loss: 0.1945
Epoch 4, Validation Loss: 0.2289
Epoch 5, Validation Loss: 0.2576
Epoch 6, Validation Loss: 0.2801
Epoch 7, Validation Loss: 0.3096
Epoch 8, Validation Loss: 0.3476
Epoch 9, Validation Loss: 0.3596
Epoch 10, Validation Loss: 0.4040
Validation Accuracy: 0.9362


# Task ID4: 146606

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146606)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6401
Epoch 2, Validation Loss: 0.6242
Epoch 3, Validation Loss: 0.6142
Epoch 4, Validation Loss: 0.6077
Epoch 5, Validation Loss: 0.6024
Epoch 6, Validation Loss: 0.5982
Epoch 7, Validation Loss: 0.5935
Epoch 8, Validation Loss: 0.5907
Epoch 9, Validation Loss: 0.5875
Epoch 10, Validation Loss: 0.5851
Validation Accuracy: 0.6917


# Task ID5: 7592



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=7592)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.3434
Epoch 2, Validation Loss: 0.3242
Epoch 3, Validation Loss: 0.3154
Epoch 4, Validation Loss: 0.3106
Epoch 5, Validation Loss: 0.3086
Epoch 6, Validation Loss: 0.3070
Epoch 7, Validation Loss: 0.3053
Epoch 8, Validation Loss: 0.3050
Epoch 9, Validation Loss: 0.3043
Epoch 10, Validation Loss: 0.3035
Validation Accuracy: 0.8590


# Task ID6: 146195



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146195)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6165
Epoch 2, Validation Loss: 0.5790
Epoch 3, Validation Loss: 0.5577
Epoch 4, Validation Loss: 0.5400
Epoch 5, Validation Loss: 0.5217
Epoch 6, Validation Loss: 0.5079
Epoch 7, Validation Loss: 0.4953
Epoch 8, Validation Loss: 0.4868
Epoch 9, Validation Loss: 0.4793
Epoch 10, Validation Loss: 0.4734
Validation Accuracy: 0.8105


# Task ID7: 167119



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=167119)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.7152
Epoch 2, Validation Loss: 0.6600
Epoch 3, Validation Loss: 0.6155
Epoch 4, Validation Loss: 0.5792
Epoch 5, Validation Loss: 0.5527
Epoch 6, Validation Loss: 0.5334
Epoch 7, Validation Loss: 0.5176
Epoch 8, Validation Loss: 0.5044
Epoch 9, Validation Loss: 0.4946
Epoch 10, Validation Loss: 0.4844
Validation Accuracy: 0.8017


# Task ID8: 167120



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=167120)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6932
Epoch 2, Validation Loss: 0.6927
Epoch 3, Validation Loss: 0.6927
Epoch 4, Validation Loss: 0.6926
Epoch 5, Validation Loss: 0.6926
Epoch 6, Validation Loss: 0.6927
Epoch 7, Validation Loss: 0.6926
Epoch 8, Validation Loss: 0.6925
Epoch 9, Validation Loss: 0.6927
Epoch 10, Validation Loss: 0.6928
Validation Accuracy: 0.5166


# Task ID11: 	168331



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=168331)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.3411
Epoch 2, Validation Loss: 1.2322
Epoch 3, Validation Loss: 1.1965
Epoch 4, Validation Loss: 1.1750
Epoch 5, Validation Loss: 1.1582
Epoch 6, Validation Loss: 1.1426
Epoch 7, Validation Loss: 1.1332
Epoch 8, Validation Loss: 1.1247
Epoch 9, Validation Loss: 1.1153
Epoch 10, Validation Loss: 1.1086
Validation Accuracy: 0.6020


# Task ID12: 	168330



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=168330)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.8471
Epoch 2, Validation Loss: 0.8239
Epoch 3, Validation Loss: 0.8099
Epoch 4, Validation Loss: 0.7987
Epoch 5, Validation Loss: 0.7902
Epoch 6, Validation Loss: 0.7818
Epoch 7, Validation Loss: 0.7741
Epoch 8, Validation Loss: 0.7683
Epoch 9, Validation Loss: 0.7628
Epoch 10, Validation Loss: 0.7589
Validation Accuracy: 0.6781


# Task ID13: 	168335



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=168335)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.2862
Epoch 2, Validation Loss: 0.2583
Epoch 3, Validation Loss: 0.2466
Epoch 4, Validation Loss: 0.2383
Epoch 5, Validation Loss: 0.2342
Epoch 6, Validation Loss: 0.2281
Epoch 7, Validation Loss: 0.2243
Epoch 8, Validation Loss: 0.2217
Epoch 9, Validation Loss: 0.2180
Epoch 10, Validation Loss: 0.2138
Validation Accuracy: 0.9114



# Task ID16: 	146212



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146212)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.2034
Epoch 2, Validation Loss: 0.1067
Epoch 3, Validation Loss: 0.0710
Epoch 4, Validation Loss: 0.0523
Epoch 5, Validation Loss: 0.0421
Epoch 6, Validation Loss: 0.0360
Epoch 7, Validation Loss: 0.0320
Epoch 8, Validation Loss: 0.0293
Epoch 9, Validation Loss: 0.0272
Epoch 10, Validation Loss: 0.0261
Validation Accuracy: 0.9962


# Task ID19: 	168868



In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=168868)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.0426
Epoch 2, Validation Loss: 0.0381
Epoch 3, Validation Loss: 0.0379
Epoch 4, Validation Loss: 0.0369
Epoch 5, Validation Loss: 0.0378
Epoch 6, Validation Loss: 0.0383
Epoch 7, Validation Loss: 0.0405
Epoch 8, Validation Loss: 0.0446
Epoch 9, Validation Loss: 0.0424
Epoch 10, Validation Loss: 0.0426
Validation Accuracy: 0.9909


# Task ID20: 31

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=31)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6796
Epoch 2, Validation Loss: 0.6699
Epoch 3, Validation Loss: 0.6615
Epoch 4, Validation Loss: 0.6542
Epoch 5, Validation Loss: 0.6480
Epoch 6, Validation Loss: 0.6422
Epoch 7, Validation Loss: 0.6373
Epoch 8, Validation Loss: 0.6326
Epoch 9, Validation Loss: 0.6285
Epoch 10, Validation Loss: 0.6248
Validation Accuracy: 0.7050


# Task ID21: 10101

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=10101)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6583
Epoch 2, Validation Loss: 0.6488
Epoch 3, Validation Loss: 0.6396
Epoch 4, Validation Loss: 0.6304
Epoch 5, Validation Loss: 0.6215
Epoch 6, Validation Loss: 0.6122
Epoch 7, Validation Loss: 0.6034
Epoch 8, Validation Loss: 0.5944
Epoch 9, Validation Loss: 0.5860
Epoch 10, Validation Loss: 0.5780
Validation Accuracy: 0.7533


# Task ID22: 	3913

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3913)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6336
Epoch 2, Validation Loss: 0.6247
Epoch 3, Validation Loss: 0.6157
Epoch 4, Validation Loss: 0.6071
Epoch 5, Validation Loss: 0.5979
Epoch 6, Validation Loss: 0.5885
Epoch 7, Validation Loss: 0.5789
Epoch 8, Validation Loss: 0.5693
Epoch 9, Validation Loss: 0.5597
Epoch 10, Validation Loss: 0.5496
Validation Accuracy: 0.8095


# Task ID23: 3

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6705
Epoch 2, Validation Loss: 0.6494
Epoch 3, Validation Loss: 0.6219
Epoch 4, Validation Loss: 0.5850
Epoch 5, Validation Loss: 0.5380
Epoch 6, Validation Loss: 0.4850
Epoch 7, Validation Loss: 0.4317
Epoch 8, Validation Loss: 0.3815
Epoch 9, Validation Loss: 0.3370
Epoch 10, Validation Loss: 0.2992
Validation Accuracy: 0.9062


# Task ID24: 3917

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3917)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6958
Epoch 2, Validation Loss: 0.6509
Epoch 3, Validation Loss: 0.6055
Epoch 4, Validation Loss: 0.5575
Epoch 5, Validation Loss: 0.5046
Epoch 6, Validation Loss: 0.4568
Epoch 7, Validation Loss: 0.4231
Epoch 8, Validation Loss: 0.4041
Epoch 9, Validation Loss: 0.3947
Epoch 10, Validation Loss: 0.3909
Validation Accuracy: 0.8412


# Task ID25: 9957

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9957)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6828
Epoch 2, Validation Loss: 0.6638
Epoch 3, Validation Loss: 0.6472
Epoch 4, Validation Loss: 0.6314
Epoch 5, Validation Loss: 0.6163
Epoch 6, Validation Loss: 0.6010
Epoch 7, Validation Loss: 0.5852
Epoch 8, Validation Loss: 0.5687
Epoch 9, Validation Loss: 0.5516
Epoch 10, Validation Loss: 0.5351
Validation Accuracy: 0.7630


# Task ID26: 9946

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9946)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6439
Epoch 2, Validation Loss: 0.6249
Epoch 3, Validation Loss: 0.6068
Epoch 4, Validation Loss: 0.5884
Epoch 5, Validation Loss: 0.5695
Epoch 6, Validation Loss: 0.5504
Epoch 7, Validation Loss: 0.5311
Epoch 8, Validation Loss: 0.5119
Epoch 9, Validation Loss: 0.4920
Epoch 10, Validation Loss: 0.4710
Validation Accuracy: 0.9474


# Task ID27: 3918

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3918)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6102
Epoch 2, Validation Loss: 0.5890
Epoch 3, Validation Loss: 0.5686
Epoch 4, Validation Loss: 0.5492
Epoch 5, Validation Loss: 0.5296
Epoch 6, Validation Loss: 0.5104
Epoch 7, Validation Loss: 0.4907
Epoch 8, Validation Loss: 0.4704
Epoch 9, Validation Loss: 0.4500
Epoch 10, Validation Loss: 0.4292
Validation Accuracy: 0.9144


# Task ID28: 3903

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3903)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6419
Epoch 2, Validation Loss: 0.6060
Epoch 3, Validation Loss: 0.5697
Epoch 4, Validation Loss: 0.5309
Epoch 5, Validation Loss: 0.4889
Epoch 6, Validation Loss: 0.4463
Epoch 7, Validation Loss: 0.4095
Epoch 8, Validation Loss: 0.3783
Epoch 9, Validation Loss: 0.3557
Epoch 10, Validation Loss: 0.3417
Validation Accuracy: 0.8978


# Task ID29: 37

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=37)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6940
Epoch 2, Validation Loss: 0.6859
Epoch 3, Validation Loss: 0.6777
Epoch 4, Validation Loss: 0.6699
Epoch 5, Validation Loss: 0.6616
Epoch 6, Validation Loss: 0.6538
Epoch 7, Validation Loss: 0.6454
Epoch 8, Validation Loss: 0.6369
Epoch 9, Validation Loss: 0.6284
Epoch 10, Validation Loss: 0.6191
Validation Accuracy: 0.7143


# Task ID30: 9971

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9971)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6758
Epoch 2, Validation Loss: 0.6643
Epoch 3, Validation Loss: 0.6537
Epoch 4, Validation Loss: 0.6440
Epoch 5, Validation Loss: 0.6347
Epoch 6, Validation Loss: 0.6262
Epoch 7, Validation Loss: 0.6183
Epoch 8, Validation Loss: 0.6105
Epoch 9, Validation Loss: 0.6031
Epoch 10, Validation Loss: 0.5962
Validation Accuracy: 0.7436


# Task ID31: 9952

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9952)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5894
Epoch 2, Validation Loss: 0.5326
Epoch 3, Validation Loss: 0.4930
Epoch 4, Validation Loss: 0.4664
Epoch 5, Validation Loss: 0.4503
Epoch 6, Validation Loss: 0.4404
Epoch 7, Validation Loss: 0.4341
Epoch 8, Validation Loss: 0.4290
Epoch 9, Validation Loss: 0.4250
Epoch 10, Validation Loss: 0.4212
Validation Accuracy: 0.7928


# Task ID32: 3902

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3902)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6374
Epoch 2, Validation Loss: 0.6065
Epoch 3, Validation Loss: 0.5770
Epoch 4, Validation Loss: 0.5469
Epoch 5, Validation Loss: 0.5155
Epoch 6, Validation Loss: 0.4833
Epoch 7, Validation Loss: 0.4533
Epoch 8, Validation Loss: 0.4270
Epoch 9, Validation Loss: 0.4056
Epoch 10, Validation Loss: 0.3884
Validation Accuracy: 0.8562


# Task ID33: 49

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=49)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6989
Epoch 2, Validation Loss: 0.6880
Epoch 3, Validation Loss: 0.6777
Epoch 4, Validation Loss: 0.6678
Epoch 5, Validation Loss: 0.6581
Epoch 6, Validation Loss: 0.6489
Epoch 7, Validation Loss: 0.6397
Epoch 8, Validation Loss: 0.6311
Epoch 9, Validation Loss: 0.6227
Epoch 10, Validation Loss: 0.6157
Validation Accuracy: 0.6510


# Task ID34: 43

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=43)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6400
Epoch 2, Validation Loss: 0.5789
Epoch 3, Validation Loss: 0.4994
Epoch 4, Validation Loss: 0.4110
Epoch 5, Validation Loss: 0.3360
Epoch 6, Validation Loss: 0.2877
Epoch 7, Validation Loss: 0.2573
Epoch 8, Validation Loss: 0.2394
Epoch 9, Validation Loss: 0.2264
Epoch 10, Validation Loss: 0.2177
Validation Accuracy: 0.9327


# Task ID35: 9978

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9978)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5707
Epoch 2, Validation Loss: 0.4667
Epoch 3, Validation Loss: 0.3744
Epoch 4, Validation Loss: 0.3052
Epoch 5, Validation Loss: 0.2637
Epoch 6, Validation Loss: 0.2391
Epoch 7, Validation Loss: 0.2235
Epoch 8, Validation Loss: 0.2130
Epoch 9, Validation Loss: 0.2048
Epoch 10, Validation Loss: 0.1986
Validation Accuracy: 0.9349


# Task ID36: 10093

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=10093)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6556
Epoch 2, Validation Loss: 0.6368
Epoch 3, Validation Loss: 0.6176
Epoch 4, Validation Loss: 0.5975
Epoch 5, Validation Loss: 0.5765
Epoch 6, Validation Loss: 0.5538
Epoch 7, Validation Loss: 0.5295
Epoch 8, Validation Loss: 0.5030
Epoch 9, Validation Loss: 0.4755
Epoch 10, Validation Loss: 0.4461
Validation Accuracy: 0.8618


# Task ID37: 219

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=219)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5211
Epoch 2, Validation Loss: 0.4844
Epoch 3, Validation Loss: 0.4739
Epoch 4, Validation Loss: 0.4685
Epoch 5, Validation Loss: 0.4635
Epoch 6, Validation Loss: 0.4606
Epoch 7, Validation Loss: 0.4582
Epoch 8, Validation Loss: 0.4547
Epoch 9, Validation Loss: 0.4522
Epoch 10, Validation Loss: 0.4505
Validation Accuracy: 0.7888


# Task ID38: 9976

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9976)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6944
Epoch 2, Validation Loss: 0.6916
Epoch 3, Validation Loss: 0.6885
Epoch 4, Validation Loss: 0.6854
Epoch 5, Validation Loss: 0.6819
Epoch 6, Validation Loss: 0.6773
Epoch 7, Validation Loss: 0.6732
Epoch 8, Validation Loss: 0.6692
Epoch 9, Validation Loss: 0.6670
Epoch 10, Validation Loss: 0.6687
Validation Accuracy: 0.5423


# Task ID39: 6

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=6)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 3.0598
Epoch 2, Validation Loss: 2.5152
Epoch 3, Validation Loss: 1.9552
Epoch 4, Validation Loss: 1.6047
Epoch 5, Validation Loss: 1.3912
Epoch 6, Validation Loss: 1.2533
Epoch 7, Validation Loss: 1.1598
Epoch 8, Validation Loss: 1.0897
Epoch 9, Validation Loss: 1.0362
Epoch 10, Validation Loss: 0.9918
Validation Accuracy: 0.7130


# Task ID40: 53

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=53)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.3758
Epoch 2, Validation Loss: 1.3669
Epoch 3, Validation Loss: 1.3582
Epoch 4, Validation Loss: 1.3493
Epoch 5, Validation Loss: 1.3408
Epoch 6, Validation Loss: 1.3316
Epoch 7, Validation Loss: 1.3224
Epoch 8, Validation Loss: 1.3135
Epoch 9, Validation Loss: 1.3037
Epoch 10, Validation Loss: 1.2932
Validation Accuracy: 0.4471


# Task ID41: 11

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=11)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.0777
Epoch 2, Validation Loss: 1.0609
Epoch 3, Validation Loss: 1.0445
Epoch 4, Validation Loss: 1.0284
Epoch 5, Validation Loss: 1.0130
Epoch 6, Validation Loss: 0.9978
Epoch 7, Validation Loss: 0.9828
Epoch 8, Validation Loss: 0.9681
Epoch 9, Validation Loss: 0.9534
Epoch 10, Validation Loss: 0.9387
Validation Accuracy: 0.5760


# Task ID42: 15

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=15)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6640
Epoch 2, Validation Loss: 0.6463
Epoch 3, Validation Loss: 0.6286
Epoch 4, Validation Loss: 0.6108
Epoch 5, Validation Loss: 0.5924
Epoch 6, Validation Loss: 0.5734
Epoch 7, Validation Loss: 0.5538
Epoch 8, Validation Loss: 0.5335
Epoch 9, Validation Loss: 0.5123
Epoch 10, Validation Loss: 0.4902
Validation Accuracy: 0.9643


# Task ID43: 16

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=16)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.2873
Epoch 2, Validation Loss: 2.2541
Epoch 3, Validation Loss: 2.2198
Epoch 4, Validation Loss: 2.1813
Epoch 5, Validation Loss: 2.1356
Epoch 6, Validation Loss: 2.0804
Epoch 7, Validation Loss: 2.0135
Epoch 8, Validation Loss: 1.9330
Epoch 9, Validation Loss: 1.8391
Epoch 10, Validation Loss: 1.7345
Validation Accuracy: 0.7475


# Task ID44: 14

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=14)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.2667
Epoch 2, Validation Loss: 2.2367
Epoch 3, Validation Loss: 2.2022
Epoch 4, Validation Loss: 2.1601
Epoch 5, Validation Loss: 2.1078
Epoch 6, Validation Loss: 2.0448
Epoch 7, Validation Loss: 1.9664
Epoch 8, Validation Loss: 1.8777
Epoch 9, Validation Loss: 1.7794
Epoch 10, Validation Loss: 1.6748
Validation Accuracy: 0.6650


# Task ID45: 32

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=32)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.0543
Epoch 2, Validation Loss: 1.5984
Epoch 3, Validation Loss: 1.0985
Epoch 4, Validation Loss: 0.7730
Epoch 5, Validation Loss: 0.5953
Epoch 6, Validation Loss: 0.4847
Epoch 7, Validation Loss: 0.4060
Epoch 8, Validation Loss: 0.3460
Epoch 9, Validation Loss: 0.2996
Epoch 10, Validation Loss: 0.2613
Validation Accuracy: 0.9395


# Task ID46: 3549

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3549)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.3434
Epoch 2, Validation Loss: 1.3177
Epoch 3, Validation Loss: 1.2914
Epoch 4, Validation Loss: 1.2630
Epoch 5, Validation Loss: 1.2317
Epoch 6, Validation Loss: 1.1964
Epoch 7, Validation Loss: 1.1564
Epoch 8, Validation Loss: 1.1114
Epoch 9, Validation Loss: 1.0612
Epoch 10, Validation Loss: 1.0068
Validation Accuracy: 0.8698


# Task ID47: 12

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=12)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.2062
Epoch 2, Validation Loss: 2.0821
Epoch 3, Validation Loss: 1.9274
Epoch 4, Validation Loss: 1.7305
Epoch 5, Validation Loss: 1.5005
Epoch 6, Validation Loss: 1.2659
Epoch 7, Validation Loss: 1.0485
Epoch 8, Validation Loss: 0.8616
Epoch 9, Validation Loss: 0.7098
Epoch 10, Validation Loss: 0.5956
Validation Accuracy: 0.9050


# Task ID48: 9981

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9981)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.1943
Epoch 2, Validation Loss: 2.1784
Epoch 3, Validation Loss: 2.1623
Epoch 4, Validation Loss: 2.1441
Epoch 5, Validation Loss: 2.1234
Epoch 6, Validation Loss: 2.0991
Epoch 7, Validation Loss: 2.0694
Epoch 8, Validation Loss: 2.0333
Epoch 9, Validation Loss: 1.9903
Epoch 10, Validation Loss: 1.9388
Validation Accuracy: 0.6343


# Task ID49: 18

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=18)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.2746
Epoch 2, Validation Loss: 2.2305
Epoch 3, Validation Loss: 2.1823
Epoch 4, Validation Loss: 2.1290
Epoch 5, Validation Loss: 2.0712
Epoch 6, Validation Loss: 2.0048
Epoch 7, Validation Loss: 1.9350
Epoch 8, Validation Loss: 1.8620
Epoch 9, Validation Loss: 1.7887
Epoch 10, Validation Loss: 1.7161
Validation Accuracy: 0.4850


# Task ID50: 28

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=28)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.1857
Epoch 2, Validation Loss: 2.0054
Epoch 3, Validation Loss: 1.7261
Epoch 4, Validation Loss: 1.3772
Epoch 5, Validation Loss: 1.0364
Epoch 6, Validation Loss: 0.7710
Epoch 7, Validation Loss: 0.5834
Epoch 8, Validation Loss: 0.4555
Epoch 9, Validation Loss: 0.3731
Epoch 10, Validation Loss: 0.3176
Validation Accuracy: 0.9262


# Task ID51: 2074

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=2074)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.4955
Epoch 2, Validation Loss: 1.1203
Epoch 3, Validation Loss: 0.8331
Epoch 4, Validation Loss: 0.6529
Epoch 5, Validation Loss: 0.5382
Epoch 6, Validation Loss: 0.4693
Epoch 7, Validation Loss: 0.4304
Epoch 8, Validation Loss: 0.4069
Epoch 9, Validation Loss: 0.3908
Epoch 10, Validation Loss: 0.3777
Validation Accuracy: 0.8429


# Task ID52: 29

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=29)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.7111
Epoch 2, Validation Loss: 0.7075
Epoch 3, Validation Loss: 0.7037
Epoch 4, Validation Loss: 0.7000
Epoch 5, Validation Loss: 0.6961
Epoch 6, Validation Loss: 0.6926
Epoch 7, Validation Loss: 0.6879
Epoch 8, Validation Loss: 0.6830
Epoch 9, Validation Loss: 0.6778
Epoch 10, Validation Loss: 0.6720
Validation Accuracy: 0.5290


# Task ID53: 45

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=45)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.0605
Epoch 2, Validation Loss: 1.0213
Epoch 3, Validation Loss: 0.9760
Epoch 4, Validation Loss: 0.9206
Epoch 5, Validation Loss: 0.8491
Epoch 6, Validation Loss: 0.7591
Epoch 7, Validation Loss: 0.6581
Epoch 8, Validation Loss: 0.5572
Epoch 9, Validation Loss: 0.4657
Epoch 10, Validation Loss: 0.3918
Validation Accuracy: 0.8997


# Task ID54: 125922

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=125922)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.2950
Epoch 2, Validation Loss: 2.1239
Epoch 3, Validation Loss: 1.8690
Epoch 4, Validation Loss: 1.6383
Epoch 5, Validation Loss: 1.4472
Epoch 6, Validation Loss: 1.2707
Epoch 7, Validation Loss: 1.1165
Epoch 8, Validation Loss: 0.9866
Epoch 9, Validation Loss: 0.8817
Epoch 10, Validation Loss: 0.7872
Validation Accuracy: 0.7736


# Task ID55: 9960

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9960)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.3750
Epoch 2, Validation Loss: 1.2335
Epoch 3, Validation Loss: 1.1062
Epoch 4, Validation Loss: 1.0164
Epoch 5, Validation Loss: 0.9501
Epoch 6, Validation Loss: 0.8935
Epoch 7, Validation Loss: 0.8453
Epoch 8, Validation Loss: 0.8035
Epoch 9, Validation Loss: 0.7678
Epoch 10, Validation Loss: 0.7366
Validation Accuracy: 0.7033


# Task ID56: 9964

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9964)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.2544
Epoch 2, Validation Loss: 2.2103
Epoch 3, Validation Loss: 2.1569
Epoch 4, Validation Loss: 2.0902
Epoch 5, Validation Loss: 2.0091
Epoch 6, Validation Loss: 1.9161
Epoch 7, Validation Loss: 1.8119
Epoch 8, Validation Loss: 1.6974
Epoch 9, Validation Loss: 1.5791
Epoch 10, Validation Loss: 1.4525
Validation Accuracy: 0.6991


# Task ID57: 22

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=22)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.2549
Epoch 2, Validation Loss: 2.2111
Epoch 3, Validation Loss: 2.1641
Epoch 4, Validation Loss: 2.1103
Epoch 5, Validation Loss: 2.0456
Epoch 6, Validation Loss: 1.9719
Epoch 7, Validation Loss: 1.8883
Epoch 8, Validation Loss: 1.7946
Epoch 9, Validation Loss: 1.6943
Epoch 10, Validation Loss: 1.5930
Validation Accuracy: 0.6400


# Task ID58: 2079

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=2079)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.6230
Epoch 2, Validation Loss: 1.6110
Epoch 3, Validation Loss: 1.6001
Epoch 4, Validation Loss: 1.5892
Epoch 5, Validation Loss: 1.5793
Epoch 6, Validation Loss: 1.5685
Epoch 7, Validation Loss: 1.5575
Epoch 8, Validation Loss: 1.5465
Epoch 9, Validation Loss: 1.5355
Epoch 10, Validation Loss: 1.5239
Validation Accuracy: 0.4189


# Task ID59: 14969

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=14969)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.4949
Epoch 2, Validation Loss: 1.4222
Epoch 3, Validation Loss: 1.3568
Epoch 4, Validation Loss: 1.3166
Epoch 5, Validation Loss: 1.2917
Epoch 6, Validation Loss: 1.2732
Epoch 7, Validation Loss: 1.2587
Epoch 8, Validation Loss: 1.2465
Epoch 9, Validation Loss: 1.2376
Epoch 10, Validation Loss: 1.2295
Validation Accuracy: 0.4942


# Task ID60: 3560

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3560)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.7911
Epoch 2, Validation Loss: 1.7899
Epoch 3, Validation Loss: 1.7889
Epoch 4, Validation Loss: 1.7883
Epoch 5, Validation Loss: 1.7875
Epoch 6, Validation Loss: 1.7870
Epoch 7, Validation Loss: 1.7861
Epoch 8, Validation Loss: 1.7857
Epoch 9, Validation Loss: 1.7852
Epoch 10, Validation Loss: 1.7847
Validation Accuracy: 0.2062


# Task ID61: 14952

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=14952)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5276
Epoch 2, Validation Loss: 0.2956
Epoch 3, Validation Loss: 0.2029
Epoch 4, Validation Loss: 0.1713
Epoch 5, Validation Loss: 0.1561
Epoch 6, Validation Loss: 0.1467
Epoch 7, Validation Loss: 0.1411
Epoch 8, Validation Loss: 0.1361
Epoch 9, Validation Loss: 0.1325
Epoch 10, Validation Loss: 0.1294
Validation Accuracy: 0.9466


# Task ID62: 125920

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=125920)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6992
Epoch 2, Validation Loss: 0.6978
Epoch 3, Validation Loss: 0.6960
Epoch 4, Validation Loss: 0.6944
Epoch 5, Validation Loss: 0.6931
Epoch 6, Validation Loss: 0.6918
Epoch 7, Validation Loss: 0.6906
Epoch 8, Validation Loss: 0.6894
Epoch 9, Validation Loss: 0.6884
Epoch 10, Validation Loss: 0.6875
Validation Accuracy: 0.5500


# Task ID63: 23

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=23)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.0792
Epoch 2, Validation Loss: 1.0700
Epoch 3, Validation Loss: 1.0620
Epoch 4, Validation Loss: 1.0542
Epoch 5, Validation Loss: 1.0474
Epoch 6, Validation Loss: 1.0411
Epoch 7, Validation Loss: 1.0348
Epoch 8, Validation Loss: 1.0299
Epoch 9, Validation Loss: 1.0247
Epoch 10, Validation Loss: 1.0197
Validation Accuracy: 0.4441


# Task ID64: 3904

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3904)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5274
Epoch 2, Validation Loss: 0.4472
Epoch 3, Validation Loss: 0.4368
Epoch 4, Validation Loss: 0.4322
Epoch 5, Validation Loss: 0.4301
Epoch 6, Validation Loss: 0.4286
Epoch 7, Validation Loss: 0.4277
Epoch 8, Validation Loss: 0.4278
Epoch 9, Validation Loss: 0.4269
Epoch 10, Validation Loss: 0.4269
Validation Accuracy: 0.8103


# Task ID65: 3022

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3022)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.4103
Epoch 2, Validation Loss: 2.4044
Epoch 3, Validation Loss: 2.3989
Epoch 4, Validation Loss: 2.3929
Epoch 5, Validation Loss: 2.3870
Epoch 6, Validation Loss: 2.3805
Epoch 7, Validation Loss: 2.3737
Epoch 8, Validation Loss: 2.3668
Epoch 9, Validation Loss: 2.3592
Epoch 10, Validation Loss: 2.3508
Validation Accuracy: 0.2020


# Task ID66: 9985

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9985)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.7212
Epoch 2, Validation Loss: 1.6468
Epoch 3, Validation Loss: 1.5865
Epoch 4, Validation Loss: 1.5565
Epoch 5, Validation Loss: 1.5380
Epoch 6, Validation Loss: 1.5222
Epoch 7, Validation Loss: 1.5085
Epoch 8, Validation Loss: 1.4947
Epoch 9, Validation Loss: 1.4838
Epoch 10, Validation Loss: 1.4735
Validation Accuracy: 0.4379


# Task ID67: 9910

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9910)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6170
Epoch 2, Validation Loss: 0.5535
Epoch 3, Validation Loss: 0.5076
Epoch 4, Validation Loss: 0.4806
Epoch 5, Validation Loss: 0.4689
Epoch 6, Validation Loss: 0.4688
Epoch 7, Validation Loss: 0.4845
Epoch 8, Validation Loss: 0.4881
Epoch 9, Validation Loss: 0.5072
Epoch 10, Validation Loss: 0.5125
Validation Accuracy: 0.7803


# Task ID68: 14970

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=14970)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.7674
Epoch 2, Validation Loss: 0.4253
Epoch 3, Validation Loss: 0.2916
Epoch 4, Validation Loss: 0.2276
Epoch 5, Validation Loss: 0.1757
Epoch 6, Validation Loss: 0.1487
Epoch 7, Validation Loss: 0.1308
Epoch 8, Validation Loss: 0.1237
Epoch 9, Validation Loss: 0.1068
Epoch 10, Validation Loss: 0.0977
Validation Accuracy: 0.9665


# Task ID69: 3021

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3021)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5455
Epoch 2, Validation Loss: 0.4186
Epoch 3, Validation Loss: 0.3130
Epoch 4, Validation Loss: 0.2420
Epoch 5, Validation Loss: 0.2021
Epoch 6, Validation Loss: 0.1798
Epoch 7, Validation Loss: 0.1655
Epoch 8, Validation Loss: 0.1548
Epoch 9, Validation Loss: 0.1467
Epoch 10, Validation Loss: 0.1397
Validation Accuracy: 0.9497


# Task ID70: 3481

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=3481)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.6960
Epoch 2, Validation Loss: 1.9094
Epoch 3, Validation Loss: 1.2953
Epoch 4, Validation Loss: 0.8847
Epoch 5, Validation Loss: 0.6628
Epoch 6, Validation Loss: 0.5382
Epoch 7, Validation Loss: 0.4591
Epoch 8, Validation Loss: 0.4028
Epoch 9, Validation Loss: 0.3583
Epoch 10, Validation Loss: 0.3219
Validation Accuracy: 0.9135


# Task ID71: 3573

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=9946)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


# Task ID72: 146824

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146824)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.1643
Epoch 2, Validation Loss: 2.0326
Epoch 3, Validation Loss: 1.8724
Epoch 4, Validation Loss: 1.6769
Epoch 5, Validation Loss: 1.4650
Epoch 6, Validation Loss: 1.2563
Epoch 7, Validation Loss: 1.0627
Epoch 8, Validation Loss: 0.8854
Epoch 9, Validation Loss: 0.7300
Epoch 10, Validation Loss: 0.6046
Validation Accuracy: 0.9025


# Task ID73: 146820

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146820)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.4838
Epoch 2, Validation Loss: 0.3455
Epoch 3, Validation Loss: 0.2481
Epoch 4, Validation Loss: 0.2040
Epoch 5, Validation Loss: 0.1864
Epoch 6, Validation Loss: 0.1785
Epoch 7, Validation Loss: 0.1737
Epoch 8, Validation Loss: 0.1702
Epoch 9, Validation Loss: 0.1673
Epoch 10, Validation Loss: 0.1648
Validation Accuracy: 0.9473


# Task ID74: 146822

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146822)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.9233
Epoch 2, Validation Loss: 1.8733
Epoch 3, Validation Loss: 1.8190
Epoch 4, Validation Loss: 1.7579
Epoch 5, Validation Loss: 1.6870
Epoch 6, Validation Loss: 1.6068
Epoch 7, Validation Loss: 1.5206
Epoch 8, Validation Loss: 1.4356
Epoch 9, Validation Loss: 1.3555
Epoch 10, Validation Loss: 1.2816
Validation Accuracy: 0.6429


# Task ID75: 146195

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146195)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6205
Epoch 2, Validation Loss: 0.5788
Epoch 3, Validation Loss: 0.5567
Epoch 4, Validation Loss: 0.5365
Epoch 5, Validation Loss: 0.5192
Epoch 6, Validation Loss: 0.5063
Epoch 7, Validation Loss: 0.4956
Epoch 8, Validation Loss: 0.4878
Epoch 9, Validation Loss: 0.4815
Epoch 10, Validation Loss: 0.4753
Validation Accuracy: 0.8122


# Task ID76: 146800

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146800)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 2.0621
Epoch 2, Validation Loss: 2.0422
Epoch 3, Validation Loss: 2.0219
Epoch 4, Validation Loss: 2.0011
Epoch 5, Validation Loss: 1.9783
Epoch 6, Validation Loss: 1.9540
Epoch 7, Validation Loss: 1.9259
Epoch 8, Validation Loss: 1.8951
Epoch 9, Validation Loss: 1.8606
Epoch 10, Validation Loss: 1.8221
Validation Accuracy: 0.5787


# Task ID77: 146817

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146817)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.8724
Epoch 2, Validation Loss: 1.8041
Epoch 3, Validation Loss: 1.7286
Epoch 4, Validation Loss: 1.6425
Epoch 5, Validation Loss: 1.5520
Epoch 6, Validation Loss: 1.4630
Epoch 7, Validation Loss: 1.3808
Epoch 8, Validation Loss: 1.3095
Epoch 9, Validation Loss: 1.2487
Epoch 10, Validation Loss: 1.1969
Validation Accuracy: 0.5424


# Task ID78: 146819

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146819)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6470
Epoch 2, Validation Loss: 0.6295
Epoch 3, Validation Loss: 0.6127
Epoch 4, Validation Loss: 0.5961
Epoch 5, Validation Loss: 0.5802
Epoch 6, Validation Loss: 0.5644
Epoch 7, Validation Loss: 0.5490
Epoch 8, Validation Loss: 0.5338
Epoch 9, Validation Loss: 0.5189
Epoch 10, Validation Loss: 0.5042
Validation Accuracy: 0.9074


# Task ID79: 146821

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=146821)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.3637
Epoch 2, Validation Loss: 1.3112
Epoch 3, Validation Loss: 1.2607
Epoch 4, Validation Loss: 1.2085
Epoch 5, Validation Loss: 1.1542
Epoch 6, Validation Loss: 1.0967
Epoch 7, Validation Loss: 1.0378
Epoch 8, Validation Loss: 0.9819
Epoch 9, Validation Loss: 0.9317
Epoch 10, Validation Loss: 0.8885
Validation Accuracy: 0.6792


# Task ID80: 14954

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=14954)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.6938
Epoch 2, Validation Loss: 0.6906
Epoch 3, Validation Loss: 0.6875
Epoch 4, Validation Loss: 0.6842
Epoch 5, Validation Loss: 0.6810
Epoch 6, Validation Loss: 0.6775
Epoch 7, Validation Loss: 0.6740
Epoch 8, Validation Loss: 0.6703
Epoch 9, Validation Loss: 0.6663
Epoch 10, Validation Loss: 0.6620
Validation Accuracy: 0.6204


# Task ID81: 167141

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=167141)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5772
Epoch 2, Validation Loss: 0.4845
Epoch 3, Validation Loss: 0.4260
Epoch 4, Validation Loss: 0.3952
Epoch 5, Validation Loss: 0.3730
Epoch 6, Validation Loss: 0.3538
Epoch 7, Validation Loss: 0.3358
Epoch 8, Validation Loss: 0.3198
Epoch 9, Validation Loss: 0.3072
Epoch 10, Validation Loss: 0.2973
Validation Accuracy: 0.8610


# Task ID82: 167140

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=167140)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 1.0362
Epoch 2, Validation Loss: 0.9816
Epoch 3, Validation Loss: 0.9197
Epoch 4, Validation Loss: 0.8465
Epoch 5, Validation Loss: 0.7579
Epoch 6, Validation Loss: 0.6573
Epoch 7, Validation Loss: 0.5527
Epoch 8, Validation Loss: 0.4574
Epoch 9, Validation Loss: 0.3820
Epoch 10, Validation Loss: 0.3258
Validation Accuracy: 0.9060


# Task ID83: 167125

In [None]:
# Load and preprocess data
X, y = load_openml_data(task_id=167125)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Update args based on data
args["num_features"] = X_train.shape[1]
args["num_classes"] = len(np.unique(y))

# Initialize and train the model
mlp_model = initialize_mlp(params["n_layers"], args["num_features"], params["hidden_dim"], args["num_classes"], args["objective"])
trained_model = train_mlp(mlp_model, X_train, y_train, X_val, y_val, args, params)

# Make predictions and calculate accuracy
predictions = predict_mlp(trained_model, X_val, args)
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy:.4f}")


Epoch 1, Validation Loss: 0.5308
Epoch 2, Validation Loss: 0.3286
Epoch 3, Validation Loss: 0.1991
Epoch 4, Validation Loss: 0.1481
Epoch 5, Validation Loss: 0.1243
Epoch 6, Validation Loss: 0.1097
Epoch 7, Validation Loss: 0.0990
Epoch 8, Validation Loss: 0.0946
Epoch 9, Validation Loss: 0.0911
Epoch 10, Validation Loss: 0.0889
Validation Accuracy: 0.9710


# Task ID84: 167124

# Task ID85: 167121

# ResNet





In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Load tabular data from OpenML with preprocessing
def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = openml.datasets.get_dataset(task.dataset_id)
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Handle missing values
    imputer = SimpleImputer(strategy="most_frequent")
    X = imputer.fit_transform(X)

    # Convert categorical columns in X to numerical codes
    for col in range(X.shape[1]):
        if isinstance(X[0, col], str):
            X[:, col] = pd.factorize(X[:, col])[0]

    # Convert target y to numeric if needed and ensure it's a numpy array
    if isinstance(y[0], str):
        y = pd.factorize(y)[0]
    y = np.array(y)  # Ensure y is a numpy array

    return X.astype(np.float32), y.astype(np.int64)

# Define a custom dataset for tabular data
class TabularDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Define a simple ResNet-like model for tabular data
class ResNetTabular(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ResNetTabular, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.dropout(out)
        out = self.relu(self.fc2(out))
        out = self.fc3(out)
        return out

# Get data loaders
def get_data_loaders(X, y, batch_size):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    train_dataset = TabularDataset(X_train, np.array(y_train))
    val_dataset = TabularDataset(X_val, np.array(y_val))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

# Task ID1: 14965

In [None]:
# Parameters
task_id = 14965  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.4260
Epoch [2/10], Loss: 0.5946
Epoch [3/10], Loss: 0.1920
Epoch [4/10], Loss: 0.2464
Epoch [5/10], Loss: 0.0870
Epoch [6/10], Loss: 0.1267
Epoch [7/10], Loss: 0.0284
Epoch [8/10], Loss: 0.2418
Epoch [9/10], Loss: 0.1787
Epoch [10/10], Loss: 0.5847
Accuracy: 0.8986


# Task ID2: 9977

In [None]:
# Parameters
task_id = 9977  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.0974
Epoch [2/10], Loss: 0.2079
Epoch [3/10], Loss: 0.2211
Epoch [4/10], Loss: 0.1062
Epoch [5/10], Loss: 0.2668
Epoch [6/10], Loss: 0.1894
Epoch [7/10], Loss: 0.0312
Epoch [8/10], Loss: 0.0508
Epoch [9/10], Loss: 0.2419
Epoch [10/10], Loss: 0.0775
Accuracy: 0.9569


# Task ID3: 34539

In [None]:
# Parameters
task_id = 34539  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.0910
Epoch [2/10], Loss: 0.4356
Epoch [3/10], Loss: 0.0497
Epoch [4/10], Loss: 0.4178
Epoch [5/10], Loss: 0.0615
Epoch [6/10], Loss: 0.0547
Epoch [7/10], Loss: 0.4389
Epoch [8/10], Loss: 0.0719
Epoch [9/10], Loss: 0.0612
Epoch [10/10], Loss: 0.0526
Accuracy: 0.9437


# Task ID4: 146606

In [None]:
# Parameters
task_id = 146606  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.7213
Epoch [2/10], Loss: 0.4284
Epoch [3/10], Loss: 0.6582
Epoch [4/10], Loss: 0.5026
Epoch [5/10], Loss: 0.6339
Epoch [6/10], Loss: 0.4914
Epoch [7/10], Loss: 0.3908
Epoch [8/10], Loss: 0.5078
Epoch [9/10], Loss: 0.4592
Epoch [10/10], Loss: 0.5521
Accuracy: 0.7110


# Task ID5: 7592



In [None]:
# Parameters
task_id = 7592  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.0014
Epoch [2/10], Loss: 0.6058
Epoch [3/10], Loss: 0.8110
Epoch [4/10], Loss: 0.0000
Epoch [5/10], Loss: 0.0696
Epoch [6/10], Loss: 0.3649
Epoch [7/10], Loss: 0.7185
Epoch [8/10], Loss: 0.0277
Epoch [9/10], Loss: 0.0139
Epoch [10/10], Loss: 0.0697
Accuracy: 0.8547


# Task ID6: 146195



In [None]:
# Parameters
task_id = 146195  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.8686
Epoch [2/10], Loss: 0.9432
Epoch [3/10], Loss: 0.6589
Epoch [4/10], Loss: 0.7128
Epoch [5/10], Loss: 0.8151
Epoch [6/10], Loss: 0.9076
Epoch [7/10], Loss: 0.5395
Epoch [8/10], Loss: 0.7531
Epoch [9/10], Loss: 0.4656
Epoch [10/10], Loss: 0.6060
Accuracy: 0.7419


# Task ID7: 167119



In [None]:
# Parameters
task_id = 167119  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.7850
Epoch [2/10], Loss: 0.5707
Epoch [3/10], Loss: 0.5016
Epoch [4/10], Loss: 0.4219
Epoch [5/10], Loss: 0.5030
Epoch [6/10], Loss: 0.2640
Epoch [7/10], Loss: 0.3358
Epoch [8/10], Loss: 0.5580
Epoch [9/10], Loss: 0.3370
Epoch [10/10], Loss: 0.6294
Accuracy: 0.8168


# Task ID8: 167120



In [None]:
# Parameters
task_id = 167120  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6991
Epoch [2/10], Loss: 0.6941
Epoch [3/10], Loss: 0.6848
Epoch [4/10], Loss: 0.6814
Epoch [5/10], Loss: 0.6823
Epoch [6/10], Loss: 0.7074
Epoch [7/10], Loss: 0.6843
Epoch [8/10], Loss: 0.6936
Epoch [9/10], Loss: 0.6967
Epoch [10/10], Loss: 0.6904
Accuracy: 0.5147


# Task ID11: 	168331



In [None]:
# Parameters
task_id = 168331 # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.2453
Epoch [2/10], Loss: 1.2675
Epoch [3/10], Loss: 1.3478
Epoch [4/10], Loss: 1.6738
Epoch [5/10], Loss: 1.0183
Epoch [6/10], Loss: 0.6520
Epoch [7/10], Loss: 1.0676
Epoch [8/10], Loss: 1.2553
Epoch [9/10], Loss: 1.3280
Epoch [10/10], Loss: 1.3362
Accuracy: 0.6153


# Task ID12: 	168330



In [None]:
# Parameters
task_id = 168330  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.9018
Epoch [2/10], Loss: 0.9688
Epoch [3/10], Loss: 1.2281
Epoch [4/10], Loss: 0.5237
Epoch [5/10], Loss: 0.6596
Epoch [6/10], Loss: 0.8224
Epoch [7/10], Loss: 0.5093
Epoch [8/10], Loss: 0.7616
Epoch [9/10], Loss: 0.5017
Epoch [10/10], Loss: 0.7088
Accuracy: 0.6976


# Task ID13: 	168335



In [None]:
# Parameters
task_id = 168335  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.2640
Epoch [2/10], Loss: 0.3778
Epoch [3/10], Loss: 0.2037
Epoch [4/10], Loss: 0.4725
Epoch [5/10], Loss: 0.1526
Epoch [6/10], Loss: 0.2035
Epoch [7/10], Loss: 0.2507
Epoch [8/10], Loss: 0.0468
Epoch [9/10], Loss: 0.0393
Epoch [10/10], Loss: 0.0797
Accuracy: 0.9237



# Task ID16: 	146212



In [None]:
# Parameters
task_id = 146212  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.0848
Epoch [2/10], Loss: 0.0027
Epoch [3/10], Loss: 0.0044
Epoch [4/10], Loss: 0.0607
Epoch [5/10], Loss: 0.0098
Epoch [6/10], Loss: 0.0027
Epoch [7/10], Loss: 0.0029
Epoch [8/10], Loss: 0.0078
Epoch [9/10], Loss: 0.0013
Epoch [10/10], Loss: 0.0029
Accuracy: 0.9967


# Task ID19: 	168868



In [None]:
# Parameters
task_id = 168868  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.0231
Epoch [2/10], Loss: 0.0178
Epoch [3/10], Loss: 0.0001
Epoch [4/10], Loss: 0.0014
Epoch [5/10], Loss: 0.0202
Epoch [6/10], Loss: 0.0688
Epoch [7/10], Loss: 0.0147
Epoch [8/10], Loss: 0.0085
Epoch [9/10], Loss: 0.0034
Epoch [10/10], Loss: 0.0060
Accuracy: 0.9904


# Task ID20: 31

In [None]:
# Parameters
task_id = 31  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6869
Epoch [2/10], Loss: 0.5056
Epoch [3/10], Loss: 0.5218
Epoch [4/10], Loss: 0.4387
Epoch [5/10], Loss: 0.5354
Epoch [6/10], Loss: 0.5089
Epoch [7/10], Loss: 0.6623
Epoch [8/10], Loss: 0.5546
Epoch [9/10], Loss: 0.5116
Epoch [10/10], Loss: 0.5411
Accuracy: 0.7700


# Task ID21: 10101

In [None]:
# Parameters
task_id = 10101  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.5918
Epoch [2/10], Loss: 0.4164
Epoch [3/10], Loss: 0.4128
Epoch [4/10], Loss: 0.5102
Epoch [5/10], Loss: 0.5324
Epoch [6/10], Loss: 0.5481
Epoch [7/10], Loss: 0.4402
Epoch [8/10], Loss: 0.5139
Epoch [9/10], Loss: 0.4102
Epoch [10/10], Loss: 0.6876
Accuracy: 0.7667


# Task ID22: 	3913

In [None]:
# Parameters
task_id = 3913  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.5112
Epoch [2/10], Loss: 0.2776
Epoch [3/10], Loss: 1.0583
Epoch [4/10], Loss: 0.2997
Epoch [5/10], Loss: 0.0267
Epoch [6/10], Loss: 0.0063
Epoch [7/10], Loss: 0.0171
Epoch [8/10], Loss: 0.0147
Epoch [9/10], Loss: 0.0334
Epoch [10/10], Loss: 0.0413
Accuracy: 0.8667


# Task ID23: 3

In [None]:
# Parameters
task_id = 3 # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.3384
Epoch [2/10], Loss: 0.1961
Epoch [3/10], Loss: 0.1481
Epoch [4/10], Loss: 0.1710
Epoch [5/10], Loss: 0.1064
Epoch [6/10], Loss: 0.1497
Epoch [7/10], Loss: 0.0458
Epoch [8/10], Loss: 0.1258
Epoch [9/10], Loss: 0.0724
Epoch [10/10], Loss: 0.0192
Accuracy: 0.9672


# Task ID24: 3917

In [None]:
# Parameters
task_id = 3917  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.4596
Epoch [2/10], Loss: 0.5719
Epoch [3/10], Loss: 0.4327
Epoch [4/10], Loss: 0.3523
Epoch [5/10], Loss: 0.2566
Epoch [6/10], Loss: 0.4604
Epoch [7/10], Loss: 0.4556
Epoch [8/10], Loss: 0.1894
Epoch [9/10], Loss: 0.3987
Epoch [10/10], Loss: 0.3939
Accuracy: 0.8555


# Task ID25: 9957

In [None]:
# Parameters
task_id = 9957 # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6627
Epoch [2/10], Loss: 0.8670
Epoch [3/10], Loss: 0.4477
Epoch [4/10], Loss: 0.7525
Epoch [5/10], Loss: 0.3378
Epoch [6/10], Loss: 0.2250
Epoch [7/10], Loss: 0.1704
Epoch [8/10], Loss: 0.3046
Epoch [9/10], Loss: 0.1955
Epoch [10/10], Loss: 0.1029
Accuracy: 0.8768


# Task ID26: 9946

In [None]:
# Parameters
task_id = 9946  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.5340
Epoch [2/10], Loss: 0.2510
Epoch [3/10], Loss: 0.1707
Epoch [4/10], Loss: 0.0688
Epoch [5/10], Loss: 0.0360
Epoch [6/10], Loss: 0.6044
Epoch [7/10], Loss: 0.1031
Epoch [8/10], Loss: 0.1066
Epoch [9/10], Loss: 0.0260
Epoch [10/10], Loss: 0.0106
Accuracy: 0.9737


# Task ID27: 3918

In [None]:
# Parameters
task_id = 3918  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.3709
Epoch [2/10], Loss: 0.1950
Epoch [3/10], Loss: 0.2589
Epoch [4/10], Loss: 0.1441
Epoch [5/10], Loss: 0.1903
Epoch [6/10], Loss: 0.2888
Epoch [7/10], Loss: 0.2672
Epoch [8/10], Loss: 0.1748
Epoch [9/10], Loss: 0.1271
Epoch [10/10], Loss: 0.0570
Accuracy: 0.9234


# Task ID28: 3903

In [None]:
# Parameters
task_id = 3903  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.1874
Epoch [2/10], Loss: 0.0021
Epoch [3/10], Loss: 0.0902
Epoch [4/10], Loss: 0.0500
Epoch [5/10], Loss: 0.2451
Epoch [6/10], Loss: 0.0912
Epoch [7/10], Loss: 0.2821
Epoch [8/10], Loss: 0.0349
Epoch [9/10], Loss: 0.0121
Epoch [10/10], Loss: 0.2675
Accuracy: 0.8946


# Task ID29: 37

In [None]:
# Parameters
task_id = 37  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.7980
Epoch [2/10], Loss: 0.5303
Epoch [3/10], Loss: 0.4557
Epoch [4/10], Loss: 0.3613
Epoch [5/10], Loss: 0.2631
Epoch [6/10], Loss: 0.2889
Epoch [7/10], Loss: 0.7615
Epoch [8/10], Loss: 0.6689
Epoch [9/10], Loss: 0.3163
Epoch [10/10], Loss: 1.0312
Accuracy: 0.7727


# Task ID30: 9971

In [None]:
# Parameters
task_id = 9971  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.7102
Epoch [2/10], Loss: 0.6663
Epoch [3/10], Loss: 0.6042
Epoch [4/10], Loss: 0.5484
Epoch [5/10], Loss: 0.5739
Epoch [6/10], Loss: 0.5683
Epoch [7/10], Loss: 0.4374
Epoch [8/10], Loss: 0.5640
Epoch [9/10], Loss: 0.4436
Epoch [10/10], Loss: 0.5529
Accuracy: 0.7521


# Task ID31: 9952

In [None]:
# Parameters
task_id = 9952  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.4420
Epoch [2/10], Loss: 0.2358
Epoch [3/10], Loss: 0.6512
Epoch [4/10], Loss: 0.2106
Epoch [5/10], Loss: 0.0253
Epoch [6/10], Loss: 0.0595
Epoch [7/10], Loss: 0.2756
Epoch [8/10], Loss: 0.7425
Epoch [9/10], Loss: 0.1605
Epoch [10/10], Loss: 0.1029
Accuracy: 0.8409


# Task ID32: 3902

In [None]:
# Parameters
task_id = 3902 # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.5660
Epoch [2/10], Loss: 0.2736
Epoch [3/10], Loss: 0.5322
Epoch [4/10], Loss: 0.1324
Epoch [5/10], Loss: 0.2318
Epoch [6/10], Loss: 0.0899
Epoch [7/10], Loss: 0.0418
Epoch [8/10], Loss: 0.1281
Epoch [9/10], Loss: 0.2423
Epoch [10/10], Loss: 0.1533
Accuracy: 0.9007


# Task ID33: 49

In [None]:
# Parameters
task_id = 49  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6397
Epoch [2/10], Loss: 0.6337
Epoch [3/10], Loss: 0.6754
Epoch [4/10], Loss: 0.5163
Epoch [5/10], Loss: 0.5286
Epoch [6/10], Loss: 0.5523
Epoch [7/10], Loss: 0.5079
Epoch [8/10], Loss: 0.6817
Epoch [9/10], Loss: 0.4796
Epoch [10/10], Loss: 0.5917
Accuracy: 0.7448


# Task ID34: 43

In [None]:
# Parameters
task_id = 43  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.3681
Epoch [2/10], Loss: 0.4118
Epoch [3/10], Loss: 0.1910
Epoch [4/10], Loss: 0.1333
Epoch [5/10], Loss: 0.3396
Epoch [6/10], Loss: 0.3177
Epoch [7/10], Loss: 0.2570
Epoch [8/10], Loss: 0.0715
Epoch [9/10], Loss: 0.1488
Epoch [10/10], Loss: 0.2670
Accuracy: 0.9414


# Task ID35: 9978

In [None]:
# Parameters
task_id = 9978  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.2508
Epoch [2/10], Loss: 0.0895
Epoch [3/10], Loss: 0.2939
Epoch [4/10], Loss: 0.2479
Epoch [5/10], Loss: 0.1636
Epoch [6/10], Loss: 0.0165
Epoch [7/10], Loss: 0.0959
Epoch [8/10], Loss: 0.0246
Epoch [9/10], Loss: 0.0195
Epoch [10/10], Loss: 0.0221
Accuracy: 0.9428


# Task ID36: 10093

In [None]:
# Parameters
task_id = 10093 # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.3740
Epoch [2/10], Loss: 0.1824
Epoch [3/10], Loss: 0.0304
Epoch [4/10], Loss: 0.0635
Epoch [5/10], Loss: 0.0438
Epoch [6/10], Loss: 0.3713
Epoch [7/10], Loss: 0.0670
Epoch [8/10], Loss: 0.0077
Epoch [9/10], Loss: 0.0743
Epoch [10/10], Loss: 0.0029
Accuracy: 0.9927


# Task ID37: 219

In [None]:
# Parameters
task_id = 219  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.4165
Epoch [2/10], Loss: 0.5305
Epoch [3/10], Loss: 0.4529
Epoch [4/10], Loss: 0.4580
Epoch [5/10], Loss: 0.6772
Epoch [6/10], Loss: 0.5069
Epoch [7/10], Loss: 0.5180
Epoch [8/10], Loss: 0.2981
Epoch [9/10], Loss: 0.5443
Epoch [10/10], Loss: 0.4480
Accuracy: 0.7951


# Task ID38: 9976

In [None]:
# Parameters
task_id = 9976  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6845
Epoch [2/10], Loss: 0.5947
Epoch [3/10], Loss: 0.4864
Epoch [4/10], Loss: 0.6458
Epoch [5/10], Loss: 0.4855
Epoch [6/10], Loss: 0.2363
Epoch [7/10], Loss: 0.2353
Epoch [8/10], Loss: 0.0765
Epoch [9/10], Loss: 0.3338
Epoch [10/10], Loss: 0.2892
Accuracy: 0.5731


# Task ID39: 6

In [None]:
# Parameters
task_id = 6  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.9848
Epoch [2/10], Loss: 1.1941
Epoch [3/10], Loss: 1.0551
Epoch [4/10], Loss: 1.0708
Epoch [5/10], Loss: 1.2120
Epoch [6/10], Loss: 0.8768
Epoch [7/10], Loss: 1.1595
Epoch [8/10], Loss: 0.5226
Epoch [9/10], Loss: 1.2247
Epoch [10/10], Loss: 0.6530
Accuracy: 0.8377


# Task ID40: 53

In [None]:
# Parameters
task_id = 53  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.3027
Epoch [2/10], Loss: 1.2649
Epoch [3/10], Loss: 0.9712
Epoch [4/10], Loss: 0.6766
Epoch [5/10], Loss: 0.8990
Epoch [6/10], Loss: 0.5625
Epoch [7/10], Loss: 0.7454
Epoch [8/10], Loss: 0.9923
Epoch [9/10], Loss: 0.5062
Epoch [10/10], Loss: 0.5359
Accuracy: 0.7824


# Task ID41: 11

In [None]:
# Parameters
task_id = 11  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.9080
Epoch [2/10], Loss: 0.8733
Epoch [3/10], Loss: 0.8591
Epoch [4/10], Loss: 0.4565
Epoch [5/10], Loss: 0.5395
Epoch [6/10], Loss: 0.4304
Epoch [7/10], Loss: 0.2466
Epoch [8/10], Loss: 0.2302
Epoch [9/10], Loss: 0.3437
Epoch [10/10], Loss: 0.4615
Accuracy: 0.9040


# Task ID42: 15

In [None]:
# Parameters
task_id = 15  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.4783
Epoch [2/10], Loss: 0.1655
Epoch [3/10], Loss: 0.3796
Epoch [4/10], Loss: 0.0724
Epoch [5/10], Loss: 0.0301
Epoch [6/10], Loss: 0.2333
Epoch [7/10], Loss: 0.6262
Epoch [8/10], Loss: 0.0076
Epoch [9/10], Loss: 0.0314
Epoch [10/10], Loss: 0.0277
Accuracy: 0.9714


# Task ID43: 16

In [None]:
# Parameters
task_id = 16  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.9781
Epoch [2/10], Loss: 0.8679
Epoch [3/10], Loss: 0.6595
Epoch [4/10], Loss: 0.3748
Epoch [5/10], Loss: 0.2482
Epoch [6/10], Loss: 0.5316
Epoch [7/10], Loss: 0.1599
Epoch [8/10], Loss: 0.2725
Epoch [9/10], Loss: 0.1761
Epoch [10/10], Loss: 0.1800
Accuracy: 0.9575


# Task ID44: 14

In [None]:
# Parameters
task_id = 14  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.9336
Epoch [2/10], Loss: 1.2328
Epoch [3/10], Loss: 0.7231
Epoch [4/10], Loss: 0.7118
Epoch [5/10], Loss: 0.5210
Epoch [6/10], Loss: 0.5873
Epoch [7/10], Loss: 0.6012
Epoch [8/10], Loss: 0.6615
Epoch [9/10], Loss: 0.3988
Epoch [10/10], Loss: 0.2812
Accuracy: 0.8275


# Task ID45: 32

In [None]:
# Parameters
task_id = 32  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6213
Epoch [2/10], Loss: 0.2162
Epoch [3/10], Loss: 0.5217
Epoch [4/10], Loss: 0.2088
Epoch [5/10], Loss: 0.1713
Epoch [6/10], Loss: 0.0627
Epoch [7/10], Loss: 0.1875
Epoch [8/10], Loss: 0.2259
Epoch [9/10], Loss: 0.1253
Epoch [10/10], Loss: 0.1890
Accuracy: 0.9809


# Task ID46: 3549

In [None]:
# Parameters
task_id = 3549  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.0427
Epoch [2/10], Loss: 0.6480
Epoch [3/10], Loss: 0.2173
Epoch [4/10], Loss: 0.3367
Epoch [5/10], Loss: 0.1164
Epoch [6/10], Loss: 0.0216
Epoch [7/10], Loss: 0.0136
Epoch [8/10], Loss: 0.0206
Epoch [9/10], Loss: 0.0112
Epoch [10/10], Loss: 0.0094
Accuracy: 1.0000


# Task ID47: 12

In [None]:
# Parameters
task_id = 12 # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.0848
Epoch [2/10], Loss: 0.4446
Epoch [3/10], Loss: 0.3421
Epoch [4/10], Loss: 0.1097
Epoch [5/10], Loss: 0.2730
Epoch [6/10], Loss: 0.1099
Epoch [7/10], Loss: 0.1292
Epoch [8/10], Loss: 0.0717
Epoch [9/10], Loss: 0.1066
Epoch [10/10], Loss: 0.0456
Accuracy: 0.9750


# Task ID48: 9981

In [None]:
# Parameters
task_id = 9981  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 2.0943
Epoch [2/10], Loss: 1.6230
Epoch [3/10], Loss: 0.9115
Epoch [4/10], Loss: 0.4620
Epoch [5/10], Loss: 0.3348
Epoch [6/10], Loss: 0.3186
Epoch [7/10], Loss: 0.0979
Epoch [8/10], Loss: 0.1229
Epoch [9/10], Loss: 0.1042
Epoch [10/10], Loss: 0.1168
Accuracy: 0.9491


# Task ID49: 18

In [None]:
# Parameters
task_id = 18  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.7950
Epoch [2/10], Loss: 1.5099
Epoch [3/10], Loss: 1.0113
Epoch [4/10], Loss: 0.8657
Epoch [5/10], Loss: 0.7223
Epoch [6/10], Loss: 0.6450
Epoch [7/10], Loss: 0.7077
Epoch [8/10], Loss: 0.9311
Epoch [9/10], Loss: 0.9843
Epoch [10/10], Loss: 0.7082
Accuracy: 0.7300


# Task ID50: 28

In [None]:
# Parameters
task_id = 28  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.2769
Epoch [2/10], Loss: 0.3782
Epoch [3/10], Loss: 0.4242
Epoch [4/10], Loss: 0.0945
Epoch [5/10], Loss: 0.2345
Epoch [6/10], Loss: 0.0353
Epoch [7/10], Loss: 0.0238
Epoch [8/10], Loss: 0.1468
Epoch [9/10], Loss: 0.0908
Epoch [10/10], Loss: 0.0219
Accuracy: 0.9751


# Task ID51: 2074

In [None]:
# Parameters
task_id = 2074  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.2922
Epoch [2/10], Loss: 0.3533
Epoch [3/10], Loss: 0.4110
Epoch [4/10], Loss: 0.3735
Epoch [5/10], Loss: 0.4006
Epoch [6/10], Loss: 0.3744
Epoch [7/10], Loss: 0.1820
Epoch [8/10], Loss: 0.2762
Epoch [9/10], Loss: 0.2456
Epoch [10/10], Loss: 0.1159
Accuracy: 0.8919


# Task ID52: 29

In [None]:
# Parameters
task_id = 29  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.5683
Epoch [2/10], Loss: 0.6107
Epoch [3/10], Loss: 0.4976
Epoch [4/10], Loss: 0.3759
Epoch [5/10], Loss: 0.2898
Epoch [6/10], Loss: 0.6791
Epoch [7/10], Loss: 0.4550
Epoch [8/10], Loss: 0.2221
Epoch [9/10], Loss: 0.1403
Epoch [10/10], Loss: 0.2874
Accuracy: 0.8333


# Task ID53: 45

In [None]:
# Parameters
task_id = 45  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.8273
Epoch [2/10], Loss: 0.3736
Epoch [3/10], Loss: 0.4439
Epoch [4/10], Loss: 0.5267
Epoch [5/10], Loss: 0.2413
Epoch [6/10], Loss: 0.1165
Epoch [7/10], Loss: 0.2268
Epoch [8/10], Loss: 0.1592
Epoch [9/10], Loss: 0.2348
Epoch [10/10], Loss: 0.0930
Accuracy: 0.9028


# Task ID54: 125922

In [None]:
# Parameters
task_id = 125922  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.7484
Epoch [2/10], Loss: 1.1360
Epoch [3/10], Loss: 0.4406
Epoch [4/10], Loss: 0.2962
Epoch [5/10], Loss: 0.2426
Epoch [6/10], Loss: 0.0788
Epoch [7/10], Loss: 0.0731
Epoch [8/10], Loss: 0.2455
Epoch [9/10], Loss: 0.0644
Epoch [10/10], Loss: 0.2277
Accuracy: 0.9809


# Task ID55: 9960

In [None]:
# Parameters
task_id = 9960 # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.1461
Epoch [2/10], Loss: 0.6053
Epoch [3/10], Loss: 1.0406
Epoch [4/10], Loss: 0.6747
Epoch [5/10], Loss: 0.5201
Epoch [6/10], Loss: 0.9662
Epoch [7/10], Loss: 0.6516
Epoch [8/10], Loss: 0.2981
Epoch [9/10], Loss: 0.7435
Epoch [10/10], Loss: 0.2616
Accuracy: 0.8233


# Task ID56: 9964

In [None]:
# Parameters
task_id = 9964  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.7351
Epoch [2/10], Loss: 0.8990
Epoch [3/10], Loss: 0.5939
Epoch [4/10], Loss: 0.3084
Epoch [5/10], Loss: 0.3122
Epoch [6/10], Loss: 0.3787
Epoch [7/10], Loss: 0.1843
Epoch [8/10], Loss: 0.3635
Epoch [9/10], Loss: 0.2732
Epoch [10/10], Loss: 0.1935
Accuracy: 0.9122


# Task ID57: 22

In [None]:
# Parameters
task_id = 22  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.9153
Epoch [2/10], Loss: 1.1205
Epoch [3/10], Loss: 0.8081
Epoch [4/10], Loss: 0.8976
Epoch [5/10], Loss: 0.7231
Epoch [6/10], Loss: 0.9096
Epoch [7/10], Loss: 0.4405
Epoch [8/10], Loss: 0.6210
Epoch [9/10], Loss: 0.4733
Epoch [10/10], Loss: 0.6931
Accuracy: 0.8100


# Task ID58: 2079

In [None]:
# Parameters
task_id = 2079  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.5049
Epoch [2/10], Loss: 1.3901
Epoch [3/10], Loss: 1.4932
Epoch [4/10], Loss: 1.3889
Epoch [5/10], Loss: 1.1976
Epoch [6/10], Loss: 0.9845
Epoch [7/10], Loss: 1.0315
Epoch [8/10], Loss: 0.9093
Epoch [9/10], Loss: 1.1634
Epoch [10/10], Loss: 1.2318
Accuracy: 0.5743


# Task ID59: 14969

In [None]:
# Parameters
task_id = 14969  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.2482
Epoch [2/10], Loss: 1.2744
Epoch [3/10], Loss: 1.2302
Epoch [4/10], Loss: 1.1223
Epoch [5/10], Loss: 1.0441
Epoch [6/10], Loss: 1.0920
Epoch [7/10], Loss: 1.2609
Epoch [8/10], Loss: 1.2281
Epoch [9/10], Loss: 1.5800
Epoch [10/10], Loss: 1.1081
Accuracy: 0.5337


# Task ID60: 3560

In [None]:
# Parameters
task_id = 3560  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.7975
Epoch [2/10], Loss: 1.7885
Epoch [3/10], Loss: 1.7737
Epoch [4/10], Loss: 1.7932
Epoch [5/10], Loss: 1.7920
Epoch [6/10], Loss: 1.7832
Epoch [7/10], Loss: 1.7472
Epoch [8/10], Loss: 1.7870
Epoch [9/10], Loss: 1.7094
Epoch [10/10], Loss: 1.7713
Accuracy: 0.2625


# Task ID61: 14952

In [None]:
# Parameters
task_id = 14952  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.3069
Epoch [2/10], Loss: 0.1478
Epoch [3/10], Loss: 0.2428
Epoch [4/10], Loss: 0.1103
Epoch [5/10], Loss: 0.0717
Epoch [6/10], Loss: 0.1554
Epoch [7/10], Loss: 0.3517
Epoch [8/10], Loss: 0.2146
Epoch [9/10], Loss: 0.0141
Epoch [10/10], Loss: 0.0763
Accuracy: 0.9457


# Task ID62: 125920

In [None]:
# Parameters
task_id = 125920  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6561
Epoch [2/10], Loss: 0.6528
Epoch [3/10], Loss: 0.6298
Epoch [4/10], Loss: 0.6924
Epoch [5/10], Loss: 0.6469
Epoch [6/10], Loss: 0.6853
Epoch [7/10], Loss: 0.7803
Epoch [8/10], Loss: 0.7304
Epoch [9/10], Loss: 0.7056
Epoch [10/10], Loss: 0.6113
Accuracy: 0.5800


# Task ID63: 23

In [None]:
# Parameters
task_id = 23  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.9581
Epoch [2/10], Loss: 0.9892
Epoch [3/10], Loss: 0.9708
Epoch [4/10], Loss: 0.9396
Epoch [5/10], Loss: 1.0600
Epoch [6/10], Loss: 1.0222
Epoch [7/10], Loss: 0.8828
Epoch [8/10], Loss: 1.0116
Epoch [9/10], Loss: 0.8553
Epoch [10/10], Loss: 0.8614
Accuracy: 0.5661


# Task ID64: 3904

In [None]:
# Parameters
task_id = 3904  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.2033
Epoch [2/10], Loss: 0.2305
Epoch [3/10], Loss: 0.1396
Epoch [4/10], Loss: 0.4330
Epoch [5/10], Loss: 0.5676
Epoch [6/10], Loss: 0.1745
Epoch [7/10], Loss: 0.2576
Epoch [8/10], Loss: 0.3292
Epoch [9/10], Loss: 0.9026
Epoch [10/10], Loss: 0.1090
Accuracy: 0.8112


# Task ID65: 3022

In [None]:
# Parameters
task_id = 3022  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 2.3624
Epoch [2/10], Loss: 2.3273
Epoch [3/10], Loss: 2.0606
Epoch [4/10], Loss: 1.8122
Epoch [5/10], Loss: 1.6415
Epoch [6/10], Loss: 1.5427
Epoch [7/10], Loss: 1.5094
Epoch [8/10], Loss: 1.1993
Epoch [9/10], Loss: 1.2095
Epoch [10/10], Loss: 1.0983
Accuracy: 0.6263


# Task ID66: 9985

In [None]:
# Parameters
task_id = 9985  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.3718
Epoch [2/10], Loss: 1.3923
Epoch [3/10], Loss: 1.4622
Epoch [4/10], Loss: 1.2558
Epoch [5/10], Loss: 1.3364
Epoch [6/10], Loss: 1.1151
Epoch [7/10], Loss: 1.4405
Epoch [8/10], Loss: 1.4199
Epoch [9/10], Loss: 1.1781
Epoch [10/10], Loss: 1.5860
Accuracy: 0.4943


# Task ID67: 9910

In [None]:
# Parameters
task_id = 9910  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.4531
Epoch [2/10], Loss: 0.3834
Epoch [3/10], Loss: 0.5013
Epoch [4/10], Loss: 0.3747
Epoch [5/10], Loss: 0.5048
Epoch [6/10], Loss: 0.4651
Epoch [7/10], Loss: 0.6111
Epoch [8/10], Loss: 0.3295
Epoch [9/10], Loss: 0.2774
Epoch [10/10], Loss: 0.2270
Accuracy: 0.7843


# Task ID68: 14970

In [None]:
# Parameters
task_id = 14970  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.1258
Epoch [2/10], Loss: 0.0953
Epoch [3/10], Loss: 0.1113
Epoch [4/10], Loss: 0.0469
Epoch [5/10], Loss: 0.0619
Epoch [6/10], Loss: 0.0536
Epoch [7/10], Loss: 0.0102
Epoch [8/10], Loss: 0.0102
Epoch [9/10], Loss: 0.0825
Epoch [10/10], Loss: 0.2505
Accuracy: 0.9699


# Task ID69: 3021

In [None]:
# Parameters
task_id = 3021  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


Epoch [1/10], Loss: nan
Epoch [2/10], Loss: nan
Epoch [3/10], Loss: nan
Epoch [4/10], Loss: nan
Epoch [5/10], Loss: nan
Epoch [6/10], Loss: nan
Epoch [7/10], Loss: nan
Epoch [8/10], Loss: nan
Epoch [9/10], Loss: nan
Epoch [10/10], Loss: nan
Accuracy: 0.9497


# Task ID70: 3481

In [None]:
# Parameters
task_id = 3481  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6701
Epoch [2/10], Loss: 0.5188
Epoch [3/10], Loss: 0.4814
Epoch [4/10], Loss: 0.3009
Epoch [5/10], Loss: 0.4821
Epoch [6/10], Loss: 0.1372
Epoch [7/10], Loss: 0.1265
Epoch [8/10], Loss: 0.1033
Epoch [9/10], Loss: 0.1955
Epoch [10/10], Loss: 0.4077
Accuracy: 0.9519


# Task ID71: 3573

In [None]:
# Parameters
task_id = 9946  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


# Task ID72: 146824

In [None]:
# Parameters
task_id = 146824  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.0078
Epoch [2/10], Loss: 0.2178
Epoch [3/10], Loss: 0.2980
Epoch [4/10], Loss: 0.1871
Epoch [5/10], Loss: 0.1209
Epoch [6/10], Loss: 0.2171
Epoch [7/10], Loss: 0.1167
Epoch [8/10], Loss: 0.1056
Epoch [9/10], Loss: 0.0560
Epoch [10/10], Loss: 0.1168
Accuracy: 0.9625


# Task ID73: 146820

In [None]:
# Parameters
task_id = 146820  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.1358
Epoch [2/10], Loss: 0.1010
Epoch [3/10], Loss: 0.0912
Epoch [4/10], Loss: 0.3462
Epoch [5/10], Loss: 0.1265
Epoch [6/10], Loss: 0.3241
Epoch [7/10], Loss: 0.0289
Epoch [8/10], Loss: 0.0818
Epoch [9/10], Loss: 0.0911
Epoch [10/10], Loss: 0.0320
Accuracy: 0.9783


# Task ID74: 146822

In [None]:
# Parameters
task_id = 146822  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.3144
Epoch [2/10], Loss: 0.6705
Epoch [3/10], Loss: 0.4327
Epoch [4/10], Loss: 0.6459
Epoch [5/10], Loss: 0.4002
Epoch [6/10], Loss: 0.4064
Epoch [7/10], Loss: 0.5871
Epoch [8/10], Loss: 0.5033
Epoch [9/10], Loss: 0.2537
Epoch [10/10], Loss: 0.4168
Accuracy: 0.8636


# Task ID75: 146195

In [None]:
# Parameters
task_id = 146195  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.7639
Epoch [2/10], Loss: 0.7727
Epoch [3/10], Loss: 0.8116
Epoch [4/10], Loss: 1.0752
Epoch [5/10], Loss: 0.9051
Epoch [6/10], Loss: 0.5998
Epoch [7/10], Loss: 0.7801
Epoch [8/10], Loss: 0.7414
Epoch [9/10], Loss: 0.8102
Epoch [10/10], Loss: 0.8910
Accuracy: 0.7438


# Task ID76: 146800

In [None]:
# Parameters
task_id = 146800  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.8622
Epoch [2/10], Loss: 1.4900
Epoch [3/10], Loss: 1.1354
Epoch [4/10], Loss: 0.9604
Epoch [5/10], Loss: 0.6071
Epoch [6/10], Loss: 0.6917
Epoch [7/10], Loss: 0.4655
Epoch [8/10], Loss: 0.5691
Epoch [9/10], Loss: 0.3564
Epoch [10/10], Loss: 0.3355
Accuracy: 0.9537


# Task ID77: 146817

In [None]:
# Parameters
task_id = 146817  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 1.4363
Epoch [2/10], Loss: 1.5182
Epoch [3/10], Loss: 0.9396
Epoch [4/10], Loss: 0.9890
Epoch [5/10], Loss: 1.1000
Epoch [6/10], Loss: 0.7605
Epoch [7/10], Loss: 0.4802
Epoch [8/10], Loss: 0.8595
Epoch [9/10], Loss: 1.2034
Epoch [10/10], Loss: 0.4989
Accuracy: 0.7224


# Task ID78: 146819

In [None]:
# Parameters
task_id = 146819  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.5807
Epoch [2/10], Loss: 0.1377
Epoch [3/10], Loss: 0.5074
Epoch [4/10], Loss: 0.2223
Epoch [5/10], Loss: 0.0579
Epoch [6/10], Loss: 0.0759
Epoch [7/10], Loss: 0.1981
Epoch [8/10], Loss: 0.0467
Epoch [9/10], Loss: 0.1606
Epoch [10/10], Loss: 0.0927
Accuracy: 0.9074


# Task ID79: 146821

In [None]:
# Parameters
task_id = 146821  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.9359
Epoch [2/10], Loss: 0.5941
Epoch [3/10], Loss: 0.6852
Epoch [4/10], Loss: 0.3264
Epoch [5/10], Loss: 0.1279
Epoch [6/10], Loss: 0.3459
Epoch [7/10], Loss: 0.5113
Epoch [8/10], Loss: 0.6479
Epoch [9/10], Loss: 0.0682
Epoch [10/10], Loss: 0.4343
Accuracy: 0.8931


# Task ID80: 14954

In [None]:
# Parameters
task_id = 14954  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.6958
Epoch [2/10], Loss: 0.6255
Epoch [3/10], Loss: 0.6655
Epoch [4/10], Loss: 0.5182
Epoch [5/10], Loss: 0.4262
Epoch [6/10], Loss: 0.4989
Epoch [7/10], Loss: 0.5278
Epoch [8/10], Loss: 0.5725
Epoch [9/10], Loss: 0.3996
Epoch [10/10], Loss: 0.5399
Accuracy: 0.7315


# Task ID81: 167141

In [None]:
# Parameters
task_id = 167141  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.3436
Epoch [2/10], Loss: 0.3594
Epoch [3/10], Loss: 0.1749
Epoch [4/10], Loss: 0.4268
Epoch [5/10], Loss: 0.5808
Epoch [6/10], Loss: 0.3436
Epoch [7/10], Loss: 0.3714
Epoch [8/10], Loss: 0.1146
Epoch [9/10], Loss: 0.2928
Epoch [10/10], Loss: 0.2401
Accuracy: 0.9260


# Task ID82: 167140

In [None]:
# Parameters
task_id = 167140  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.5412
Epoch [2/10], Loss: 0.1624
Epoch [3/10], Loss: 0.1953
Epoch [4/10], Loss: 0.1324
Epoch [5/10], Loss: 0.0358
Epoch [6/10], Loss: 0.2913
Epoch [7/10], Loss: 0.2629
Epoch [8/10], Loss: 0.0061
Epoch [9/10], Loss: 0.0200
Epoch [10/10], Loss: 0.0457
Accuracy: 0.9498


# Task ID83: 167125

In [None]:
# Parameters
task_id = 167125  # Example task ID from OpenML
batch_size = 32
epochs = 10
learning_rate = 0.001

# Load data
X, y = load_openml_data(task_id)

# Initialize data loaders
train_loader, val_loader = get_data_loaders(X, y, batch_size=batch_size)

# Initialize model, loss, and optimizer
num_classes = len(np.unique(y))
input_size = X.shape[1]  # Number of features
model = ResNetTabular(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/10], Loss: 0.2874
Epoch [2/10], Loss: 0.0253
Epoch [3/10], Loss: 0.0332
Epoch [4/10], Loss: 0.0362
Epoch [5/10], Loss: 0.0667
Epoch [6/10], Loss: 0.0069
Epoch [7/10], Loss: 0.0061
Epoch [8/10], Loss: 0.0323
Epoch [9/10], Loss: 0.0654
Epoch [10/10], Loss: 0.0032
Accuracy: 0.9680


# Task ID84: 167124

# Task ID85: 167121

# NODE

In [None]:
import time
import shutil
import numpy as np
import openml
import pandas as pd  # Make sure to import pandas
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim  # Importing standard PyTorch optimizers
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import os

# Set a custom OpenML cache directory
openml.config.cache_directory = os.path.expanduser("~/.openml_cache")

class NODE(nn.Module):
    def __init__(self, params, num_features, num_classes):
        super(NODE, self).__init__()

        layer_dim = int(params["total_tree_count"] / params["num_layers"])

        self.model = nn.Sequential(
            nn.Linear(num_features, layer_dim),  # Example layer, modify as needed
            nn.ReLU(),
            nn.Linear(layer_dim, num_classes)
        )

    def forward(self, x):
        return self.model(x)

    def predict(self, X):
        self.eval()
        with torch.no_grad():
            return self.forward(X)

def initialize_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

def get_random_parameters(seed):
    rs = np.random.RandomState(seed)
    params = {
        "num_layers": rs.choice([2, 4, 8]),
        "total_tree_count": rs.choice([1024, 2048]),
        "tree_depth": rs.choice([6, 8]),
        "tree_output_dim": rs.choice([2, 3]),
        "learning_rate": 1e-4,  # Smaller learning rate
        "epochs": 100,  # Example value, set as needed
        "objective": 'classification',  # Assuming classification
    }
    return params

def load_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = openml.datasets.get_dataset(task.dataset_id)

    # Retrieve the target feature directly from the task
    target_feature = task.target_name  # Changed from task.get_target() to task.target_name

    # Retrieve the dataset's data
    X, y, _, _ = dataset.get_data(target=target_feature)
    return X, y

def preprocess_data(X):
    # Convert categorical columns to numeric using one-hot encoding
    if isinstance(X, pd.DataFrame):
        X = pd.get_dummies(X, drop_first=True)  # One-hot encode categorical variables

    # Handle NaN values by filling them with the column mean or dropping rows
    X = X.fillna(X.mean())  # Fill NaNs with column mean for numeric stability
    return X

def encode_target(y):
    # Use LabelEncoder to convert categorical target variable to numeric
    le = LabelEncoder()
    return le.fit_transform(y)

def train_model(X_train, y_train, X_val, y_val, params):
    num_features = X_train.shape[1]
    num_classes = len(np.unique(y_train))

    model = NODE(params, num_features, num_classes).to(device)

    if params['objective'] == 'classification':
        loss_function = nn.CrossEntropyLoss()
    elif params['objective'] == 'regression':
        loss_function = nn.MSELoss()

    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])  # Use standard PyTorch Adam

    # Check for NaN values in input data using Pandas method
    if X_train.isna().any().any() or X_val.isna().any().any():
        raise ValueError("Input data contains NaN values.")

    # Normalize the data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Initialize model weights
    model.apply(initialize_weights)

    # Training loop
    for epoch in range(params['epochs']):
        model.train()
        optimizer.zero_grad()

        # Convert DataFrame to FloatTensor
        outputs = model(torch.FloatTensor(X_train).to(device))  # Convert to NumPy array
        loss = loss_function(outputs, torch.LongTensor(y_train).to(device))  # Ensure y_train is LongTensor for classification
        loss.backward()
        optimizer.step()

        if np.isnan(loss.item()):  # Check for NaN loss
            print("NaN loss encountered, stopping training.")
            break

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(torch.FloatTensor(X_val).to(device))  # Convert to NumPy array
        val_predictions = torch.argmax(val_outputs, dim=1)
        accuracy = accuracy_score(y_val, val_predictions.cpu())

    return model, accuracy

# Define your parameters
params = get_random_parameters(seed=42)



# Task ID1: 14965

In [None]:
# Load the data from OpenML
task_id = 14965  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8795


# Task ID2: 9977

In [None]:
# Load the data from OpenML
task_id = 9977  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9179


# Task ID3: 34539

In [None]:
# Load the data from OpenML
task_id = 34539  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9179


# Task ID4: 146606

In [None]:
# Load the data from OpenML
task_id = 146606  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5961


# Task ID5: 7592



In [None]:
# Load the data from OpenML
task_id = 7592  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8271


# Task ID6: 146195



In [None]:
# Load the data from OpenML
task_id = 146195  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7077


# Task ID7: 167119



In [None]:
# Load the data from OpenML
task_id = 167119  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6431


# Task ID8: 167120



In [None]:
# Load the data from OpenML
task_id = 167120  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5103


# Task ID11: 	168331



In [None]:
# Load the data from OpenML
task_id = 168331  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5065


# Task ID12: 	168330



In [None]:
# Load the data from OpenML
task_id = 168330  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5555


# Task ID13: 	168335



In [None]:
# Load the data from OpenML
task_id = 168335  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8076



# Task ID16: 	146212



In [None]:
# Load the data from OpenML
task_id = 146212  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8828


# Task ID19: 	168868



In [None]:
# Load the data from OpenML
task_id = 168868  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9825


# Task ID20: 31

In [None]:
# Load the data from OpenML
task_id = 31  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7300


# Task ID21: 10101

In [None]:
# Load the data from OpenML
task_id = 10101  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7533


# Task ID22: 	3913

In [None]:
# Load the data from OpenML
task_id = 3913  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8667


# Task ID23: 3

In [None]:
# Load the data from OpenML
task_id = 3  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7391


# Task ID24: 3917

In [None]:
# Load the data from OpenML
task_id = 3917  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8507


# Task ID25: 9957

In [None]:
# Load the data from OpenML
task_id = 9957 # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8578


# Task ID26: 9946

In [None]:
# Load the data from OpenML
task_id = 9946  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9561


# Task ID27: 3918

In [None]:
# Load the data from OpenML
task_id = 3918  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9144


# Task ID28: 3903

In [None]:
# Load the data from OpenML
task_id = 3903  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8946


# Task ID29: 37

In [None]:
# Load the data from OpenML
task_id = 37  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7532


# Task ID30: 9971

In [None]:
# Load the data from OpenML
task_id = 9971  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7521


# Task ID31: 9952

In [None]:
# Load the data from OpenML
task_id = 9952  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7771


# Task ID32: 3902

In [None]:
# Load the data from OpenML
task_id = 3902  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8493


# Task ID33: 49

In [None]:
# Load the data from OpenML
task_id = 49  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7083


# Task ID34: 43

In [None]:
# Load the data from OpenML
task_id = 43  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8697


# Task ID35: 9978

In [None]:
# Load the data from OpenML
task_id = 9978  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9389


# Task ID36: 10093

In [None]:
# Load the data from OpenML
task_id = 10093  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6509


# Task ID37: 219

In [None]:
# Load the data from OpenML
task_id = 219  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6895


# Task ID38: 9976

In [None]:
# Load the data from OpenML
task_id = 9976  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5404


# Task ID39: 6

In [None]:
# Load the data from OpenML
task_id = 6  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.2350


# Task ID40: 53

In [None]:
# Load the data from OpenML
task_id = 53  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5353


# Task ID41: 11

In [None]:
# Load the data from OpenML
task_id = 11  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8080


# Task ID42: 15

In [None]:
# Load the data from OpenML
task_id = 15  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9714


# Task ID43: 16

In [None]:
# Load the data from OpenML
task_id = 16  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6950


# Task ID44: 14

In [None]:
# Load the data from OpenML
task_id = 14  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5350


# Task ID45: 32

In [None]:
# Load the data from OpenML
task_id = 32  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6221


# Task ID46: 3549

In [None]:
# Load the data from OpenML
task_id = 3549  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9527


# Task ID47: 12

In [None]:
# Load the data from OpenML
task_id = 12  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9175


# Task ID48: 9981

In [None]:
# Load the data from OpenML
task_id = 9981  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8102


# Task ID49: 18

In [None]:
# Load the data from OpenML
task_id = 18  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.3325


# Task ID50: 28

In [None]:
# Load the data from OpenML
task_id = 28  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7402


# Task ID51: 2074

In [None]:
# Load the data from OpenML
task_id = 2074  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7729


# Task ID52: 29

In [None]:
# Load the data from OpenML
task_id = 29  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7971


# Task ID53: 45

In [None]:
# Load the data from OpenML
task_id = 45  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7461


# Task ID54: 125922

In [None]:
# Load the data from OpenML
task_id = 125922  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6091


# Task ID55: 9960

In [None]:
# Load the data from OpenML
task_id = 9960  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5879


# Task ID56: 9964

In [None]:
# Load the data from OpenML
task_id = 9964  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7962


# Task ID57: 22

In [None]:
# Load the data from OpenML
task_id = 22  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6225


# Task ID58: 2079

In [None]:
# Load the data from OpenML
task_id = 2079  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5270


# Task ID59: 14969

In [None]:
# Load the data from OpenML
task_id = 14969  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4608


# Task ID60: 3560

In [None]:
# Load the data from OpenML
task_id = 3560  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.2125


# Task ID61: 14952

In [None]:
# Load the data from OpenML
task_id = 14952  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8123


# Task ID62: 125920

In [None]:
# Load the data from OpenML
task_id = 125920  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5400


# Task ID63: 23

In [None]:
# Load the data from OpenML
task_id = 23  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4576


# Task ID64: 3904

In [None]:
# Load the data from OpenML
task_id = 3904  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8130


# Task ID65: 3022

In [None]:
# Load the data from OpenML
task_id = 3022  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.2475


# Task ID66: 9985

In [None]:
# Load the data from OpenML
task_id = 9985  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4044


# Task ID67: 9910

In [None]:
# Load the data from OpenML
task_id = 9910  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7790


# Task ID68: 14970

In [None]:
# Load the data from OpenML
task_id = 14970  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9170


# Task ID69: 3021

In [None]:
# Load the data from OpenML
task_id = 3021  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9497


# Task ID70: 3481

In [None]:
# Load the data from OpenML
task_id = 3481  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8154


# Task ID71: 3573

In [None]:
# Load the data from OpenML
task_id = 3573  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


# Task ID72: 146824

In [None]:
# Load the data from OpenML
task_id = 146824  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9300


# Task ID73: 146820

In [None]:
# Load the data from OpenML
task_id = 146820 # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9473


# Task ID74: 146822

In [None]:
# Load the data from OpenML
task_id = 146822  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5455


# Task ID75: 146195

In [None]:
# Load the data from OpenML
task_id = 146195  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6934


# Task ID76: 146800

In [None]:
# Load the data from OpenML
task_id = 146800  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6481


# Task ID77: 146817

In [None]:
# Load the data from OpenML
task_id = 146817  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5784


# Task ID78: 146819

In [None]:
# Load the data from OpenML
task_id = 146819  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9074


# Task ID79: 146821

In [None]:
# Load the data from OpenML
task_id = 146821  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6821


# Task ID80: 14954

In [None]:
# Load the data from OpenML
task_id = 14954  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7963


# Task ID81: 167141

In [None]:
# Load the data from OpenML
task_id = 167141  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8510


# Task ID82: 167140

In [None]:
# Load the data from OpenML
task_id = 167140  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8009


# Task ID83: 167125

In [None]:
# Load the data from OpenML
task_id = 167125  # Replace with your OpenML task ID
X, y = load_data(task_id)

# Preprocess the data
X = preprocess_data(X)

# Encode the target variable
y = encode_target(y)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9771


# Task ID84: 167124

# Task ID85: 167121

# TabNet




In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": trial.suggest_int("cat_emb_dim", 1, 3),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": trial.suggest_categorical("mask_type", ["sparsemax", "entmax"]),
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": rs.randint(1, 4),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": rs.choice(["sparsemax", "entmax"]),
    }
    return params

# Load data from OpenML, remove classes, and encode categorical features
def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Remove class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify categorical columns and encode them
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_dims = []  # Track max values to set embedding dimensions correctly

    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])  # Ensure all categorical columns are integer-encoded
        cat_dims.append(len(uniques))  # Track unique counts as embedding dimensions

    # Replace any remaining non-numeric entries with NaN and drop them or fill appropriately
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Or replace with another valid category ID if needed

    # Encode target variable
    y, _ = pd.factorize(y)  # Converts labels to numeric format

    return X, y, cat_dims

# Example configuration and arguments
class Args:
    objective = "classification"  # Or "regression" based on task
    batch_size = 32
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, X, cat_dims):
        # Get categorical column indices based on factorized object columns
        self.cat_idx = [X.columns.get_loc(col) for col in X.select_dtypes(include=['object']).columns]
        self.cat_dims = cat_dims  # Use dimensions from factorization for embedding sizes

# Task ID1: 14965

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14965)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.3465  | eval_logloss: 0.89848 |  0:01:38s
epoch 1  | loss: 0.28701 | eval_logloss: 0.42651 |  0:03:17s
epoch 2  | loss: 0.28254 | eval_logloss: 0.29386 |  0:04:55s
epoch 3  | loss: 0.28017 | eval_logloss: 0.30636 |  0:06:34s
epoch 4  | loss: 0.28278 | eval_logloss: 0.31493 |  0:08:13s
epoch 5  | loss: 0.28208 | eval_logloss: 0.39377 |  0:09:54s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.29386




Validation Accuracy: 0.8836


# Task ID2: 9977

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9977)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.3963  | eval_logloss: 1.45557 |  0:01:18s
epoch 1  | loss: 0.17881 | eval_logloss: 1.02047 |  0:02:49s
epoch 2  | loss: 0.1493  | eval_logloss: 1.04512 |  0:04:06s
epoch 3  | loss: 0.1381  | eval_logloss: 0.92759 |  0:05:23s
epoch 4  | loss: 0.12986 | eval_logloss: 0.53905 |  0:06:40s
epoch 5  | loss: 0.12656 | eval_logloss: 0.87485 |  0:07:58s
epoch 6  | loss: 0.12038 | eval_logloss: 0.46256 |  0:09:20s
epoch 7  | loss: 0.11966 | eval_logloss: 0.25007 |  0:10:38s
epoch 8  | loss: 0.11339 | eval_logloss: 1.29685 |  0:11:56s
epoch 9  | loss: 0.11283 | eval_logloss: 0.92155 |  0:13:13s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.25007




Validation Accuracy: 0.9367


# Task ID3: 34539

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=34539)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.26029 | eval_logloss: 0.68683 |  0:01:12s
epoch 1  | loss: 0.22643 | eval_logloss: 0.25801 |  0:02:26s
epoch 2  | loss: 0.22433 | eval_logloss: 0.24328 |  0:03:38s
epoch 3  | loss: 0.22227 | eval_logloss: 0.21793 |  0:04:51s
epoch 4  | loss: 0.22344 | eval_logloss: 0.21722 |  0:06:04s
epoch 5  | loss: 0.2225  | eval_logloss: 0.23177 |  0:07:18s
epoch 6  | loss: 0.2221  | eval_logloss: 0.2354  |  0:08:30s
epoch 7  | loss: 0.22352 | eval_logloss: 0.28985 |  0:09:42s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.21722




Validation Accuracy: 0.9435


# Task ID4: 146606

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146606)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.71186 | eval_logloss: 0.7747  |  0:03:32s
epoch 1  | loss: 0.67884 | eval_logloss: 0.708   |  0:07:51s
epoch 2  | loss: 0.67799 | eval_logloss: 0.69193 |  0:11:32s
epoch 3  | loss: 0.67771 | eval_logloss: 0.71725 |  0:15:06s
epoch 4  | loss: 0.67498 | eval_logloss: 0.67667 |  0:18:39s
epoch 5  | loss: 0.67353 | eval_logloss: 0.7062  |  0:22:16s
epoch 6  | loss: 0.66881 | eval_logloss: 0.72076 |  0:25:55s
epoch 7  | loss: 0.66763 | eval_logloss: 0.67615 |  0:29:32s
epoch 8  | loss: 0.66067 | eval_logloss: 0.65701 |  0:33:05s
epoch 9  | loss: 0.66065 | eval_logloss: 0.65639 |  0:36:40s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.65639




Validation Accuracy: 0.6150


# Task ID5: 7592



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=7592)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.51344 | eval_logloss: 0.8332  |  0:01:46s
epoch 1  | loss: 0.41641 | eval_logloss: 0.57792 |  0:03:35s
epoch 2  | loss: 0.40977 | eval_logloss: 0.51473 |  0:05:23s
epoch 3  | loss: 0.41555 | eval_logloss: 0.91587 |  0:07:14s
epoch 4  | loss: 0.41224 | eval_logloss: 1.37989 |  0:09:06s
epoch 5  | loss: 0.41125 | eval_logloss: 0.81665 |  0:10:55s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.51473




Validation Accuracy: 0.7848


# Task ID6: 146195



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146195)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.896   | eval_logloss: 2.01412 |  0:02:33s
epoch 1  | loss: 0.8415  | eval_logloss: 0.84725 |  0:05:04s
epoch 2  | loss: 0.83912 | eval_logloss: 6.05471 |  0:07:37s
epoch 3  | loss: 0.83894 | eval_logloss: 0.84646 |  0:10:08s
epoch 4  | loss: 0.83915 | eval_logloss: 1.60132 |  0:12:42s
epoch 5  | loss: 0.83728 | eval_logloss: 1.00331 |  0:15:13s
epoch 6  | loss: 0.83939 | eval_logloss: 0.88437 |  0:17:49s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 0.84646




Validation Accuracy: 0.6569


# Task ID7: 167119



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167119)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.77246 | eval_logloss: 1.57999 |  0:01:38s
epoch 1  | loss: 0.64025 | eval_logloss: 1.18152 |  0:03:27s
epoch 2  | loss: 0.60071 | eval_logloss: 0.81802 |  0:05:05s
epoch 3  | loss: 0.55812 | eval_logloss: 0.7299  |  0:06:43s
epoch 4  | loss: 0.54603 | eval_logloss: 1.11812 |  0:08:21s
epoch 5  | loss: 0.53139 | eval_logloss: 0.81699 |  0:09:59s
epoch 6  | loss: 0.52363 | eval_logloss: 0.4834  |  0:11:38s
epoch 7  | loss: 0.49867 | eval_logloss: 0.4671  |  0:13:18s
epoch 8  | loss: 0.48945 | eval_logloss: 0.47089 |  0:14:57s
epoch 9  | loss: 0.48366 | eval_logloss: 0.46573 |  0:16:37s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.46573




Validation Accuracy: 0.7952


# Task ID8: 167120



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167120)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.71209 | eval_logloss: 0.69413 |  0:03:37s
epoch 1  | loss: 0.69438 | eval_logloss: 0.69306 |  0:07:14s
epoch 2  | loss: 0.69331 | eval_logloss: 0.69404 |  0:10:53s
epoch 3  | loss: 0.69327 | eval_logloss: 0.69329 |  0:14:28s
epoch 4  | loss: 0.69302 | eval_logloss: 0.69295 |  0:18:02s
epoch 5  | loss: 0.69318 | eval_logloss: 0.71189 |  0:21:38s
epoch 6  | loss: 0.69329 | eval_logloss: 0.69334 |  0:25:12s
epoch 7  | loss: 0.69327 | eval_logloss: 0.70784 |  0:28:50s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.69295




Validation Accuracy: 0.5071


# Task ID11: 	168331



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168331)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.6942  | eval_logloss: 3.93489 |  0:02:32s
epoch 1  | loss: 1.38955 | eval_logloss: 2.20657 |  0:04:52s
epoch 2  | loss: 1.31262 | eval_logloss: 3.18054 |  0:07:12s
epoch 3  | loss: 1.2644  | eval_logloss: 1.50697 |  0:09:35s
epoch 4  | loss: 1.27018 | eval_logloss: 1.88719 |  0:11:54s
epoch 5  | loss: 1.21719 | eval_logloss: 1.46682 |  0:14:13s
epoch 6  | loss: 1.1868  | eval_logloss: 1.45254 |  0:16:30s
epoch 7  | loss: 1.17425 | eval_logloss: 1.47863 |  0:18:50s
epoch 8  | loss: 1.16154 | eval_logloss: 1.43581 |  0:21:39s
epoch 9  | loss: 1.14858 | eval_logloss: 1.45584 |  0:24:00s
Stop training because you reached max_epochs = 10 with best_epoch = 8 and best_eval_logloss = 1.43581




Validation Accuracy: 0.5475


# Task ID12: 	168330



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168330)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.99482 | eval_logloss: 1.24022 |  0:03:05s
epoch 1  | loss: 0.84874 | eval_logloss: 1.46023 |  0:06:13s
epoch 2  | loss: 0.80891 | eval_logloss: 0.87615 |  0:09:22s
epoch 3  | loss: 0.77896 | eval_logloss: 0.85331 |  0:12:31s
epoch 4  | loss: 0.76773 | eval_logloss: 1.10859 |  0:15:41s
epoch 5  | loss: 0.75482 | eval_logloss: 0.79825 |  0:18:58s
epoch 6  | loss: 0.75109 | eval_logloss: 0.96151 |  0:22:10s
epoch 7  | loss: 0.74612 | eval_logloss: 0.72795 |  0:25:19s
epoch 8  | loss: 0.73993 | eval_logloss: 0.77799 |  0:28:33s
epoch 9  | loss: 0.73496 | eval_logloss: 0.82595 |  0:31:47s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.72795




Validation Accuracy: 0.6983


# Task ID13: 	168335



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168335)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.31082 | eval_logloss: 0.3181  |  0:04:08s
epoch 1  | loss: 0.2099  | eval_logloss: 0.41314 |  0:08:21s
epoch 2  | loss: 0.19644 | eval_logloss: 1.19324 |  0:12:31s
epoch 3  | loss: 0.20571 | eval_logloss: 0.36119 |  0:16:54s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 0.3181




Validation Accuracy: 0.8663



# Task ID16: 	146212



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146212)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.16955 | eval_logloss: 2.33135 |  0:02:24s
epoch 1  | loss: 0.05515 | eval_logloss: 0.78685 |  0:04:48s
epoch 2  | loss: 0.0538  | eval_logloss: 0.26912 |  0:07:12s
epoch 3  | loss: 0.04524 | eval_logloss: 0.13735 |  0:09:38s
epoch 4  | loss: 0.02799 | eval_logloss: 0.10543 |  0:12:03s
epoch 5  | loss: 0.02399 | eval_logloss: 0.20159 |  0:14:27s
epoch 6  | loss: 0.03056 | eval_logloss: 0.25784 |  0:16:49s
epoch 7  | loss: 0.0208  | eval_logloss: 0.11338 |  0:19:08s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.10543




Validation Accuracy: 0.9764


# Task ID19: 	168868



In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=168868)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.10852 | eval_logloss: 0.09223 |  0:02:37s
epoch 1  | loss: 0.0514  | eval_logloss: 0.13615 |  0:05:18s
epoch 2  | loss: 0.04712 | eval_logloss: 0.06024 |  0:08:08s
epoch 3  | loss: 0.0495  | eval_logloss: 0.08095 |  0:11:00s
epoch 4  | loss: 0.05274 | eval_logloss: 0.05522 |  0:13:52s
epoch 5  | loss: 0.05017 | eval_logloss: 0.06555 |  0:16:45s
epoch 6  | loss: 0.04313 | eval_logloss: 0.06066 |  0:19:26s
epoch 7  | loss: 0.04361 | eval_logloss: 0.05076 |  0:22:05s
epoch 8  | loss: 0.04403 | eval_logloss: 0.59572 |  0:24:43s
epoch 9  | loss: 0.04406 | eval_logloss: 0.05768 |  0:27:22s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.05076




Validation Accuracy: 0.9809


# Task ID20: 31

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=31)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.34689 | eval_logloss: 9.24658 |  0:00:05s
epoch 1  | loss: 1.18631 | eval_logloss: 4.94214 |  0:00:08s
epoch 2  | loss: 0.83039 | eval_logloss: 6.37695 |  0:00:10s
epoch 3  | loss: 0.66874 | eval_logloss: 11.23938|  0:00:12s
epoch 4  | loss: 0.63382 | eval_logloss: 9.88428 |  0:00:14s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 4.94214




Validation Accuracy: 0.6900


# Task ID21: 10101

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=10101)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.3636  | eval_logloss: 4.46387 |  0:00:01s
epoch 1  | loss: 0.9386  | eval_logloss: 6.05811 |  0:00:03s
epoch 2  | loss: 0.60565 | eval_logloss: 4.67643 |  0:00:04s
epoch 3  | loss: 0.55462 | eval_logloss: 5.46162 |  0:00:06s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 4.46387




Validation Accuracy: 0.7200


# Task ID22: 	3913

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3913)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.92756 | eval_logloss: 8.50322 |  0:00:01s
epoch 1  | loss: 0.93712 | eval_logloss: 8.19894 |  0:00:02s
epoch 2  | loss: 1.14424 | eval_logloss: 10.02093|  0:00:03s
epoch 3  | loss: 0.73191 | eval_logloss: 11.84291|  0:00:04s
epoch 4  | loss: 1.05149 | eval_logloss: 6.07329 |  0:00:05s
epoch 5  | loss: 0.67271 | eval_logloss: 9.71726 |  0:00:06s
epoch 6  | loss: 0.6837  | eval_logloss: 5.92657 |  0:00:07s
epoch 7  | loss: 0.53478 | eval_logloss: 12.29841|  0:00:09s
epoch 8  | loss: 0.44473 | eval_logloss: 10.40043|  0:00:10s
epoch 9  | loss: 0.38226 | eval_logloss: 3.59684 |  0:00:11s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 3.59684




Validation Accuracy: 0.7714


# Task ID23: 3

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69673 | eval_logloss: 7.47299 |  0:00:28s
epoch 1  | loss: 0.69673 | eval_logloss: 7.47299 |  0:00:48s
epoch 2  | loss: 0.69673 | eval_logloss: 7.47299 |  0:01:05s
epoch 3  | loss: 0.69673 | eval_logloss: 7.47299 |  0:01:22s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 7.47299




Validation Accuracy: 0.5312


# Task ID24: 3917

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3917)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.07655 | eval_logloss: 9.02898 |  0:00:05s
epoch 1  | loss: 0.77614 | eval_logloss: 2.7201  |  0:00:10s
epoch 2  | loss: 0.49718 | eval_logloss: 3.00605 |  0:00:14s
epoch 3  | loss: 0.42752 | eval_logloss: 3.83157 |  0:00:20s
epoch 4  | loss: 0.37198 | eval_logloss: 2.68079 |  0:00:24s
epoch 5  | loss: 0.37401 | eval_logloss: 1.75649 |  0:00:29s
epoch 6  | loss: 0.3726  | eval_logloss: 1.96631 |  0:00:35s
epoch 7  | loss: 0.37644 | eval_logloss: 2.03858 |  0:00:39s
epoch 8  | loss: 0.3669  | eval_logloss: 1.95649 |  0:00:43s

Early stopping occurred at epoch 8 with best_epoch = 5 and best_eval_logloss = 1.75649




Validation Accuracy: 0.8270


# Task ID25: 9957

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9957)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.42965 | eval_logloss: 5.81784 |  0:00:02s
epoch 1  | loss: 1.3272  | eval_logloss: 5.41684 |  0:00:05s
epoch 2  | loss: 1.25499 | eval_logloss: 6.04451 |  0:00:08s
epoch 3  | loss: 0.69689 | eval_logloss: 5.74228 |  0:00:10s
epoch 4  | loss: 0.61616 | eval_logloss: 5.31628 |  0:00:12s
epoch 5  | loss: 0.56039 | eval_logloss: 6.27827 |  0:00:15s
epoch 6  | loss: 0.55947 | eval_logloss: 5.6358  |  0:00:17s
epoch 7  | loss: 0.53881 | eval_logloss: 5.55946 |  0:00:20s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 5.31628




Validation Accuracy: 0.6635


# Task ID26: 9946

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9946)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.9977  | eval_logloss: 6.53087 |  0:00:02s
epoch 1  | loss: 0.92732 | eval_logloss: 8.67054 |  0:00:05s
epoch 2  | loss: 0.86623 | eval_logloss: 9.2298  |  0:00:06s
epoch 3  | loss: 0.46641 | eval_logloss: 9.50949 |  0:00:07s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 6.53087




Validation Accuracy: 0.5877


# Task ID27: 3918

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3918)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.04891 | eval_logloss: 4.18315 |  0:00:07s
epoch 1  | loss: 1.19057 | eval_logloss: 8.25844 |  0:00:12s
epoch 2  | loss: 0.46498 | eval_logloss: 4.0222  |  0:00:18s
epoch 3  | loss: 0.29515 | eval_logloss: 12.44818|  0:00:22s
epoch 4  | loss: 0.25252 | eval_logloss: 2.88959 |  0:00:24s
epoch 5  | loss: 0.31126 | eval_logloss: 9.04523 |  0:00:27s
epoch 6  | loss: 0.26592 | eval_logloss: 1.36444 |  0:00:29s
epoch 7  | loss: 0.27141 | eval_logloss: 1.44562 |  0:00:33s
epoch 8  | loss: 0.25278 | eval_logloss: 3.28764 |  0:00:36s
epoch 9  | loss: 0.24221 | eval_logloss: 3.89003 |  0:00:39s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 1.36444




Validation Accuracy: 0.9144


# Task ID28: 3903

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3903)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.71368 | eval_logloss: 2.03737 |  0:00:04s
epoch 1  | loss: 0.52013 | eval_logloss: 1.83363 |  0:00:08s
epoch 2  | loss: 0.41295 | eval_logloss: 1.73176 |  0:00:11s
epoch 3  | loss: 0.37941 | eval_logloss: 3.11145 |  0:00:15s
epoch 4  | loss: 0.34096 | eval_logloss: 1.68083 |  0:00:20s
epoch 5  | loss: 0.33321 | eval_logloss: 1.62989 |  0:00:24s
epoch 6  | loss: 0.29487 | eval_logloss: 1.76104 |  0:00:28s
epoch 7  | loss: 0.30232 | eval_logloss: 1.59179 |  0:00:32s
epoch 8  | loss: 0.29718 | eval_logloss: 8.36491 |  0:00:36s
epoch 9  | loss: 0.2992  | eval_logloss: 1.24993 |  0:00:40s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 1.24993




Validation Accuracy: 0.8882


# Task ID29: 37

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=37)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.17244 | eval_logloss: 6.83245 |  0:00:01s
epoch 1  | loss: 0.95167 | eval_logloss: 7.76415 |  0:00:03s
epoch 2  | loss: 0.61299 | eval_logloss: 7.86767 |  0:00:05s
epoch 3  | loss: 0.61923 | eval_logloss: 10.14676|  0:00:07s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 6.83245




Validation Accuracy: 0.5714


# Task ID30: 9971

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9971)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.31847 | eval_logloss: 6.13169 |  0:00:01s
epoch 1  | loss: 1.14787 | eval_logloss: 5.58665 |  0:00:02s
epoch 2  | loss: 0.93563 | eval_logloss: 7.78689 |  0:00:04s
epoch 3  | loss: 0.76167 | eval_logloss: 6.13169 |  0:00:05s
epoch 4  | loss: 0.63493 | eval_logloss: 6.67673 |  0:00:06s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 5.58665




Validation Accuracy: 0.6496


# Task ID31: 9952

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9952)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69858 | eval_logloss: 4.06578 |  0:00:13s
epoch 1  | loss: 0.50012 | eval_logloss: 5.10602 |  0:00:27s
epoch 2  | loss: 0.46375 | eval_logloss: 3.98746 |  0:00:40s
epoch 3  | loss: 0.45129 | eval_logloss: 2.53219 |  0:00:54s
epoch 4  | loss: 0.43466 | eval_logloss: 1.06911 |  0:01:07s
epoch 5  | loss: 0.43551 | eval_logloss: 0.88662 |  0:01:21s
epoch 6  | loss: 0.42693 | eval_logloss: 0.6498  |  0:01:35s
epoch 7  | loss: 0.42591 | eval_logloss: 0.49176 |  0:01:48s
epoch 8  | loss: 0.43433 | eval_logloss: 0.62525 |  0:02:02s
epoch 9  | loss: 0.424   | eval_logloss: 0.56668 |  0:02:15s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.49176




Validation Accuracy: 0.7900


# Task ID32: 3902

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3902)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.84881 | eval_logloss: 5.9511  |  0:00:04s
epoch 1  | loss: 0.73736 | eval_logloss: 4.53803 |  0:00:08s
epoch 2  | loss: 0.46176 | eval_logloss: 2.29308 |  0:00:11s
epoch 3  | loss: 0.35295 | eval_logloss: 2.62067 |  0:00:16s
epoch 4  | loss: 0.32215 | eval_logloss: 2.29308 |  0:00:20s
epoch 5  | loss: 0.29483 | eval_logloss: 2.29308 |  0:00:24s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 2.29308




Validation Accuracy: 0.8562


# Task ID33: 49

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 4  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=49)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69534 | eval_logloss: 0.69315 |  0:00:29s
epoch 1  | loss: 0.69534 | eval_logloss: 0.69315 |  0:01:05s
epoch 2  | loss: 0.69534 | eval_logloss: 0.69315 |  0:01:24s
epoch 3  | loss: 0.69534 | eval_logloss: 0.69315 |  0:01:43s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 0.69315


  feature_importances_ = sum_explain / np.sum(sum_explain)


Validation Accuracy: 0.6510


# Task ID34: 43

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=43)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.89573 | eval_logloss: 3.83524 |  0:00:51s
epoch 1  | loss: 0.48869 | eval_logloss: 1.58229 |  0:01:35s
epoch 2  | loss: 0.3461  | eval_logloss: 1.10953 |  0:02:19s
epoch 3  | loss: 0.30336 | eval_logloss: 1.20901 |  0:03:03s
epoch 4  | loss: 0.31972 | eval_logloss: 0.9515  |  0:03:46s
epoch 5  | loss: 0.29299 | eval_logloss: 0.55348 |  0:04:31s
epoch 6  | loss: 0.2637  | eval_logloss: 0.53266 |  0:05:14s
epoch 7  | loss: 0.26325 | eval_logloss: 0.61412 |  0:05:57s
epoch 8  | loss: 0.25624 | eval_logloss: 1.01671 |  0:06:39s
epoch 9  | loss: 0.23249 | eval_logloss: 0.65936 |  0:07:22s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 0.53266




Validation Accuracy: 0.8208


# Task ID35: 9978

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9978)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.67168 | eval_logloss: 1.03767 |  0:00:23s
epoch 1  | loss: 0.25097 | eval_logloss: 1.03496 |  0:00:47s
epoch 2  | loss: 0.25197 | eval_logloss: 0.74863 |  0:01:13s
epoch 3  | loss: 0.22699 | eval_logloss: 1.88177 |  0:01:37s
epoch 4  | loss: 0.23745 | eval_logloss: 0.97193 |  0:02:01s
epoch 5  | loss: 0.23823 | eval_logloss: 0.96236 |  0:02:25s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.74863




Validation Accuracy: 0.8955


# Task ID36: 10093

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=10093)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.97136 | eval_logloss: 4.34792 |  0:00:13s
epoch 1  | loss: 0.43185 | eval_logloss: 3.86977 |  0:00:25s
epoch 2  | loss: 0.36574 | eval_logloss: 4.06272 |  0:00:38s
epoch 3  | loss: 0.32282 | eval_logloss: 3.34202 |  0:00:52s
epoch 4  | loss: 0.29568 | eval_logloss: 2.94016 |  0:01:05s
epoch 5  | loss: 0.26915 | eval_logloss: 5.25189 |  0:01:18s
epoch 6  | loss: 0.29462 | eval_logloss: 2.68352 |  0:01:31s
epoch 7  | loss: 0.23989 | eval_logloss: 1.66723 |  0:01:44s
epoch 8  | loss: 0.23303 | eval_logloss: 1.09113 |  0:01:57s
epoch 9  | loss: 0.17572 | eval_logloss: 0.81969 |  0:02:10s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.81969




Validation Accuracy: 0.9164


# Task ID37: 219

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=219)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.55812 | eval_logloss: 1.24437 |  0:02:12s
epoch 1  | loss: 0.50988 | eval_logloss: 1.71687 |  0:04:11s
epoch 2  | loss: 0.51261 | eval_logloss: 0.64444 |  0:06:08s
epoch 3  | loss: 0.50404 | eval_logloss: 0.51315 |  0:08:07s
epoch 4  | loss: 0.50175 | eval_logloss: 1.15444 |  0:10:07s
epoch 5  | loss: 0.49845 | eval_logloss: 0.54822 |  0:12:06s
epoch 6  | loss: 0.50422 | eval_logloss: 1.59942 |  0:14:03s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 0.51315




Validation Accuracy: 0.7526


# Task ID38: 9976

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9976)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.35332 | eval_logloss: 7.78724 |  0:00:09s
epoch 1  | loss: 0.99687 | eval_logloss: 8.04319 |  0:00:19s
epoch 2  | loss: 0.797   | eval_logloss: 8.06317 |  0:00:27s
epoch 3  | loss: 0.72739 | eval_logloss: 7.84856 |  0:00:36s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 7.78724




Validation Accuracy: 0.5115


# Task ID39: 6

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=6)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.52516 | eval_logloss: 13.27656|  0:00:51s
epoch 1  | loss: 1.43021 | eval_logloss: 10.25438|  0:01:43s
epoch 2  | loss: 1.20465 | eval_logloss: 5.88078 |  0:02:35s
epoch 3  | loss: 1.10063 | eval_logloss: 4.37772 |  0:03:26s
epoch 4  | loss: 1.02321 | eval_logloss: 5.01252 |  0:04:19s
epoch 5  | loss: 0.97172 | eval_logloss: 2.61749 |  0:05:11s
epoch 6  | loss: 0.94548 | eval_logloss: 2.53675 |  0:06:04s
epoch 7  | loss: 0.89046 | eval_logloss: 2.87962 |  0:06:56s
epoch 8  | loss: 0.89461 | eval_logloss: 3.54118 |  0:07:48s
epoch 9  | loss: 0.85949 | eval_logloss: 2.82158 |  0:08:41s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 2.53675




Validation Accuracy: 0.5430


# Task ID40: 53

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=53)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.35915 | eval_logloss: 11.53478|  0:00:02s
epoch 1  | loss: 2.02485 | eval_logloss: 11.53478|  0:00:04s
epoch 2  | loss: 2.17864 | eval_logloss: 12.19124|  0:00:07s
epoch 3  | loss: 1.42329 | eval_logloss: 12.00368|  0:00:10s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 11.53478




Validation Accuracy: 0.2765


# Task ID41: 11

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=11)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.14849 | eval_logloss: 9.31035 |  0:00:01s
epoch 1  | loss: 1.11661 | eval_logloss: 7.77988 |  0:00:04s
epoch 2  | loss: 0.83861 | eval_logloss: 6.76513 |  0:00:06s
epoch 3  | loss: 0.6374  | eval_logloss: 4.97402 |  0:00:07s
epoch 4  | loss: 0.54761 | eval_logloss: 4.71895 |  0:00:09s
epoch 5  | loss: 0.46219 | eval_logloss: 9.18281 |  0:00:11s
epoch 6  | loss: 0.47785 | eval_logloss: 8.29004 |  0:00:12s
epoch 7  | loss: 0.47396 | eval_logloss: 7.26973 |  0:00:14s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 4.71895




Validation Accuracy: 0.7040


# Task ID42: 15

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=15)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.51653 | eval_logloss: 10.93192|  0:00:02s
epoch 1  | loss: 0.41641 | eval_logloss: 8.65444 |  0:00:04s
epoch 2  | loss: 0.32008 | eval_logloss: 10.81805|  0:00:06s
epoch 3  | loss: 0.27131 | eval_logloss: 7.74344 |  0:00:08s
epoch 4  | loss: 0.21542 | eval_logloss: 9.22381 |  0:00:10s
epoch 5  | loss: 0.13686 | eval_logloss: 10.5903 |  0:00:12s
epoch 6  | loss: 0.23555 | eval_logloss: 8.14845 |  0:00:13s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 7.74344




Validation Accuracy: 0.5143


# Task ID43: 16

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=16)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.68524 | eval_logloss: 13.91963|  0:00:06s
epoch 1  | loss: 2.71258 | eval_logloss: 13.45409|  0:00:11s
epoch 2  | loss: 2.07086 | eval_logloss: 13.51117|  0:00:16s
epoch 3  | loss: 1.58317 | eval_logloss: 13.21697|  0:00:23s
epoch 4  | loss: 1.37061 | eval_logloss: 13.72293|  0:00:28s
epoch 5  | loss: 1.15076 | eval_logloss: 12.34968|  0:00:34s
epoch 6  | loss: 0.99943 | eval_logloss: 10.6877 |  0:00:39s
epoch 7  | loss: 0.92738 | eval_logloss: 9.86958 |  0:00:45s
epoch 8  | loss: 0.86861 | eval_logloss: 10.69403|  0:00:51s
epoch 9  | loss: 0.8236  | eval_logloss: 9.75459 |  0:00:56s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 9.75459




Validation Accuracy: 0.3375


# Task ID44: 14

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.63054 | eval_logloss: 14.54598|  0:00:05s
epoch 1  | loss: 2.79112 | eval_logloss: 13.8594 |  0:00:10s
epoch 2  | loss: 2.19693 | eval_logloss: 14.39966|  0:00:15s
epoch 3  | loss: 1.57906 | eval_logloss: 14.03264|  0:00:21s
epoch 4  | loss: 1.32264 | eval_logloss: 13.58695|  0:00:26s
epoch 5  | loss: 1.18568 | eval_logloss: 13.43258|  0:00:32s
epoch 6  | loss: 0.96883 | eval_logloss: 9.66555 |  0:00:37s
epoch 7  | loss: 0.92166 | eval_logloss: 9.94002 |  0:00:43s
epoch 8  | loss: 0.92694 | eval_logloss: 10.09771|  0:00:48s
epoch 9  | loss: 0.8296  | eval_logloss: 8.79039 |  0:00:54s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 8.79039




Validation Accuracy: 0.3400


# Task ID45: 32

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=32)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.62185 | eval_logloss: 12.88854|  0:00:28s
epoch 1  | loss: 0.65008 | eval_logloss: 11.81195|  0:00:57s
epoch 2  | loss: 0.46543 | eval_logloss: 7.9902  |  0:01:25s
epoch 3  | loss: 0.36477 | eval_logloss: 4.86628 |  0:01:55s
epoch 4  | loss: 0.28835 | eval_logloss: 4.84269 |  0:02:24s
epoch 5  | loss: 0.28201 | eval_logloss: 3.60895 |  0:02:53s
epoch 6  | loss: 0.23222 | eval_logloss: 3.13824 |  0:03:22s
epoch 7  | loss: 0.21231 | eval_logloss: 2.6245  |  0:03:51s
epoch 8  | loss: 0.18237 | eval_logloss: 1.74174 |  0:04:19s
epoch 9  | loss: 0.16706 | eval_logloss: 2.71475 |  0:04:47s
Stop training because you reached max_epochs = 10 with best_epoch = 8 and best_eval_logloss = 1.74174




Validation Accuracy: 0.7563


# Task ID46: 3549

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3549)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.10331 | eval_logloss: 11.98037|  0:00:02s
epoch 1  | loss: 2.22585 | eval_logloss: 9.8107  |  0:00:04s
epoch 2  | loss: 1.68331 | eval_logloss: 8.96228 |  0:00:06s
epoch 3  | loss: 1.15805 | eval_logloss: 10.3767 |  0:00:08s
epoch 4  | loss: 1.02533 | eval_logloss: 9.91327 |  0:00:11s
epoch 5  | loss: 0.8891  | eval_logloss: 9.2447  |  0:00:13s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 8.96228




Validation Accuracy: 0.4379


# Task ID47: 12

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=12)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.78448 | eval_logloss: 13.0329 |  0:00:16s
epoch 1  | loss: 2.02954 | eval_logloss: 13.48574|  0:00:28s
epoch 2  | loss: 1.35196 | eval_logloss: 11.50661|  0:00:37s
epoch 3  | loss: 1.13319 | eval_logloss: 11.61802|  0:00:48s
epoch 4  | loss: 1.09804 | eval_logloss: 11.83884|  0:00:58s
epoch 5  | loss: 1.00785 | eval_logloss: 8.07718 |  0:01:09s
epoch 6  | loss: 0.90997 | eval_logloss: 7.63674 |  0:01:19s
epoch 7  | loss: 0.83216 | eval_logloss: 8.50177 |  0:01:29s
epoch 8  | loss: 0.78182 | eval_logloss: 5.64768 |  0:01:40s
epoch 9  | loss: 0.68053 | eval_logloss: 4.96465 |  0:01:50s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 4.96465




Validation Accuracy: 0.4800


# Task ID48: 9981

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9981)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 4.18252 | eval_logloss: 14.61385|  0:00:13s
epoch 1  | loss: 3.98198 | eval_logloss: 13.72817|  0:00:24s
epoch 2  | loss: 2.66812 | eval_logloss: 14.61564|  0:00:30s
epoch 3  | loss: 2.2368  | eval_logloss: 13.7897 |  0:00:38s
epoch 4  | loss: 1.97772 | eval_logloss: 10.91763|  0:00:45s
epoch 5  | loss: 1.70386 | eval_logloss: 8.8964  |  0:00:53s
epoch 6  | loss: 1.5747  | eval_logloss: 9.02813 |  0:01:00s
epoch 7  | loss: 1.36443 | eval_logloss: 6.73812 |  0:01:08s
epoch 8  | loss: 1.17264 | eval_logloss: 8.93185 |  0:01:15s
epoch 9  | loss: 1.16976 | eval_logloss: 4.89783 |  0:01:22s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 4.89783




Validation Accuracy: 0.5324


# Task ID49: 18

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=18)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.21757 | eval_logloss: 12.87348|  0:00:08s
epoch 1  | loss: 1.34322 | eval_logloss: 14.82642|  0:00:19s
epoch 2  | loss: 1.0787  | eval_logloss: 12.65845|  0:00:29s
epoch 3  | loss: 1.02473 | eval_logloss: 9.31076 |  0:00:38s
epoch 4  | loss: 0.94744 | eval_logloss: 13.22309|  0:00:47s
epoch 5  | loss: 0.91763 | eval_logloss: 11.69855|  0:00:57s
epoch 6  | loss: 0.91225 | eval_logloss: 12.40859|  0:01:07s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 9.31076




Validation Accuracy: 0.3725


# Task ID50: 28

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=28)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.51465 | eval_logloss: 10.40119|  0:00:28s
epoch 1  | loss: 1.215   | eval_logloss: 9.918   |  0:00:56s
epoch 2  | loss: 0.86968 | eval_logloss: 8.05036 |  0:01:23s
epoch 3  | loss: 0.74891 | eval_logloss: 5.88487 |  0:01:51s
epoch 4  | loss: 0.62198 | eval_logloss: 2.33301 |  0:02:19s
epoch 5  | loss: 0.57125 | eval_logloss: 2.11888 |  0:02:47s
epoch 6  | loss: 0.49438 | eval_logloss: 3.0191  |  0:03:15s
epoch 7  | loss: 0.44801 | eval_logloss: 2.85334 |  0:03:43s
epoch 8  | loss: 0.4076  | eval_logloss: 0.75577 |  0:04:10s
epoch 9  | loss: 0.36435 | eval_logloss: 0.71362 |  0:04:38s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.71362




Validation Accuracy: 0.8025


# Task ID51: 2074

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=2074)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.51129 | eval_logloss: 12.15818|  0:00:32s
epoch 1  | loss: 0.75999 | eval_logloss: 6.77683 |  0:01:03s
epoch 2  | loss: 0.68029 | eval_logloss: 4.79691 |  0:01:33s
epoch 3  | loss: 0.66508 | eval_logloss: 5.62337 |  0:02:05s
epoch 4  | loss: 0.6356  | eval_logloss: 1.38191 |  0:02:37s
epoch 5  | loss: 0.58669 | eval_logloss: 1.71945 |  0:03:08s
epoch 6  | loss: 0.53544 | eval_logloss: 1.31969 |  0:03:39s
epoch 7  | loss: 0.54407 | eval_logloss: 0.66251 |  0:04:11s
epoch 8  | loss: 0.53615 | eval_logloss: 0.71303 |  0:04:41s
epoch 9  | loss: 0.54719 | eval_logloss: 1.65011 |  0:05:12s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.66251




Validation Accuracy: 0.7893


# Task ID52: 29

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=29)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.48005 | eval_logloss: 8.08672 |  0:00:04s
epoch 1  | loss: 1.37652 | eval_logloss: 6.00728 |  0:00:07s
epoch 2  | loss: 0.99528 | eval_logloss: 7.62462 |  0:00:10s
epoch 3  | loss: 0.86614 | eval_logloss: 7.77241 |  0:00:13s
epoch 4  | loss: 0.6973  | eval_logloss: 8.59213 |  0:00:17s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 6.00728




Validation Accuracy: 0.6232


# Task ID53: 45

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=45)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.10271 | eval_logloss: 11.46952|  0:00:15s
epoch 1  | loss: 1.10271 | eval_logloss: 7.97119 |  0:00:32s
epoch 2  | loss: 1.10271 | eval_logloss: 11.46952|  0:00:49s
epoch 3  | loss: 1.10271 | eval_logloss: 7.97119 |  0:01:06s
epoch 4  | loss: 1.10271 | eval_logloss: 7.97119 |  0:01:23s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 7.97119




Validation Accuracy: 0.5000


# Task ID54: 125922

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=125922)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.25871 | eval_logloss: 13.14887|  0:00:25s
epoch 1  | loss: 1.11513 | eval_logloss: 9.44277 |  0:00:52s
epoch 2  | loss: 0.94684 | eval_logloss: 5.33629 |  0:01:17s
epoch 3  | loss: 0.71211 | eval_logloss: 5.09027 |  0:01:44s
epoch 4  | loss: 0.64502 | eval_logloss: 6.74302 |  0:02:10s
epoch 5  | loss: 0.63858 | eval_logloss: 6.14965 |  0:02:37s
epoch 6  | loss: 0.58242 | eval_logloss: 3.33172 |  0:03:05s
epoch 7  | loss: 0.52517 | eval_logloss: 2.09625 |  0:03:32s
epoch 8  | loss: 0.48328 | eval_logloss: 1.80142 |  0:04:00s
epoch 9  | loss: 0.46713 | eval_logloss: 1.96301 |  0:04:26s
Stop training because you reached max_epochs = 10 with best_epoch = 8 and best_eval_logloss = 1.80142




Validation Accuracy: 0.7155


# Task ID55: 9960

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9960)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.54984 | eval_logloss: 9.47903 |  0:00:26s
epoch 1  | loss: 0.82261 | eval_logloss: 9.10091 |  0:00:52s
epoch 2  | loss: 0.65935 | eval_logloss: 8.04586 |  0:01:20s
epoch 3  | loss: 0.56649 | eval_logloss: 7.88895 |  0:01:47s
epoch 4  | loss: 0.5372  | eval_logloss: 6.3676  |  0:02:14s
epoch 5  | loss: 0.52447 | eval_logloss: 4.94183 |  0:02:41s
epoch 6  | loss: 0.53178 | eval_logloss: 3.8235  |  0:03:08s
epoch 7  | loss: 0.48892 | eval_logloss: 2.3727  |  0:03:36s
epoch 8  | loss: 0.49948 | eval_logloss: 3.48711 |  0:04:02s
epoch 9  | loss: 0.48963 | eval_logloss: 3.17181 |  0:04:30s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 2.3727




Validation Accuracy: 0.6081


# Task ID56: 9964

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9964)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.68528 | eval_logloss: 14.05106|  0:00:10s
epoch 1  | loss: 2.98703 | eval_logloss: 14.24641|  0:00:19s
epoch 2  | loss: 2.43497 | eval_logloss: 14.04329|  0:00:26s
epoch 3  | loss: 2.15597 | eval_logloss: 14.29339|  0:00:34s
epoch 4  | loss: 1.93001 | eval_logloss: 13.65193|  0:00:38s
epoch 5  | loss: 1.58914 | eval_logloss: 13.45126|  0:00:43s
epoch 6  | loss: 1.50048 | eval_logloss: 12.56901|  0:00:48s
epoch 7  | loss: 1.27909 | eval_logloss: 11.61492|  0:00:53s
epoch 8  | loss: 1.21407 | eval_logloss: 10.66038|  0:00:58s
epoch 9  | loss: 1.15783 | eval_logloss: 8.9243  |  0:01:03s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 8.9243




Validation Accuracy: 0.4263


# Task ID57: 22

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=22)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.62943 | eval_logloss: 14.41203|  0:00:05s
epoch 1  | loss: 2.63318 | eval_logloss: 14.388  |  0:00:10s
epoch 2  | loss: 2.01606 | eval_logloss: 12.73724|  0:00:15s
epoch 3  | loss: 1.66916 | eval_logloss: 12.03187|  0:00:21s
epoch 4  | loss: 1.40582 | eval_logloss: 12.62031|  0:00:26s
epoch 5  | loss: 1.29474 | eval_logloss: 9.84051 |  0:00:32s
epoch 6  | loss: 1.17072 | eval_logloss: 10.6358 |  0:00:37s
epoch 7  | loss: 1.08807 | eval_logloss: 7.63036 |  0:00:42s
epoch 8  | loss: 1.01745 | eval_logloss: 9.25853 |  0:00:48s
epoch 9  | loss: 1.00206 | eval_logloss: 8.77457 |  0:00:53s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 7.63036




Validation Accuracy: 0.4625


# Task ID58: 2079

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=2079)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.33494 | eval_logloss: 12.81854|  0:00:05s
epoch 1  | loss: 2.76566 | eval_logloss: 13.57257|  0:00:12s
epoch 2  | loss: 1.84611 | eval_logloss: 13.62319|  0:00:16s
epoch 3  | loss: 1.27614 | eval_logloss: 12.17223|  0:00:21s
epoch 4  | loss: 1.24854 | eval_logloss: 10.01785|  0:00:28s
epoch 5  | loss: 1.16541 | eval_logloss: 12.5888 |  0:00:32s
epoch 6  | loss: 1.20899 | eval_logloss: 12.61539|  0:00:35s
epoch 7  | loss: 1.04628 | eval_logloss: 12.37821|  0:00:39s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 10.01785




Validation Accuracy: 0.3716


# Task ID59: 14969

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14969)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.78831 | eval_logloss: 9.95755 |  0:00:49s
epoch 1  | loss: 1.33728 | eval_logloss: 1.69743 |  0:01:38s
epoch 2  | loss: 1.31625 | eval_logloss: 2.08021 |  0:02:27s
epoch 3  | loss: 1.32187 | eval_logloss: 3.15533 |  0:03:17s
epoch 4  | loss: 1.31574 | eval_logloss: 1.50279 |  0:04:08s
epoch 5  | loss: 1.29438 | eval_logloss: 1.73998 |  0:04:57s
epoch 6  | loss: 1.31121 | eval_logloss: 3.35892 |  0:05:47s
epoch 7  | loss: 1.29905 | eval_logloss: 3.95843 |  0:06:35s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 1.50279




Validation Accuracy: 0.4203


# Task ID60: 3560

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3560)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.45934 | eval_logloss: 13.67008|  0:00:04s
epoch 1  | loss: 2.43579 | eval_logloss: 11.94223|  0:00:08s
epoch 2  | loss: 1.89646 | eval_logloss: 13.75031|  0:00:12s
epoch 3  | loss: 1.84084 | eval_logloss: 13.39133|  0:00:16s
epoch 4  | loss: 1.79361 | eval_logloss: 12.15877|  0:00:20s

Early stopping occurred at epoch 4 with best_epoch = 1 and best_eval_logloss = 11.94223




Validation Accuracy: 0.2500


# Task ID61: 14952

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14952)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.60541 | eval_logloss: 2.28642 |  0:00:54s
epoch 1  | loss: 0.27768 | eval_logloss: 1.92621 |  0:01:50s
epoch 2  | loss: 0.28006 | eval_logloss: 0.48772 |  0:02:46s
epoch 3  | loss: 0.25894 | eval_logloss: 0.27911 |  0:03:41s
epoch 4  | loss: 0.22448 | eval_logloss: 0.42473 |  0:04:36s
epoch 5  | loss: 0.20491 | eval_logloss: 0.89658 |  0:05:30s
epoch 6  | loss: 0.18057 | eval_logloss: 1.24414 |  0:06:25s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 0.27911




Validation Accuracy: 0.8960


# Task ID62: 125920

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 6  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=125920)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.31841 | eval_logloss: 8.44946 |  0:00:10s
epoch 1  | loss: 1.1383  | eval_logloss: 7.77588 |  0:00:19s
epoch 2  | loss: 0.74923 | eval_logloss: 9.24658 |  0:00:29s
epoch 3  | loss: 0.70811 | eval_logloss: 4.90499 |  0:00:41s
epoch 4  | loss: 0.69134 | eval_logloss: 7.42831 |  0:00:49s
epoch 5  | loss: 0.71446 | eval_logloss: 2.69128 |  0:00:59s
epoch 6  | loss: 0.71875 | eval_logloss: 3.57783 |  0:01:05s
epoch 7  | loss: 0.69537 | eval_logloss: 4.63117 |  0:01:12s
epoch 8  | loss: 0.70034 | eval_logloss: 0.9218  |  0:01:18s
epoch 9  | loss: 0.69606 | eval_logloss: 0.73823 |  0:01:25s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.73823




Validation Accuracy: 0.4200


# Task ID63: 23

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=23)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.3021  | eval_logloss: 9.88968 |  0:00:04s
epoch 1  | loss: 1.4079  | eval_logloss: 9.56543 |  0:00:08s
epoch 2  | loss: 1.11022 | eval_logloss: 9.85276 |  0:00:11s
epoch 3  | loss: 1.06278 | eval_logloss: 9.04951 |  0:00:15s
epoch 4  | loss: 1.01892 | eval_logloss: 9.56996 |  0:00:22s
epoch 5  | loss: 1.04256 | eval_logloss: 9.00102 |  0:00:27s
epoch 6  | loss: 0.99035 | eval_logloss: 7.67257 |  0:00:35s
epoch 7  | loss: 0.98769 | eval_logloss: 8.34191 |  0:00:40s
epoch 8  | loss: 0.99289 | eval_logloss: 7.85675 |  0:00:49s
epoch 9  | loss: 0.98539 | eval_logloss: 8.16619 |  0:00:52s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 7.67257




Validation Accuracy: 0.4746


# Task ID64: 3904

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3904)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.69085 | eval_logloss: 3.02211 |  0:00:29s
epoch 1  | loss: 0.47165 | eval_logloss: 0.64944 |  0:00:57s
epoch 2  | loss: 0.47162 | eval_logloss: 1.28842 |  0:01:26s
epoch 3  | loss: 0.47037 | eval_logloss: 0.85295 |  0:01:55s
epoch 4  | loss: 0.46402 | eval_logloss: 0.47787 |  0:02:25s
epoch 5  | loss: 0.46248 | eval_logloss: 0.47846 |  0:02:52s
epoch 6  | loss: 0.46463 | eval_logloss: 0.49844 |  0:03:21s
epoch 7  | loss: 0.46658 | eval_logloss: 0.55034 |  0:03:50s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 0.47787




Validation Accuracy: 0.8075


# Task ID65: 3022

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3022)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.83868 | eval_logloss: 14.33204|  0:00:02s
epoch 1  | loss: 3.06355 | eval_logloss: 13.44636|  0:00:05s
epoch 2  | loss: 2.74071 | eval_logloss: 14.25153|  0:00:07s
epoch 3  | loss: 2.4456  | eval_logloss: 13.26899|  0:00:10s
epoch 4  | loss: 2.03694 | eval_logloss: 13.60739|  0:00:13s
epoch 5  | loss: 1.82103 | eval_logloss: 12.95499|  0:00:16s
epoch 6  | loss: 1.62725 | eval_logloss: 12.83425|  0:00:18s
epoch 7  | loss: 1.4455  | eval_logloss: 13.4795 |  0:00:20s
epoch 8  | loss: 1.34725 | eval_logloss: 13.1243 |  0:00:23s
epoch 9  | loss: 1.39547 | eval_logloss: 12.96336|  0:00:25s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 12.83425




Validation Accuracy: 0.1919


# Task ID66: 9985

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9985)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.42217 | eval_logloss: 10.8249 |  0:00:16s
epoch 1  | loss: 1.60615 | eval_logloss: 10.70007|  0:00:32s
epoch 2  | loss: 1.53296 | eval_logloss: 10.67791|  0:00:49s
epoch 3  | loss: 1.50847 | eval_logloss: 7.18164 |  0:01:06s
epoch 4  | loss: 1.49401 | eval_logloss: 3.21664 |  0:01:22s
epoch 5  | loss: 1.48047 | eval_logloss: 3.2488  |  0:01:38s
epoch 6  | loss: 1.49348 | eval_logloss: 4.74702 |  0:01:53s
epoch 7  | loss: 1.49008 | eval_logloss: 5.15324 |  0:02:09s

Early stopping occurred at epoch 7 with best_epoch = 4 and best_eval_logloss = 3.21664




Validation Accuracy: 0.3595


# Task ID67: 9910

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=9910)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.29178 | eval_logloss: 9.00076 |  0:00:25s
epoch 1  | loss: 0.89262 | eval_logloss: 9.06445 |  0:00:51s
epoch 2  | loss: 0.71167 | eval_logloss: 5.5455  |  0:01:18s
epoch 3  | loss: 0.70148 | eval_logloss: 5.43918 |  0:01:43s
epoch 4  | loss: 0.68886 | eval_logloss: 2.96545 |  0:02:08s
epoch 5  | loss: 0.68351 | eval_logloss: 1.76203 |  0:02:35s
epoch 6  | loss: 0.66777 | eval_logloss: 1.22934 |  0:03:00s
epoch 7  | loss: 0.65735 | eval_logloss: 1.32232 |  0:03:26s
epoch 8  | loss: 0.62609 | eval_logloss: 3.20282 |  0:03:52s
epoch 9  | loss: 0.58995 | eval_logloss: 3.42794 |  0:04:19s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 1.22934




Validation Accuracy: 0.5593


# Task ID68: 14970

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14970)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.64427 | eval_logloss: 11.77286|  0:00:37s
epoch 1  | loss: 0.89124 | eval_logloss: 9.11257 |  0:01:13s
epoch 2  | loss: 0.45323 | eval_logloss: 3.97269 |  0:01:49s
epoch 3  | loss: 0.2724  | eval_logloss: 2.01379 |  0:02:27s
epoch 4  | loss: 0.1953  | eval_logloss: 3.0766  |  0:03:04s
epoch 5  | loss: 0.19732 | eval_logloss: 4.46687 |  0:03:40s
epoch 6  | loss: 0.13282 | eval_logloss: 6.04901 |  0:04:17s

Early stopping occurred at epoch 6 with best_epoch = 3 and best_eval_logloss = 2.01379




Validation Accuracy: 0.7568


# Task ID69: 3021

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 64  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=3021)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.39779 | eval_logloss: 0.9362  |  0:00:08s
epoch 1  | loss: 0.30459 | eval_logloss: 2.02867 |  0:00:18s
epoch 2  | loss: 0.17925 | eval_logloss: 0.95467 |  0:00:32s
epoch 3  | loss: 0.12473 | eval_logloss: 1.06857 |  0:00:39s

Early stopping occurred at epoch 3 with best_epoch = 0 and best_eval_logloss = 0.9362




Validation Accuracy: 0.9404


# Task ID70: 3481

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3481)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.64021 | eval_logloss: 15.17622|  0:00:33s
epoch 1  | loss: 1.94732 | eval_logloss: 14.67334|  0:01:25s
epoch 2  | loss: 1.36603 | eval_logloss: 12.75947|  0:01:56s
epoch 3  | loss: 1.0548  | eval_logloss: 10.73966|  0:02:24s
epoch 4  | loss: 0.82436 | eval_logloss: 10.94935|  0:02:53s
epoch 5  | loss: 0.77931 | eval_logloss: 9.17639 |  0:03:21s
epoch 6  | loss: 0.6501  | eval_logloss: 6.36518 |  0:03:49s
epoch 7  | loss: 0.58232 | eval_logloss: 6.5604  |  0:04:18s
epoch 8  | loss: 0.52974 | eval_logloss: 8.6821  |  0:04:47s
epoch 9  | loss: 0.47891 | eval_logloss: 7.94506 |  0:05:16s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 6.36518




Validation Accuracy: 0.3782


# Task ID71: 3573

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=3573)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')


# Task ID72: 146824

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146824)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.47736 | eval_logloss: 13.63074|  0:00:06s
epoch 1  | loss: 2.97089 | eval_logloss: 13.98944|  0:00:12s
epoch 2  | loss: 2.20004 | eval_logloss: 13.34332|  0:00:18s
epoch 3  | loss: 1.59475 | eval_logloss: 11.96605|  0:00:24s
epoch 4  | loss: 1.31504 | eval_logloss: 10.83907|  0:00:29s
epoch 5  | loss: 0.97346 | eval_logloss: 10.56946|  0:00:35s
epoch 6  | loss: 0.83513 | eval_logloss: 8.05052 |  0:00:41s
epoch 7  | loss: 0.72657 | eval_logloss: 9.93368 |  0:00:46s
epoch 8  | loss: 0.64297 | eval_logloss: 9.11678 |  0:00:53s
epoch 9  | loss: 0.58012 | eval_logloss: 5.32921 |  0:00:58s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 5.32921




Validation Accuracy: 0.6300


# Task ID73: 146820

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146820)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.25688 | eval_logloss: 0.85641 |  0:00:12s
epoch 1  | loss: 0.14395 | eval_logloss: 1.88568 |  0:00:24s
epoch 2  | loss: 0.13104 | eval_logloss: 0.80956 |  0:00:36s
epoch 3  | loss: 0.13393 | eval_logloss: 1.69857 |  0:00:48s
epoch 4  | loss: 0.11677 | eval_logloss: 1.0874  |  0:01:00s
epoch 5  | loss: 0.07932 | eval_logloss: 2.48067 |  0:01:13s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 0.80956




Validation Accuracy: 0.9432


# Task ID74: 146822

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146822)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.11134 | eval_logloss: 11.90503|  0:00:05s
epoch 1  | loss: 1.22501 | eval_logloss: 13.21631|  0:00:12s
epoch 2  | loss: 1.03007 | eval_logloss: 11.3184 |  0:00:17s
epoch 3  | loss: 0.82236 | eval_logloss: 11.73652|  0:00:24s
epoch 4  | loss: 0.83585 | eval_logloss: 11.02817|  0:00:30s
epoch 5  | loss: 0.69213 | eval_logloss: 11.18421|  0:00:36s
epoch 6  | loss: 0.6192  | eval_logloss: 10.03292|  0:00:42s
epoch 7  | loss: 0.66025 | eval_logloss: 9.87935 |  0:00:47s
epoch 8  | loss: 0.60797 | eval_logloss: 10.61304|  0:00:54s
epoch 9  | loss: 0.52615 | eval_logloss: 9.25311 |  0:00:59s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 9.25311




Validation Accuracy: 0.3831


# Task ID75: 146195

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 64  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=146195)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.88735 | eval_logloss: 1.85102 |  0:01:56s
epoch 1  | loss: 0.83127 | eval_logloss: 1.09154 |  0:03:34s
epoch 2  | loss: 0.79927 | eval_logloss: 0.96378 |  0:05:13s
epoch 3  | loss: 0.79278 | eval_logloss: 1.04925 |  0:06:52s
epoch 4  | loss: 0.79345 | eval_logloss: 0.8551  |  0:08:32s
epoch 5  | loss: 0.75154 | eval_logloss: 0.92834 |  0:10:11s
epoch 6  | loss: 0.71754 | eval_logloss: 1.10864 |  0:11:51s
epoch 7  | loss: 0.7025  | eval_logloss: 0.80745 |  0:13:31s
epoch 8  | loss: 0.69249 | eval_logloss: 0.79891 |  0:15:08s
epoch 9  | loss: 0.66537 | eval_logloss: 0.73321 |  0:16:47s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 0.73321




Validation Accuracy: 0.7370


# Task ID76: 146800

In [None]:
import numpy as np
import pandas as pd
import torch
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import openml

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class TabNetModel:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.params["n_a"] = self.params["n_d"]
        self.params["cat_idxs"] = args.cat_idx
        self.params["cat_dims"] = args.cat_dims
        self.params["device_name"] = device  # Set device for TabNet

        if args.objective == "regression":
            self.model = TabNetRegressor(**self.params)
            self.metric = "rmse"
        elif args.objective == "classification":
            self.model = TabNetClassifier(**self.params)
            self.metric = "logloss"

    def fit(self, X, y, X_val=None, y_val=None):
        if self.args.objective == "regression":
            y, y_val = y.reshape(-1, 1), y_val.reshape(-1, 1)

        drop_last = X.shape[0] % self.args.batch_size == 1
        self.model.fit(
            X,
            y,
            eval_set=[(X_val, y_val)],
            eval_name=["eval"],
            eval_metric=[self.metric],
            max_epochs=self.args.epochs,
            patience=self.args.early_stopping_rounds,
            batch_size=self.args.batch_size,
            drop_last=drop_last,
        )
        history = self.model.history
        return history["loss"], history["eval_" + self.metric]

    def predict(self, X):
        X = np.array(X, dtype=float)
        if self.args.objective == "regression":
            return self.model.predict(X)
        else:
            return self.model.predict_proba(X)

def define_trial_parameters(cls, trial, args):
    params = {
        "n_d": trial.suggest_int("n_d", 8, 64),
        "n_steps": trial.suggest_int("n_steps", 3, 10),
        "gamma": trial.suggest_float("gamma", 1.0, 2.0),
        "cat_emb_dim": min(trial.suggest_int("cat_emb_dim", 1, 3), 2),
        "n_independent": trial.suggest_int("n_independent", 1, 5),
        "n_shared": trial.suggest_int("n_shared", 1, 5),
        "momentum": trial.suggest_float("momentum", 0.001, 0.4, log=True),
        "mask_type": "entmax",  # Use entmax to avoid sparsemax issues
    }
    return params

def get_random_parameters(cls, seed):
    rs = np.random.RandomState(seed)
    params = {
        "n_d": rs.randint(8, 65),
        "n_steps": rs.randint(3, 11),
        "gamma": 1.0 + rs.rand(),
        "cat_emb_dim": min(rs.randint(1, 4), 2),
        "n_independent": rs.randint(1, 6),
        "n_shared": rs.randint(1, 6),
        "momentum": 0.4 * np.power(10, rs.uniform(-3, -1)),
        "mask_type": "entmax",  # Use entmax as alternative to sparsemax
    }
    return params

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Drop class labels for training
    X = X.drop(columns=[dataset.default_target_attribute], errors='ignore')

    # Identify and encode categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns
    cat_idx = []
    cat_dims = []
    for col in categorical_cols:
        X[col], uniques = pd.factorize(X[col])
        X[col] = X[col].replace(-1, len(uniques))  # Replace NaN factorized values with a valid new index
        cat_idx.append(X.columns.get_loc(col))
        cat_dims.append(len(uniques) + 1)  # Account for NaN as an additional category

    # Scale numeric features and handle missing values
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(-1, inplace=True)  # Replace NaN with -1 for non-categorical columns
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

    y, _ = pd.factorize(y)

    return X, y, cat_dims, cat_idx


# Example configuration and arguments
class Args:
    objective = "classification"  # Change to "regression" if needed
    batch_size = 16  # Larger batch size for numerical stability
    epochs = 10
    early_stopping_rounds = 3

    def __init__(self, cat_idx, cat_dims):
        self.cat_idx = cat_idx
        self.cat_dims = cat_dims

# Load data and set up arguments
X, y, cat_dims, cat_idx = load_openml_data(task_id=146800)  # Example OpenML task ID
args = Args(cat_idx, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameters and initialize the model
seed = 42
params = get_random_parameters(TabNetModel, seed)
params["device_name"] = device

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 3.64069 | eval_logloss: 13.72817|  0:00:08s
epoch 1  | loss: 3.06553 | eval_logloss: 13.65436|  0:00:18s
epoch 2  | loss: 2.12764 | eval_logloss: 14.17106|  0:00:25s
epoch 3  | loss: 1.75403 | eval_logloss: 12.99573|  0:00:36s
epoch 4  | loss: 1.51718 | eval_logloss: 13.03454|  0:00:45s
epoch 5  | loss: 1.49023 | eval_logloss: 12.67445|  0:00:54s
epoch 6  | loss: 1.43927 | eval_logloss: 10.23553|  0:01:02s
epoch 7  | loss: 1.3487  | eval_logloss: 12.07331|  0:01:07s
epoch 8  | loss: 1.31162 | eval_logloss: 10.41501|  0:01:12s
epoch 9  | loss: 1.23692 | eval_logloss: 10.73824|  0:01:17s

Early stopping occurred at epoch 9 with best_epoch = 6 and best_eval_logloss = 10.23553




Validation Accuracy: 0.2731


# Task ID77: 146817

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146817)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 2.68165 | eval_logloss: 11.99211|  0:00:08s
epoch 1  | loss: 1.74952 | eval_logloss: 12.22625|  0:00:18s
epoch 2  | loss: 1.43355 | eval_logloss: 11.65258|  0:00:24s
epoch 3  | loss: 1.29062 | eval_logloss: 10.65536|  0:00:30s
epoch 4  | loss: 1.18507 | eval_logloss: 9.73483 |  0:00:34s
epoch 5  | loss: 1.14264 | eval_logloss: 10.2273 |  0:00:39s
epoch 6  | loss: 1.09982 | eval_logloss: 9.07718 |  0:00:45s
epoch 7  | loss: 1.14554 | eval_logloss: 9.05169 |  0:00:49s
epoch 8  | loss: 1.06585 | eval_logloss: 11.81899|  0:00:55s
epoch 9  | loss: 1.07118 | eval_logloss: 8.62176 |  0:01:00s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 8.62176




Validation Accuracy: 0.3445


# Task ID78: 146819

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146819)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.78554 | eval_logloss: 2.65706 |  0:00:01s
epoch 1  | loss: 1.10755 | eval_logloss: 7.23312 |  0:00:03s
epoch 2  | loss: 0.82675 | eval_logloss: 1.62376 |  0:00:05s
epoch 3  | loss: 0.47711 | eval_logloss: 2.65706 |  0:00:06s
epoch 4  | loss: 0.60157 | eval_logloss: 2.93957 |  0:00:07s
epoch 5  | loss: 0.36914 | eval_logloss: 1.62376 |  0:00:09s
epoch 6  | loss: 0.36235 | eval_logloss: 2.25002 |  0:00:10s
epoch 7  | loss: 0.27615 | eval_logloss: 4.65629 |  0:00:11s
epoch 8  | loss: 0.29527 | eval_logloss: 7.1624  |  0:00:13s

Early stopping occurred at epoch 8 with best_epoch = 5 and best_eval_logloss = 1.62376




Validation Accuracy: 0.8981


# Task ID79: 146821

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=146821)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.84776 | eval_logloss: 11.79552|  0:00:04s
epoch 1  | loss: 1.05541 | eval_logloss: 8.20157 |  0:00:08s
epoch 2  | loss: 0.85181 | eval_logloss: 5.11446 |  0:00:13s
epoch 3  | loss: 0.75032 | eval_logloss: 5.11446 |  0:00:17s
epoch 4  | loss: 0.80587 | eval_logloss: 5.11446 |  0:00:22s
epoch 5  | loss: 0.79653 | eval_logloss: 5.11446 |  0:00:27s

Early stopping occurred at epoch 5 with best_epoch = 2 and best_eval_logloss = 5.11446




Validation Accuracy: 0.6792


# Task ID80: 14954

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=14954)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.13078 | eval_logloss: 9.44734 |  0:00:01s
epoch 1  | loss: 1.13367 | eval_logloss: 7.97119 |  0:00:02s
epoch 2  | loss: 1.37395 | eval_logloss: 7.4926  |  0:00:04s
epoch 3  | loss: 0.83519 | eval_logloss: 6.79028 |  0:00:06s
epoch 4  | loss: 0.70683 | eval_logloss: 7.0855  |  0:00:08s
epoch 5  | loss: 0.77105 | eval_logloss: 6.64266 |  0:00:09s
epoch 6  | loss: 0.72537 | eval_logloss: 7.0855  |  0:00:10s
epoch 7  | loss: 0.63287 | eval_logloss: 8.41404 |  0:00:12s
epoch 8  | loss: 0.67329 | eval_logloss: 7.23312 |  0:00:13s

Early stopping occurred at epoch 8 with best_epoch = 5 and best_eval_logloss = 6.64266




Validation Accuracy: 0.5833


# Task ID81: 167141

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167141)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 0.761   | eval_logloss: 6.77967 |  0:00:13s
epoch 1  | loss: 0.41278 | eval_logloss: 4.91764 |  0:00:27s
epoch 2  | loss: 0.38843 | eval_logloss: 11.72693|  0:00:43s
epoch 3  | loss: 0.34273 | eval_logloss: 4.4863  |  0:00:56s
epoch 4  | loss: 0.33398 | eval_logloss: 2.83512 |  0:01:10s
epoch 5  | loss: 0.33208 | eval_logloss: 2.75758 |  0:01:23s
epoch 6  | loss: 0.31043 | eval_logloss: 2.91404 |  0:01:36s
epoch 7  | loss: 0.30053 | eval_logloss: 2.2786  |  0:01:49s
epoch 8  | loss: 0.29898 | eval_logloss: 6.11773 |  0:02:03s
epoch 9  | loss: 0.27517 | eval_logloss: 1.32208 |  0:02:17s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 1.32208




Validation Accuracy: 0.7270


# Task ID82: 167140

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167140)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.91516 | eval_logloss: 9.66    |  0:00:09s
epoch 1  | loss: 1.24728 | eval_logloss: 8.02218 |  0:00:18s
epoch 2  | loss: 1.03928 | eval_logloss: 8.29849 |  0:00:27s
epoch 3  | loss: 0.89406 | eval_logloss: 7.43972 |  0:00:37s
epoch 4  | loss: 0.72824 | eval_logloss: 8.38782 |  0:00:45s
epoch 5  | loss: 0.62032 | eval_logloss: 4.78904 |  0:00:55s
epoch 6  | loss: 0.55244 | eval_logloss: 2.50983 |  0:01:04s
epoch 7  | loss: 0.46628 | eval_logloss: 3.89368 |  0:01:13s
epoch 8  | loss: 0.35348 | eval_logloss: 2.46433 |  0:01:22s
epoch 9  | loss: 0.30849 | eval_logloss: 2.10161 |  0:01:31s
Stop training because you reached max_epochs = 10 with best_epoch = 9 and best_eval_logloss = 2.10161




Validation Accuracy: 0.8182


# Task ID83: 167125

In [None]:
# Load data, split, and remove classes
X, y, cat_dims = load_openml_data(task_id=167125)  # Example task ID from OpenML
args = Args(X, cat_dims)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

seed = 42
params = get_random_parameters(TabNetModel, seed)

# Initialize and train the model
tabnet_model = TabNetModel(params, args)
train_loss, val_loss = tabnet_model.fit(X_train.values, y_train, X_val.values, y_val)

# Make predictions and evaluate accuracy
y_pred = tabnet_model.predict(X_val.values)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get predicted classes

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred_classes)
print(f'Validation Accuracy: {accuracy:.4f}')




epoch 0  | loss: 1.42006 | eval_logloss: 6.82898 |  0:00:19s
epoch 1  | loss: 0.63548 | eval_logloss: 2.79478 |  0:00:41s
epoch 2  | loss: 0.37057 | eval_logloss: 2.72187 |  0:01:02s
epoch 3  | loss: 0.30258 | eval_logloss: 1.99227 |  0:01:23s
epoch 4  | loss: 0.20639 | eval_logloss: 2.03623 |  0:01:44s
epoch 5  | loss: 0.1757  | eval_logloss: 1.50746 |  0:02:05s
epoch 6  | loss: 0.1877  | eval_logloss: 2.17526 |  0:02:25s
epoch 7  | loss: 0.16209 | eval_logloss: 0.72361 |  0:02:45s
epoch 8  | loss: 0.14063 | eval_logloss: 1.62026 |  0:03:06s
epoch 9  | loss: 0.12295 | eval_logloss: 2.25064 |  0:03:28s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_eval_logloss = 0.72361




Validation Accuracy: 0.5960


# Task ID84: 167124

# Task ID85: 167121

# SAINT

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Tabular Dataset class to handle data
class TabularDataset(Dataset):
    def __init__(self, features, targets, categorical_indices):
        self.features = torch.tensor(features.values, dtype=torch.float32)  # Convert to numpy array
        self.targets = torch.tensor(targets, dtype=torch.long)  # Convert to long for multiclass classification
        self.categorical_indices = categorical_indices

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

# Load the data from OpenML
def load_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    cat_columns = [col for col in X.columns if X[col].dtype.name == 'category']
    num_columns = [col for col in X.columns if X[col].dtype in ['int64', 'float64']]

    return X, y, cat_columns, num_columns

def preprocess_data(X, cat_columns):
    # Handle missing values in categorical columns
    for col in cat_columns:
        X[col] = X[col].astype('category')  # Convert the column to 'category' type if not already
        X[col] = X[col].cat.add_categories('Unknown')  # Add 'Unknown' as a new category
        X[col] = X[col].fillna('Unknown')  # Replace NaN with 'Unknown'

    # Apply label encoding for categorical columns
    label_encoders = {}
    for col in cat_columns:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col].astype(str))  # Ensure string type before label encoding
        label_encoders[col] = le  # Store the encoder for future use

    # Convert all columns to float for PyTorch compatibility
    for col in X.columns:
        X[col] = X[col].astype(float)

    return X, label_encoders


# Define the SAINT model
class SAINT(nn.Module):
    def __init__(self, input_dim, params):
        super(SAINT, self).__init__()
        self.embedding_dim = params['dim']
        self.embedding = nn.Linear(input_dim, self.embedding_dim)
        self.fc = nn.Linear(self.embedding_dim, len(np.unique(params['target'])))  # Output for each class

    def forward(self, x):
        x = self.embedding(x)
        x = self.fc(x)
        return x

# Train the model
def train_model(X_train, y_train, X_val, y_val, params):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_dataset = TabularDataset(X_train, y_train, params['cat_idx'])
    val_dataset = TabularDataset(X_val, y_val, params['cat_idx'])
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=params['val_batch_size'], shuffle=False)

    model = SAINT(input_dim=X_train.shape[1], params=params).to(device)
    criterion = nn.CrossEntropyLoss()  # For multiclass classification
    optimizer = optim.AdamW(model.parameters(), lr=params['lr'])

    # Training loop
    for epoch in range(params['epochs']):
        model.train()
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

    # Validation loop
    model.eval()
    val_accuracy = 0
    with torch.no_grad():
        all_preds = []
        all_labels = []
        for x_batch, y_batch in val_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            preds = torch.argmax(outputs, dim=1)  # Get class predictions for multiclass
            all_preds.append(preds.cpu().numpy())
            all_labels.append(y_batch.cpu().numpy())

        # Flatten the lists to compute accuracy
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        val_accuracy = np.mean(all_preds == all_labels)  # Compute accuracy

    return model, val_accuracy

# Generate random hyperparameters
def get_random_parameters(seed):
    rs = np.random.RandomState(seed)
    params = {
        "dim": rs.choice([32, 64, 128, 256]),
        "depth": rs.choice([1, 2, 3, 6, 12]),
        "heads": rs.choice([2, 4, 8]),
        "dropout": rs.choice([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]),
        "epochs": 100,
        "batch_size": 64,
        "val_batch_size": 32,
        "lr": 0.00003,
        "cat_idx": [],
        "num_idx": [],
    }
    return params

# Task ID1: 14965

In [None]:
# Load and preprocess the data
task_id = 14965
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8853


# Task ID2: 9977

In [None]:
# Load and preprocess the data
task_id = 9977
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9450


# Task ID3: 34539

In [None]:
# Load and preprocess the data
task_id = 34539
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9437


# Task ID4: 146606

In [None]:
# Load and preprocess the data
task_id = 146606
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6380


# Task ID5: 7592



In [None]:
# Load and preprocess the data
task_id = 7592
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8008


# Task ID6: 146195



In [None]:
# Load and preprocess the data
task_id = 146195
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6584


# Task ID7: 167119



In [None]:
# Load and preprocess the data
task_id = 167119
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6746


# Task ID8: 167120



In [None]:
# Load and preprocess the data
task_id = 167120
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5197


# Task ID11: 	168331



In [None]:
# Load and preprocess the data
task_id = 168331
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5564


# Task ID12: 	168330



In [None]:
# Load and preprocess the data
task_id = 168330
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6357


# Task ID13: 	168335



In [None]:
# Load and preprocess the data
task_id = 168335
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8809



# Task ID16: 	146212



In [None]:
# Load and preprocess the data
task_id = 146212
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9280


# Task ID19: 	168868



In [None]:
# Load and preprocess the data
task_id = 168868
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9803


# Task ID20: 31

In [None]:
# Load and preprocess the data
task_id = 31
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7200


# Task ID21: 10101

In [None]:
# Load and preprocess the data
task_id = 10101
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6533


# Task ID22: 	3913

In [None]:
# Load and preprocess the data
task_id = 3913
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5714


# Task ID23: 3

In [None]:
# Load and preprocess the data
task_id = 3
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9141


# Task ID24: 3917

In [None]:
# Load and preprocess the data
task_id = 3917
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7796


# Task ID25: 9957

In [None]:
# Load and preprocess the data
task_id = 9957
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8294


# Task ID26: 9946

In [None]:
# Load and preprocess the data
task_id = 9946
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9298


# Task ID27: 3918

In [None]:
# Load and preprocess the data
task_id = 3918
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8108


# Task ID28: 3903

In [None]:
# Load and preprocess the data
task_id = 3903
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8051


# Task ID29: 37

In [None]:
# Load and preprocess the data
task_id = 37
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7013


# Task ID30: 9971

In [None]:
# Load and preprocess the data
task_id = 9971
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7436


# Task ID31: 9952

In [None]:
# Load and preprocess the data
task_id = 9952
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7576


# Task ID32: 3902

In [None]:
# Load and preprocess the data
task_id = 3902
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6644


# Task ID33: 49

In [None]:
# Load and preprocess the data
task_id = 49
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6667


# Task ID34: 43

In [None]:
# Load and preprocess the data
task_id = 43
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9229


# Task ID35: 9978

In [None]:
# Load and preprocess the data
task_id = 9978
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9349


# Task ID36: 10093

In [None]:
# Load and preprocess the data
task_id = 10093
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9673


# Task ID37: 219

In [None]:
# Load and preprocess the data
task_id = 219
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7499


# Task ID38: 9976

In [None]:
# Load and preprocess the data
task_id = 9976
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5231


# Task ID39: 6

In [None]:
# Load and preprocess the data
task_id = 6
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7515


# Task ID40: 53

In [None]:
# Load and preprocess the data
task_id = 53
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.3706


# Task ID41: 11

In [None]:
# Load and preprocess the data
task_id = 11
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8400


# Task ID42: 15

In [None]:
# Load and preprocess the data
task_id = 15
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6786


# Task ID43: 16

In [None]:
# Load and preprocess the data
task_id = 16
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9600


# Task ID44: 14

In [None]:
# Load and preprocess the data
task_id = 14
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5575


# Task ID45: 32

In [None]:
# Load and preprocess the data
task_id = 32
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9363


# Task ID46: 3549

In [None]:
# Load and preprocess the data
task_id = 3549
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9941


# Task ID47: 12

In [None]:
# Load and preprocess the data
task_id = 12
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9000


# Task ID48: 9981

In [None]:
# Load and preprocess the data
task_id = 9981
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9444


# Task ID49: 18

In [None]:
# Load and preprocess the data
task_id = 18
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4325


# Task ID50: 28

In [None]:
# Load and preprocess the data
task_id = 28
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9600


# Task ID51: 2074

In [None]:
# Load and preprocess the data
task_id = 2074
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8375


# Task ID52: 29

In [None]:
# Load and preprocess the data
task_id = 29
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.5072


# Task ID53: 45

In [None]:
# Load and preprocess the data
task_id = 45
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7994


# Task ID54: 125922

In [None]:
# Load and preprocess the data
task_id = 125922
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8882


# Task ID55: 9960

In [None]:
# Load and preprocess the data
task_id = 9960
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6822


# Task ID56: 9964

In [None]:
# Load and preprocess the data
task_id = 9964
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8777


# Task ID57: 22

In [None]:
# Load and preprocess the data
task_id = 22
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7800


# Task ID58: 2079

In [None]:
# Load and preprocess the data
task_id = 2079
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.1486


# Task ID59: 14969

In [None]:
# Load and preprocess the data
task_id = 14969
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4385


# Task ID60: 3560

In [None]:
# Load and preprocess the data
task_id = 3560
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.1500


# Task ID61: 14952

In [None]:
# Load and preprocess the data
task_id = 14952
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9172


# Task ID62: 125920

In [None]:
# Load and preprocess the data
task_id = 125920
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6000


# Task ID63: 23

In [None]:
# Load and preprocess the data
task_id = 23
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4847


# Task ID64: 3904

In [None]:
# Load and preprocess the data
task_id = 3904
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8075


# Task ID65: 3022

In [None]:
# Load and preprocess the data
task_id = 3022
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.2323


# Task ID66: 9985

In [None]:
# Load and preprocess the data
task_id = 9985
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4436


# Task ID67: 9910

In [None]:
# Load and preprocess the data
task_id = 9910
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.7630


# Task ID68: 14970

In [None]:
# Load and preprocess the data
task_id = 14970
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9738


# Task ID69: 3021

In [None]:
# Load and preprocess the data
task_id = 3021
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9497


# Task ID70: 3481

In [None]:
# Load and preprocess the data
task_id = 3481
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9449


# Task ID71: 3573

In [None]:
# Load and preprocess the data
task_id = 3573
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9059


# Task ID72: 146824

In [None]:
# Load and preprocess the data
task_id = 146824
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9650


# Task ID73: 146820

In [None]:
# Load and preprocess the data
task_id = 146820
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9421


# Task ID74: 146822

In [None]:
# Load and preprocess the data
task_id = 146822
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8333


# Task ID75: 146195

In [None]:
# Load and preprocess the data
task_id = 146195
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6585


# Task ID76: 146800

In [None]:
# Load and preprocess the data
task_id = 146800
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.1481


# Task ID77: 146817

In [None]:
# Load and preprocess the data
task_id = 146817
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4473


# Task ID78: 146819

In [None]:
# Load and preprocess the data
task_id = 146819
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9074


# Task ID79: 146821

In [None]:
# Load and preprocess the data
task_id = 146821
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.6676


# Task ID80: 14954

In [None]:
# Load and preprocess the data
task_id = 14954
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.4444


# Task ID81: 167141

In [None]:
# Load and preprocess the data
task_id = 167141
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.8620


# Task ID82: 167140

In [None]:
# Load and preprocess the data
task_id = 167140
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9342


# Task ID83: 167125

In [None]:
# Load and preprocess the data
task_id = 167125
X, y, cat_columns, num_columns = load_data(task_id)
X, label_encoders = preprocess_data(X, cat_columns)

# Encode the target variable
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)  # Encode the target labels to integers

params = get_random_parameters(seed=42)
params['cat_idx'] = [X.columns.get_loc(c) for c in cat_columns]
params['num_idx'] = [X.columns.get_loc(c) for c in num_columns if c in X.columns]
params['target'] = np.unique(y_encoded)  # Add the unique targets for the output layer size

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model, accuracy = train_model(X_train, y_train, X_val, y_val, params)
print(f'Validation Accuracy: {accuracy:.4f}')


Validation Accuracy: 0.9573


# Task ID84: 167124

# Task ID85: 167121

# VIME

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import openml

class VIMESelf(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, input_dim)
        self.mask_layer = nn.Linear(input_dim, input_dim)
        self.feat_layer = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        x = F.relu(self.input_layer(x))
        out_mask = torch.sigmoid(self.mask_layer(x))
        out_feat = torch.sigmoid(self.feat_layer(x))
        return out_mask, out_feat


class VIMESemi(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=100, n_layers=5):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.layers = nn.ModuleList(
            [nn.Linear(hidden_dim, hidden_dim) for _ in range(n_layers - 1)]
        )
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = F.relu(self.input_layer(x))
        for layer in self.layers:
            x = F.relu(layer(x))
        out = self.output_layer(x)
        return out


class VIME:
    def __init__(self, params, args):
        self.params = params
        self.args = args
        self.model_self = VIMESelf(args['num_features']).to(args['device'])
        self.model_semi = VIMESemi(args['num_features'], args['num_classes']).to(args['device'])

        if args['data_parallel']:
            self.model_self = nn.DataParallel(self.model_self)
            self.model_semi = nn.DataParallel(self.model_semi)

    def fit(self, X, y, X_val=None, y_val=None):
        self.fit_self(X)
        return self.fit_semi(X, y, X_val, y_val)

    def fit_self(self, X):
        optimizer = optim.RMSprop(self.model_self.parameters(), lr=0.001)
        loss_func_mask = nn.BCELoss()
        loss_func_feat = nn.MSELoss()

        m_unlab = self.mask_generator(0.3, X)
        m_label, x_tilde = self.pretext_generator(m_unlab, X)

        x_tilde = torch.tensor(x_tilde).float().to(self.args['device'])
        m_label = torch.tensor(m_label).float().to(self.args['device'])
        X = torch.tensor(X).float().to(self.args['device'])
        train_dataset = TensorDataset(x_tilde, m_label, X)
        train_loader = DataLoader(train_dataset, batch_size=self.args['batch_size'], shuffle=True)

        for epoch in range(10):
            for batch_X, batch_mask, batch_feat in train_loader:
                out_mask, out_feat = self.model_self(batch_X)
                loss_mask = loss_func_mask(out_mask, batch_mask)
                loss_feat = loss_func_feat(out_feat, batch_feat)
                loss = loss_mask + loss_feat * self.params['alpha']

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

    def fit_semi(self, X, y, X_val=None, y_val=None):
        optimizer = optim.AdamW(self.model_semi.parameters())
        X = torch.tensor(X).float().to(self.args['device'])
        y = torch.tensor(y).long().to(self.args['device'])
        X_val = torch.tensor(X_val).float().to(self.args['device'])
        y_val = torch.tensor(y_val).long().to(self.args['device'])

        loss_func_supervised = nn.CrossEntropyLoss()

        train_dataset = TensorDataset(X, y)
        train_loader = DataLoader(train_dataset, batch_size=self.args['batch_size'], shuffle=True)

        for epoch in range(self.args['epochs']):
            for batch_X, batch_y in train_loader:
                y_hat = self.model_semi(batch_X)

                loss = loss_func_supervised(y_hat, batch_y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # Evaluation on validation set
        with torch.no_grad():
            val_preds = self.model_semi(X_val)
            val_preds = torch.argmax(val_preds, dim=1)
            accuracy = (val_preds == y_val).float().mean().item()
            return accuracy

    @staticmethod
    def mask_generator(p_m, x):
        mask = np.random.binomial(1, p_m, x.shape)
        return mask

    @staticmethod
    def pretext_generator(m, x):
        no, dim = x.shape
        x_bar = np.zeros([no, dim])
        for i in range(dim):
            idx = np.random.permutation(no)
            x_bar[:, i] = x[idx, i]

        x_tilde = x * (1 - m) + x_bar * m
        m_new = 1 * (x != x_tilde)
        return m_new, x_tilde

def load_openml_data(task_id):
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Identify categorical columns (exclude numeric columns)
    categorical_columns = X.select_dtypes(include=['object', 'category']).columns

    # Apply One-Hot Encoding to categorical columns
    X = pd.get_dummies(X, columns=categorical_columns, drop_first=True)

    # Handle non-finite values by filling NaNs with zeros (or use an appropriate strategy for your case)
    X.replace([np.inf, -np.inf], np.nan, inplace=True)
    X.fillna(0, inplace=True)

    # Ensure all columns in X are numeric
    X = X.apply(pd.to_numeric, errors='coerce')
    X.fillna(0, inplace=True)

    # Check for non-numeric columns
    non_numeric_columns = X.select_dtypes(exclude=['number']).columns
    if len(non_numeric_columns) > 0:
        # Drop non-numeric columns
        X = X.drop(columns=non_numeric_columns)

    # Convert y to numeric if it's categorical
    if y.dtype == 'object' or isinstance(y.dtype, pd.CategoricalDtype):
        y = pd.factorize(y)[0]

    # Ensure y is a numpy array and has the correct type
    y = np.asarray(y).astype(np.int64)

    # Final check for non-numeric types in X
    if not np.issubdtype(X.values.dtype, np.number):
        raise ValueError("There are still non-numeric columns in the dataset after conversion.")

    return X.to_numpy(), y


# Task ID1: 14965

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 14965  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8857


# Task ID2: 9977

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9977  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9479


# Task ID3: 34539

In [None]:
# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 34539  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.9437


# Task ID4: 146606

In [None]:
# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146606  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.6869


# Task ID5: 7592



In [None]:
# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 7592  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7945


# Task ID6: 146195



In [None]:
# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146195  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.6569


# Task ID7: 167119



In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 167119 # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8648


# Task ID8: 167120



In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 167120  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.5195


# Task ID11: 	168331



In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 168331  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.6197


# Task ID12: 	168330



In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 168330  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.6627


# Task ID13: 	168335



In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 168335  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9218



# Task ID16: 	146212



In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146212  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9977


# Task ID19: 	168868



In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 168868  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9803


# Task ID20: 31

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 31  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.6800


# Task ID21: 10101

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 10101  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7533


# Task ID22: 	3913

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3913  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7619


# Task ID23: 3

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.5312


# Task ID24: 3917

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3917  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8436


# Task ID25: 9957

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9957  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8673


# Task ID26: 9946

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9946  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9649


# Task ID27: 3918

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3918  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9144


# Task ID28: 3903

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3903  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8978


# Task ID29: 37

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 37  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7403


# Task ID30: 9971

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9971  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7436


# Task ID31: 9952

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9952  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8511


# Task ID32: 3902

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3902  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7842


# Task ID33: 49

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 49  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.6510


# Task ID34: 43

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 43  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9338


# Task ID35: 9978

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9978  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9349


# Task ID36: 10093

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 10093  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 1.0000


# Task ID37: 219

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 219  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7723


# Task ID38: 9976

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9976  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.4942


# Task ID39: 6

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 6  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9143


# Task ID40: 53

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 53  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.5176


# Task ID41: 11

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 11  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9840


# Task ID42: 15

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 15  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9857


# Task ID43: 16

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 16  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9725


# Task ID44: 14

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 14  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.6975


# Task ID45: 32

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 32  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9914


# Task ID46: 3549

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id =  3549  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9941


# Task ID47: 12

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 12  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8675


# Task ID48: 9981

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9981  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9398


# Task ID49: 18

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 18  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.3050


# Task ID50: 28

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 28  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9564


# Task ID51: 2074

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 2074  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9020


# Task ID52: 29

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 29  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.6304


# Task ID53: 45

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 45  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.5000


# Task ID54: 125922

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 125922  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9164


# Task ID55: 9960

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9960  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8718


# Task ID56: 9964

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9964  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8934


# Task ID57: 22

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 22  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8325


# Task ID58: 2079

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 2079  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.4730


# Task ID59: 14969

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 14969  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.4678


# Task ID60: 3560

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3560  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.2438


# Task ID61: 14952

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 14952  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.5676


# Task ID62: 125920

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 125920  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.5800


# Task ID63: 23

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 23  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.4949


# Task ID64: 3904

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3904  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8066


# Task ID65: 3022

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3022  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.6970


# Task ID66: 9985

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9985  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.5245


# Task ID67: 9910

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 9910  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.7816


# Task ID68: 14970

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 14970  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9107


# Task ID69: 3021

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3021  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9497


# Task ID70: 3481

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3481  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9064


# Task ID71: 3573

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 3573  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


# Task ID72: 146824

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146824  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9675


# Task ID73: 146820

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146820  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9669


# Task ID74: 146822

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146822  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8745


# Task ID75: 146195

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146195  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.6569


# Task ID76: 146800

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146800  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.5093


# Task ID77: 146817

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146817  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.3753


# Task ID78: 146819

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146819  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.9074


# Task ID79: 146821

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 146821 # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.6792


# Task ID80: 14954

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 14954  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.4444


# Task ID81: 167141

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 167141 # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8650


# Task ID82: 167140

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 167140  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")




Validation Accuracy: 0.4969


# Task ID83: 167125

In [None]:


# Main execution
if __name__ == "__main__":
    # Specify OpenML task ID
    task_id = 167125  # Replace with your desired task ID
    X, y = load_openml_data(task_id)

    # Determine number of features and classes from the dataset
    num_features = X.shape[1]
    num_classes = len(np.unique(y))

    # Split the dataset
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define parameters and arguments
    params = {
        "p_m": 0.5,
        "alpha": 3.0,
    }
    args = {
        "num_features": num_features,
        "num_classes": num_classes,
        "batch_size": 32,
        "epochs": 10,
        "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        "data_parallel": False,
    }

    # Initialize and train VIME model
    model = VIME(params, args)
    accuracy = model.fit(X_train, y_train, X_val, y_val)

    # Print only validation accuracy
    print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8933


# Task ID84: 167124

# Task ID85: 167121

In [None]:
pip install pandas scikit-learn openml pymfe tqdm

Collecting openml
  Downloading openml-0.15.0-py3-none-any.whl.metadata (9.9 kB)
Collecting pymfe
  Downloading pymfe-0.4.3-py3-none-any.whl.metadata (14 kB)
Collecting liac-arff>=2.4.0 (from openml)
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting xmltodict (from openml)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting minio (from openml)
  Downloading minio-7.2.10-py3-none-any.whl.metadata (6.5 kB)
Collecting texttable (from pymfe)
  Downloading texttable-1.7.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting igraph>=0.10.1 (from pymfe)
  Downloading igraph-0.11.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting gower (from pymfe)
  Downloading gower-0.1.2-py3-none-any.whl.metadata (3.7 kB)
Collecting pycryptodome (from minio->openml)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading op

In [None]:
from pathlib import Path
import openml
import pandas as pd
from pymfe.mfe import MFE
from tqdm import tqdm
import warnings

# Suppress precision and invalid value warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


# Meta-feature extraction settings
groups = ["landmarking", "general", "statistical", "model-based", "info-theory", "relative"]
summary_funcs = ["mean", "sd", "count", "histogram", "iq_range", "kurtosis", "max", "median", "min", "quantiles", "range", "skewness"]
scoring = "balanced-accuracy"

def featurize_dataset(task_id):
    # Load data using OpenML task ID
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Convert to numpy arrays
    X = X.to_numpy()
    y = y.to_numpy()

    # Identify categorical columns
    categorical_cols = list(dataset.get_features_by_type('nominal', [dataset.default_target_attribute]))

    # Check if the dataset is classification
    if dataset.qualities["NumberOfClasses"] <= 1:
        print("Unsupported target type. Skipping.")
        return None

    print(f"Processing dataset: {dataset.name}")
    metafeats = []

    # Extract metafeatures
    mfe = MFE(groups=groups, summary=summary_funcs, random_state=0, score=scoring)
    mfe.fit(X, y, cat_cols=categorical_cols, transform_num=False, transform_cat=None)
    ft = mfe.extract()

    # Consolidate results
    fold_metafeats = {"dataset_name": dataset.name}
    for group in groups:
        ft_group = mfe.parse_by_group(group, ft)
        fold_metafeats.update(
            {f"f__pymfe.{group}.{name}": value for name, value in zip(*ft_group)}
        )
    metafeats.append(fold_metafeats)

    return metafeats

def featurize_all_datasets(task_ids):
    output_file = Path("metafeatures.csv")
    if output_file.exists():
        computed_features = pd.read_csv(output_file)
        computed_features.set_index("dataset_name", inplace=True)
    else:
        computed_features = None

    for task_id in task_ids:
        dataset_name = openml.tasks.get_task(task_id).get_dataset().name
        if computed_features is not None and dataset_name in computed_features.index:
            continue

        print(f"Featurizing task ID: {task_id}")
        dataset_metafeatures = featurize_dataset(task_id)
        if dataset_metafeatures is None:
            continue

        dataset_metafeatures = pd.DataFrame(dataset_metafeatures)
        dataset_metafeatures.set_index("dataset_name", inplace=True)

        if computed_features is None:
            computed_features = dataset_metafeatures
            computed_features = computed_features[sorted(computed_features.columns)]
        else:
            computed_features = pd.concat([dataset_metafeatures, computed_features])

        print("Writing. Do not interrupt...")
        computed_features.to_csv(output_file)

# Specify OpenML task IDs
task_ids = [14965, 9977, 34539, 146606, 7592, 146195, 167119, 167120, 168331, 168330, 168335, 146212,
            168868, 31, 10101, 3913, 3917, 9957, 9946, 3918,
            3903, 37, 9971, 9952, 3902, 49, 43, 9978, 10093, 219, 9976, 6, 53, 11, 15, 16, 14, 32, 3549,
            12, 9981, 18, 28, 2074, 29, 45, 125922, 9960, 9964, 22, 2079, 14969, 3560, 14952, 125920, 23,
            3904, 3022, 9985, 9910, 14970, 3021, 3481, 146824, 146820, 146822, 146195, 146800, 146817,
            146819, 146821, 14954, 167141, 167140, 167125]
featurize_all_datasets(task_ids)




Featurizing task ID: 14965
Processing dataset: bank-marketing


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
  res = hypotest_fun_out(*samples, **kwds)


Writing. Do not interrupt...
Featurizing task ID: 9977
Processing dataset: nomao


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 34539
Processing dataset: Amazon_employee_access
Writing. Do not interrupt...
Featurizing task ID: 146606
Processing dataset: higgs


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 7592
Processing dataset: adult


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
TypeError("'<' not supported between instances of 'float' and 'str'").


Writing. Do not interrupt...
Featurizing task ID: 146195
Processing dataset: connect-4
Writing. Do not interrupt...
Featurizing task ID: 167119
Processing dataset: jungle_chess_2pcs_raw_endgame_complete


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
  res = hypotest_fun_out(*samples, **kwds)


Writing. Do not interrupt...
Featurizing task ID: 167120
Processing dataset: numerai28.6


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 168331
Processing dataset: volkert


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
  res = hypotest_fun_out(*samples, **kwds)


Writing. Do not interrupt...
Featurizing task ID: 168330
Processing dataset: jannis


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 168335
Processing dataset: MiniBooNE


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146212
Processing dataset: shuttle


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 168868
Processing dataset: APSFailure


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 31
Processing dataset: credit-g


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 10101
Processing dataset: blood-transfusion-service-center


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3913
Processing dataset: kc2


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3917
Processing dataset: kc1


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9957
Processing dataset: qsar-biodeg


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9946
Processing dataset: wdbc


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3918
Processing dataset: pc1


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3903
Processing dataset: pc3


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 37
Processing dataset: diabetes


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9971
Processing dataset: ilpd


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9952
Processing dataset: phoneme


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3902
Processing dataset: pc4


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 49
Processing dataset: tic-tac-toe
Writing. Do not interrupt...
Featurizing task ID: 43
Processing dataset: spambase


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9978
Processing dataset: ozone-level-8hr


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 10093
Processing dataset: banknote-authentication


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 219
Processing dataset: electricity


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
  res = hypotest_fun_out(*samples, **kwds)


Writing. Do not interrupt...
Featurizing task ID: 9976
Processing dataset: madelon


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 6
Processing dataset: letter


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 53
Processing dataset: vehicle


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 11
Processing dataset: balance-scale


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 15
Processing dataset: breast-w


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 16
Processing dataset: mfeat-karhunen


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 14
Processing dataset: mfeat-fourier


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 32
Processing dataset: pendigits


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3549
Processing dataset: analcatdata_authorship


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 12
Processing dataset: mfeat-factors


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9981
Processing dataset: cnae-9


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 18
Processing dataset: mfeat-morphological


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 28
Processing dataset: optdigits


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
  res = hypotest_fun_out(*samples, **kwds)


Writing. Do not interrupt...
Featurizing task ID: 2074
Processing dataset: satimage


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 29
Processing dataset: credit-approval


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
TypeError("'<' not supported between instances of 'str' and 'float'").


Writing. Do not interrupt...
Featurizing task ID: 45
Processing dataset: splice
Writing. Do not interrupt...
Featurizing task ID: 125922
Processing dataset: texture


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9960
Processing dataset: wall-robot-navigation


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9964
Processing dataset: semeion


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 22
Processing dataset: mfeat-zernike


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 2079
Processing dataset: eucalyptus


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 14969
Processing dataset: GesturePhaseSegmentationProcessed


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3560
Processing dataset: analcatdata_dmft
Writing. Do not interrupt...
Featurizing task ID: 14952
Processing dataset: PhishingWebsites
Writing. Do not interrupt...
Featurizing task ID: 125920
Processing dataset: dresses-sales


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
TypeError("'<' not supported between instances of 'float' and 'str'").


Writing. Do not interrupt...
Featurizing task ID: 23
Processing dataset: cmc


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3904
Processing dataset: jm1


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3022
Processing dataset: vowel


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9985
Processing dataset: first-order-theorem-proving


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 9910
Processing dataset: Bioresponse


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 14970
Processing dataset: har


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 3021
Processing dataset: sick


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
TypeError("'<' not supported between instances of 'float' and 'str'").


Writing. Do not interrupt...
Featurizing task ID: 3481
Processing dataset: isolet


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146824
Processing dataset: mfeat-pixel


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146820
Processing dataset: wilt


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146822
Processing dataset: segment


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146800
Processing dataset: MiceProtein


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146817
Processing dataset: steel-plates-fault


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146819
Processing dataset: climate-model-simulation-crashes


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 146821
Processing dataset: car
Writing. Do not interrupt...
Featurizing task ID: 14954
Processing dataset: cylinder-bands


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").
TypeError("'<' not supported between instances of 'float' and 'str'").


Writing. Do not interrupt...
Featurizing task ID: 167141
Processing dataset: churn


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...
Featurizing task ID: 167140
Processing dataset: dna
Writing. Do not interrupt...
Featurizing task ID: 167125
Processing dataset: Internet-Advertisements


TypeError("OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'").


Writing. Do not interrupt...


In [None]:
!pip list --format=freeze > requirements.txt