# GBDTs: XGBoost, CatBoost and LightGBM

In [None]:
pip install pandas scikit-learn openml

Collecting openml
  Downloading openml-0.15.0-py3-none-any.whl.metadata (9.9 kB)
Collecting liac-arff>=2.4.0 (from openml)
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting xmltodict (from openml)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting minio (from openml)
  Downloading minio-7.2.10-py3-none-any.whl.metadata (6.5 kB)
Collecting pycryptodome (from minio->openml)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading openml-0.15.0-py3-none-any.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.0/158.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading minio-7.2.10-py3-none-any.whl (93 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.9/93.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Downloading pyc

In [None]:
pip install catboost xgboost lightgbm

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [None]:
pip install dask[dataframe]

Collecting dask-expr<1.2,>=1.1 (from dask[dataframe])
  Downloading dask_expr-1.1.17-py3-none-any.whl.metadata (2.6 kB)
INFO: pip is looking at multiple versions of dask-expr to determine which version is compatible with other requirements. This could take a while.
  Downloading dask_expr-1.1.16-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.15-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.14-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.13-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.12-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.11-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.10-py3-none-any.whl.metadata (2.5 kB)
Downloading dask_expr-1.1.10-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.2/242.2 kB[0m [31m468.8 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dask-expr
Successfully installed dask-expr-1.1.10


In [None]:
import numpy as np
import pandas as pd
import openml
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, mean_squared_error
from catboost import CatBoostClassifier, CatBoostRegressor
from xgboost import XGBClassifier, XGBRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.model_selection import StratifiedKFold, KFold, cross_val_score





# Preprocessing

In [None]:
def load_preprocess_task(task_id, model_type="xgboost", task_type="classification", target_encode=None):
    """Fetch an OpenML task and turn it into a numeric feature matrix plus target.

    Numeric columns are mean-imputed and standardised. Columns of dtype
    ``object``/``category`` are imputed with the most frequent value and then
    one-hot encoded (``"xgboost"``, ``"lightgbm"``) or ordinal encoded
    (``"catboost"``). Columns of any other dtype are listed in neither
    transformer, so ``ColumnTransformer`` (default ``remainder='drop'``)
    discards them.

    The preprocessor is fitted on the *whole* dataset, so a cross-validation
    run on the returned matrix uses imputation/scaling statistics that were
    computed with the held-out rows included.

    Args:
        task_id: OpenML task id, passed to ``openml.tasks.get_task``.
        model_type: ``"xgboost"``, ``"catboost"`` or ``"lightgbm"``; selects
            the categorical encoding.
        task_type: ``"classification"`` or ``"regression"``. Only consulted
            when ``target_encode`` is ``None``.
        target_encode: ``True`` to label-encode the target, ``False`` to leave
            it untouched, ``None`` to encode only for classification tasks.

    Returns:
        A tuple ``(X_preprocessed, y, cat_features)``. ``cat_features`` is
        ``None`` for ``"xgboost"``, an empty list for ``"lightgbm"`` (after
        one-hot encoding no categorical column is left), and for
        ``"catboost"`` the column positions of the ordinal-encoded features
        *in the returned matrix*.

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    supported_models = ("xgboost", "catboost", "lightgbm")
    if model_type not in supported_models:
        # Fail fast: previously an unknown model type surfaced only later as
        # an UnboundLocalError on ``categorical_transformer``.
        raise ValueError(
            f"Invalid model type {model_type!r}. Use one of {supported_models}."
        )

    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=task.target_name)

    print(f"Dataset shape: {X.shape}")

    # Encode target variable if requested, or by default for classification
    if target_encode or (target_encode is None and task_type == "classification"):
        y = LabelEncoder().fit_transform(y)

    # Define feature columns
    categorical_cols = X.select_dtypes(include=['object', 'category']).columns
    numeric_cols = X.select_dtypes(include=['number']).columns

    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    # Only the encoding step differs between model types.
    if model_type == "catboost":
        encoder_step = ('ordinal', OrdinalEncoder())
    else:
        encoder_step = ('onehot', OneHotEncoder(handle_unknown='ignore'))
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        encoder_step
    ])

    if model_type == "catboost":
        # ColumnTransformer emits the 'num' block first and the 'cat' block
        # second, so positions taken from the raw frame would be wrong.
        # NOTE(review): assumes no column is dropped by the imputers (a
        # column that is entirely missing would be) -- confirm per dataset.
        first_cat = len(numeric_cols)
        cat_features = list(range(first_cat, first_cat + len(categorical_cols)))
    else:
        # One-hot encoding leaves no categorical column to point at.
        cat_features = []

    # Create preprocessor and preprocess data
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_cols),
            ('cat', categorical_transformer, categorical_cols)
        ])
    X_preprocessed = preprocessor.fit_transform(X)

    return X_preprocessed, y, cat_features if model_type in ["catboost", "lightgbm"] else None




# Cross Validation

In [None]:
def cross_validate_model(model, X, y, task_type="classification", n_folds=10):
    """Run shuffled K-fold cross-validation and return the mean fold score.

    Classification uses ``StratifiedKFold`` and accuracy; regression uses
    ``KFold`` and RMSE. Both splitters shuffle with ``random_state=42``.
    The same ``model`` object is refitted on every fold, so on return it is
    left fitted on the last training split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X: Feature matrix. NumPy arrays (and anything else that supports
            integer-array row indexing with ``[]``) as well as pandas
            DataFrames are accepted.
        y: Target values; NumPy array or pandas Series.
        task_type: ``"classification"`` or ``"regression"``.
        n_folds: Number of folds.

    Returns:
        The average accuracy (classification) or RMSE (regression) over folds.

    Raises:
        ValueError: If ``task_type`` is neither supported value.
    """
    if task_type == "classification":
        cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = accuracy_score
    elif task_type == "regression":
        cv = KFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = mean_squared_error
    else:
        raise ValueError("Invalid task type. Use 'classification' or 'regression'.")

    def _take_rows(data, positions):
        # The splitter yields positional indices. On pandas objects plain []
        # is label-based (KeyError on a DataFrame, wrong rows on a Series
        # with a non-default index), so go through .iloc there.
        return data.iloc[positions] if hasattr(data, "iloc") else data[positions]

    scores = []
    for train_index, test_index in cv.split(X, y):
        X_train, X_test = _take_rows(X, train_index), _take_rows(X, test_index)
        y_train, y_test = _take_rows(y, train_index), _take_rows(y, test_index)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        score = scoring_func(y_test, y_pred)
        if task_type == "regression":
            score = np.sqrt(score)  # MSE -> RMSE

        scores.append(score)

    avg_score = np.mean(scores)
    metric_name = "Accuracy" if task_type == "classification" else "RMSE"
    print(f"Average {metric_name}: {avg_score:.4f}")

    return avg_score





# XGBoost

In [None]:
def get_random_xgboost_parameters(seed):
    """Draw one XGBoost hyper-parameter configuration, reproducibly from ``seed``.

    All values are sampled log-uniformly: ``max_depth`` in [2, 12] (rounded to
    an int), ``alpha`` and ``lambda`` in [1e-8, 1], ``eta`` in [0.03, 0.3].

    Args:
        seed: Seed for the private ``numpy.random.RandomState``.

    Returns:
        Dict with the keys ``max_depth``, ``alpha``, ``lambda`` and ``eta``.
    """
    sampler = np.random.RandomState(seed)

    # The exponents are drawn in a fixed order so a seed always maps to the
    # same configuration.
    depth_exponent = sampler.uniform(1, np.log2(12))
    alpha_exponent = sampler.uniform(-8, 0)
    lambda_exponent = sampler.uniform(-8, 0)
    eta_exponent = sampler.uniform(-2, -1)

    return {
        "max_depth": int(np.round(np.power(2, depth_exponent))),
        "alpha": np.power(10, alpha_exponent),
        "lambda": np.power(10, lambda_exponent),
        "eta": 3.0 * np.power(10, eta_exponent),
    }

def run_xgboost(X, y, seed=42, task_type="classification"):
    """Cross-validate XGBoost with one randomly drawn configuration and print the result.

    The hyper-parameters come from ``get_random_xgboost_parameters(seed)``.
    Evaluation is ``cross_val_score`` with ``cv=10``, scored by accuracy for
    classification and by negative RMSE for regression. Nothing is returned;
    the parameters and the mean ± std of the fold scores are printed.

    Args:
        X: Feature matrix accepted by the XGBoost estimators.
        y: Target values.
        seed: Seed used only for sampling the hyper-parameters.
        task_type: ``"classification"`` or ``"regression"``.

    Raises:
        ValueError: If ``task_type`` is neither supported value. (Previously
            any other string, e.g. a typo, silently trained a regressor.)
    """
    if task_type == "classification":
        estimator_cls, scoring = XGBClassifier, 'accuracy'
    elif task_type == "regression":
        estimator_cls, scoring = XGBRegressor, 'neg_root_mean_squared_error'
    else:
        raise ValueError("Invalid task type. Use 'classification' or 'regression'.")

    params = get_random_xgboost_parameters(seed)
    model = estimator_cls(**params)

    scores = cross_val_score(model, X, y, cv=10, scoring=scoring)
    print(f"XGBoost with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")




# CatBoost

In [None]:
def get_random_catboost_parameters(seed):
    """Draw one CatBoost hyper-parameter configuration, reproducibly from ``seed``.

    Values are sampled log-uniformly: ``learning_rate`` in [0.03, 0.3],
    ``max_depth`` in [2, 12] (rounded to an int) and ``l2_leaf_reg`` in
    [0.5, 30].

    Args:
        seed: Seed for the private ``numpy.random.RandomState``.

    Returns:
        Dict with the keys ``learning_rate``, ``max_depth`` and ``l2_leaf_reg``.
    """
    sampler = np.random.RandomState(seed)

    # Fixed draw order keeps the seed -> configuration mapping stable.
    rate_exponent = sampler.uniform(-2, -1)
    depth_exponent = sampler.uniform(1, np.log2(12))
    l2_exponent = sampler.uniform(0, 1)

    return {
        "learning_rate": 3.0 * np.power(10, rate_exponent),
        "max_depth": int(np.round(np.power(2, depth_exponent))),
        "l2_leaf_reg": 0.5 * np.power(60, l2_exponent),
    }

def run_catboost(X, y, seed=42, task_type="classification", num_classes=None):
    """Cross-validate CatBoost with one randomly drawn configuration and print the result.

    The hyper-parameters come from ``get_random_catboost_parameters(seed)``;
    the model is always built with ``iterations=1000`` and ``verbose=0``.
    Evaluation is ``cross_val_score`` with ``cv=10``, scored by accuracy for
    classification and by negative RMSE for regression. Nothing is returned;
    the parameters and the mean ± std of the fold scores are printed.

    Args:
        X: Feature matrix accepted by the CatBoost estimators.
        y: Target values.
        seed: Seed used only for sampling the hyper-parameters.
        task_type: ``"classification"`` or ``"regression"``.
        num_classes: Forwarded to ``CatBoostClassifier`` as ``classes_count``;
            ignored for regression.

    Raises:
        ValueError: If ``task_type`` is neither supported value. (Previously
            this surfaced as an UnboundLocalError on ``model``.)
    """
    params = get_random_catboost_parameters(seed)

    if task_type == "regression":
        model = CatBoostRegressor(iterations=1000, verbose=0, **params)
        scoring = 'neg_root_mean_squared_error'
    elif task_type == "classification":
        model = CatBoostClassifier(iterations=1000, classes_count=num_classes, verbose=0, **params)
        scoring = 'accuracy'
    else:
        raise ValueError("Invalid task type. Use 'classification' or 'regression'.")

    # verbose=0 above keeps the ten fits from flooding the cell output.
    scores = cross_val_score(model, X, y, cv=10, scoring=scoring)
    print(f"CatBoost with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")




# LightGBM

In [None]:
def get_random_lightgbm_parameters(seed):
    """Draw one LightGBM hyper-parameter configuration, reproducibly from ``seed``.

    Values are sampled log-uniformly: ``num_leaves`` in [2, 4096] (rounded to
    an int), ``lambda_l1`` and ``lambda_l2`` in [1e-8, 10], and
    ``learning_rate`` in [0.03, 0.3].

    The learning-rate exponent used to be drawn from (-2, 1), which allowed
    rates up to 30; it now uses (-2, -1), the same range as the XGBoost and
    CatBoost samplers in this notebook. The other three draws are unchanged.

    Args:
        seed: Seed for the private ``numpy.random.RandomState``.

    Returns:
        Dict with the keys ``num_leaves``, ``lambda_l1``, ``lambda_l2`` and
        ``learning_rate``.
    """
    rs = np.random.RandomState(seed)
    params = {
        "num_leaves": int(np.round(np.power(2, rs.uniform(1, 12)))),
        "lambda_l1": np.power(10, rs.uniform(-8, 1)),
        "lambda_l2": np.power(10, rs.uniform(-8, 1)),
        # 3 * 10^[-2, -1]  ->  0.03 .. 0.3
        "learning_rate": 3.0 * np.power(10, rs.uniform(-2, -1)),
    }
    return params


def run_lightgbm(X, y, seed=42, task_type="classification"):
    """Cross-validate LightGBM with one randomly drawn configuration and print the result.

    The hyper-parameters come from ``get_random_lightgbm_parameters(seed)``
    and are extended with an explicit objective: ``"regression"`` (metric
    ``"mse"``) for regression, ``"binary"`` or ``"multiclass"`` for
    classification depending on the number of distinct values in ``y``.
    Evaluation is ``cross_val_score`` with ``cv=10``, scored by accuracy for
    classification and by negative RMSE for regression. Nothing is returned;
    the final parameters and the mean ± std of the fold scores are printed.

    Args:
        X: Feature matrix accepted by the LightGBM estimators.
        y: Target values.
        seed: Seed used only for sampling the hyper-parameters.
        task_type: ``"classification"`` or ``"regression"``.

    Raises:
        ValueError: If ``task_type`` is neither supported value. (Previously
            this surfaced as an UnboundLocalError on ``model``.)
    """
    params = get_random_lightgbm_parameters(seed)

    if task_type == "regression":
        params["objective"] = "regression"
        params["metric"] = "mse"
        model = LGBMRegressor(**params, verbose=-1)
        scoring = 'neg_root_mean_squared_error'
    elif task_type == "classification":
        # The class count is only needed here, so it is not computed for regression.
        num_classes = len(np.unique(y))
        params["objective"] = "multiclass" if num_classes > 2 else "binary"
        # For binary problems the key is kept with value None.
        params["num_class"] = num_classes if num_classes > 2 else None
        model = LGBMClassifier(**params, verbose=-1)
        scoring = 'accuracy'
    else:
        raise ValueError("Invalid task type. Use 'classification' or 'regression'.")

    # Perform cross-validation
    scores = cross_val_score(model, X, y, cv=10, scoring=scoring)
    print(f"LightGBM with random params: {params}")
    print(f"10-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")


# Task ID1: 14965

In [None]:
# Load OpenML task 14965 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(14965)

Dataset shape: (45211, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7360 ± 0.1495


In [None]:
run_catboost(X, y, seed=42)


CatBoost with random params: {'learning_rate': 0.07106591851092234, 'max_depth': 11, 'l2_leaf_reg': 10.013039911391246}
10-fold CV accuracy: 0.6491 ± 0.1581


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7733 ± 0.1460


# Task ID2: 9977

In [None]:
# Load OpenML task 9977 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(9977)


Dataset shape: (34465, 118)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9488 ± 0.0254


In [None]:
run_catboost(X, y, seed=42)


CatBoost with random params: {'learning_rate': 0.07106591851092234, 'max_depth': 11, 'l2_leaf_reg': 10.013039911391246}
10-fold CV accuracy: 0.9531 ± 0.0224


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8318 ± 0.0689


# Task ID3: 34539

In [None]:
# Load OpenML task 34539 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(34539)


Dataset shape: (32769, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9431 ± 0.0006


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9131 ± 0.0140


# Task ID4: 146606

In [None]:
# Load OpenML task 146606 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(146606)


Dataset shape: (98050, 28)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7192 ± 0.0041


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6664 ± 0.0094


# Task ID5: 7592



In [None]:
# Load OpenML task 7592 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(7592)


Dataset shape: (48842, 14)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8721 ± 0.0031


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6680 ± 0.1565


# Task ID6: 146195



In [None]:
# Load OpenML task 146195 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(146195)


Dataset shape: (67557, 42)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6903 ± 0.0501


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4384 ± 0.1732


# Task ID7: 167119



In [None]:
# Load OpenML task 167119 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(167119)


Dataset shape: (44819, 6)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7006 ± 0.0713


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4532 ± 0.1107


# Task ID8: 167120



In [None]:
# Load OpenML task 167120 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(167120)


Dataset shape: (96320, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5177 ± 0.0038


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5047 ± 0.0052


# Task ID9: 146825



In [None]:
# Load OpenML task 146825 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(146825)


Dataset shape: (70000, 784)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5047 ± 0.0052


# Task ID10: 	3945



In [None]:
# Load OpenML task 3945 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(3945)


Dataset shape: (50000, 230)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9821 ± 0.0001


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9682 ± 0.0092


# Task ID11: 	168331



In [None]:
# Load OpenML task 168331 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168331)


Dataset shape: (58310, 180)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6448 ± 0.0050


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.1990 ± 0.0474


# Task ID12: 	168330



In [None]:
# Load OpenML task 168330 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168330)


Dataset shape: (83733, 54)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7014 ± 0.0031


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID13: 	168335



In [None]:
# Load OpenML task 168335 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168335)


Dataset shape: (130064, 50)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9355 ± 0.0020


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8926 ± 0.0137


# Task ID14: 	168332



In [None]:
# Load OpenML task 168332 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168332)


Dataset shape: (10000, 7200)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID15: 	168337



In [None]:
# Load OpenML task 168337 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168337)


Dataset shape: (20000, 4296)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

Exception ignored on calling ctypes callback function: <bound method DataIter._next_wrapper of <xgboost.data.SingleBatchInternalIter object at 0x781c108b4400>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 582, in _next_wrapper
    def _next_wrapper(self, this: None) -> int:  # pylint: disable=unused-argument
KeyboardInterrupt: 
Exception ignored on calling ctypes callback function: <bound method DataIter._next_wrapper of <xgboost.data.SingleBatchInternalIter object at 0x781c108b4a00>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 582, in _next_wrapper
    def _next_wrapper(self, this: None) -> int:  # pylint: disable=unused-argument
KeyboardInterrupt: 


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751



# Task ID16: 	146212



In [None]:
# Load OpenML task 146212 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(146212)


Dataset shape: (58000, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9999 ± 0.0001


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 7}
10-fold CV accuracy: 0.8166 ± 0.1577


# Task ID17: 	168329



In [None]:
# Load OpenML task 168329 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168329)


Dataset shape: (65196, 27)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID18: 	168338



In [None]:
# Load OpenML task 168338 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168338)


Dataset shape: (20000, 4296)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

KeyboardInterrupt: 

In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4279 ± 0.0751


# Task ID19: 	168868



In [None]:
# Load OpenML task 168868 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(168868)


Dataset shape: (76000, 170)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9944 ± 0.0007


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9757 ± 0.0072


# Task ID20: 31

In [None]:
# Load OpenML task 31 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(31)


Dataset shape: (1000, 20)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7560 ± 0.0310


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7030 ± 0.0650


# Task ID21: 10101

In [None]:
# Load OpenML task 10101 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(10101)


Dataset shape: (748, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7127 ± 0.1362


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6781 ± 0.1610


# Task ID22: 	3913

In [None]:
# Load OpenML task 3913 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(3913)


Dataset shape: (522, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7966 ± 0.0775


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7910 ± 0.1004


# Task ID23: 3

In [None]:
# Load OpenML task 3 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(3)


Dataset shape: (3196, 36)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9687 ± 0.0289


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8393 ± 0.1783


# Task ID24: 3917

In [None]:
# Load OpenML task 3917 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(3917)


Dataset shape: (2109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8308 ± 0.0354


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7942 ± 0.0312


# Task ID25: 9957

In [None]:
# Load OpenML task 9957 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(9957)


Dataset shape: (1055, 41)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8511 ± 0.0545


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8019 ± 0.0565


# Task ID26: 9946

In [None]:
# Load OpenML task 9946 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(9946)


Dataset shape: (569, 30)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9701 ± 0.0208


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9526 ± 0.0272


# Task ID27: 3918

In [None]:
# Load OpenML task 3918 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(3918)


Dataset shape: (1109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9297 ± 0.0311


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8765 ± 0.0353


# Task ID28: 3903

In [None]:
# Load OpenML task 3903 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(3903)


Dataset shape: (1563, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8925 ± 0.0144


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8421 ± 0.0472


# Task ID29: 37

In [None]:
# Load OpenML task 37 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(37)


Dataset shape: (768, 8)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7578 ± 0.0416


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7005 ± 0.0261


# Task ID30: 9971

In [None]:
# Load OpenML task 9971 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(9971)


Dataset shape: (583, 10)


In [None]:
# Choose and run models with 10-fold cross-validation
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6793 ± 0.0824


In [None]:
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6211 ± 0.0865


# Task ID31: 9952

In [None]:
# Load OpenML task 9952 and preprocess it with the defaults (model_type="xgboost", task_type="classification")
X, y, cat_features = load_preprocess_task(9952)


Dataset shape: (5404, 5)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9952 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8769 ± 0.0154


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9952 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7089 ± 0.0997


# Task ID32: 3902

In [None]:
# Load and preprocess the dataset for OpenML task 3902.
X, y, cat_features = load_preprocess_task(3902)


Dataset shape: (1458, 37)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3902 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9108 ± 0.0126


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3902 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8560 ± 0.0283


# Task ID33: 49

In [None]:
# Load and preprocess the dataset for OpenML task 49.
X, y, cat_features = load_preprocess_task(49)


Dataset shape: (958, 9)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 49 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8779 ± 0.1370


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 49 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8237 ± 0.1105


# Task ID34: 43

In [None]:
# Load and preprocess the dataset for OpenML task 43.
X, y, cat_features = load_preprocess_task(43)


Dataset shape: (4601, 57)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 43 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9422 ± 0.0324


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 43 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8111 ± 0.0454


# Task ID35: 9978

In [None]:
# Load and preprocess the dataset for OpenML task 9978.
X, y, cat_features = load_preprocess_task(9978)


Dataset shape: (2534, 72)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9978 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9325 ± 0.0174


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9978 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9069 ± 0.0168


# Task ID36: 10093

In [None]:
# Load and preprocess the dataset for OpenML task 10093.
X, y, cat_features = load_preprocess_task(10093)


Dataset shape: (1372, 4)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 10093 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9956 ± 0.0048


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 10093 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9818 ± 0.0188


# Task ID37: 219

In [None]:
# Load and preprocess the dataset for OpenML task 219.
X, y, cat_features = load_preprocess_task(219)


Dataset shape: (45312, 8)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 219 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7299 ± 0.0614


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 219 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7130 ± 0.0965


# Task ID38: 9976

In [None]:
# Load and preprocess the dataset for OpenML task 9976.
X, y, cat_features = load_preprocess_task(9976)


Dataset shape: (2600, 500)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9976 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7835 ± 0.0221


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9976 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.6935 ± 0.0315


# Task ID39: 6

In [None]:
# Load and preprocess the dataset for OpenML task 6.
X, y, cat_features = load_preprocess_task(6)


Dataset shape: (20000, 16)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 6 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9321 ± 0.0028


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 6 data, passing seed=42.
# NOTE(review): the recorded output shows learning_rate ~1.875 for LightGBM and an
# accuracy of 0.1738 here versus 0.9321 for XGBoost on the same task; the
# learning-rate sampling inside run_lightgbm looks suspect -- TODO confirm its range.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 26}
10-fold CV accuracy: 0.1738 ± 0.0415


# Task ID40: 53

In [None]:
# Load and preprocess the dataset for OpenML task 53.
X, y, cat_features = load_preprocess_task(53)


Dataset shape: (846, 18)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 53 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7696 ± 0.0277


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 53 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.4054 ± 0.0701


# Task ID41: 11

In [None]:
# Load and preprocess the dataset for OpenML task 11.
X, y, cat_features = load_preprocess_task(11)


Dataset shape: (625, 4)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 11 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7426 ± 0.1043


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 11 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.8131 ± 0.0642


# Task ID42: 15

In [None]:
# Load and preprocess the dataset for OpenML task 15.
X, y, cat_features = load_preprocess_task(15)


Dataset shape: (699, 9)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 15 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9586 ± 0.0322


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 15 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9529 ± 0.0313


# Task ID43: 16

In [None]:
# Load and preprocess the dataset for OpenML task 16.
X, y, cat_features = load_preprocess_task(16)


Dataset shape: (2000, 64)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 16 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9460 ± 0.0118


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 16 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3185 ± 0.1023


# Task ID44: 14

In [None]:
# Load and preprocess the dataset for OpenML task 14.
X, y, cat_features = load_preprocess_task(14)


Dataset shape: (2000, 76)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 14 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8380 ± 0.0176


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 14 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.2800 ± 0.0680


# Task ID45: 32

In [None]:
# Load and preprocess the dataset for OpenML task 32.
X, y, cat_features = load_preprocess_task(32)


Dataset shape: (10992, 16)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 32 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9889 ± 0.0031


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 32 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3439 ± 0.1041


# Task ID46: 3549

In [None]:
# Load and preprocess the dataset for OpenML task 3549.
X, y, cat_features = load_preprocess_task(3549)


Dataset shape: (841, 70)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3549 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9857 ± 0.0139


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3549 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.9013 ± 0.0547


# Task ID47: 12

In [None]:
# Load and preprocess the dataset for OpenML task 12.
X, y, cat_features = load_preprocess_task(12)


Dataset shape: (2000, 216)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 12 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9600 ± 0.0140


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 12 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.4495 ± 0.1933


# Task ID48: 9981

In [None]:
# Load and preprocess the dataset for OpenML task 9981.
X, y, cat_features = load_preprocess_task(9981)


Dataset shape: (1080, 856)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9981 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9176 ± 0.0308


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9981 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 9}
10-fold CV accuracy: 0.1278 ± 0.0522


# Task ID49: 18

In [None]:
# Load and preprocess the dataset for OpenML task 18.
X, y, cat_features = load_preprocess_task(18)


Dataset shape: (2000, 6)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 18 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7065 ± 0.0166


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 18 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.2690 ± 0.0521


# Task ID50: 28

In [None]:
# Load and preprocess the dataset for OpenML task 28.
X, y, cat_features = load_preprocess_task(28)


Dataset shape: (5620, 64)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 28 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9733 ± 0.0092


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 28 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3534 ± 0.1022


# Task ID51: 2074

In [None]:
# Load and preprocess the dataset for OpenML task 2074.
X, y, cat_features = load_preprocess_task(2074)


Dataset shape: (6430, 36)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 2074 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9106 ± 0.0123


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 2074 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.3664 ± 0.0641


# Task ID52: 29

In [None]:
# Load and preprocess the dataset for OpenML task 29.
X, y, cat_features = load_preprocess_task(29)


Dataset shape: (690, 15)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 29 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8391 ± 0.1284


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 29 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8087 ± 0.1184


# Task ID53: 45

In [None]:
# Load and preprocess the dataset for OpenML task 45.
X, y, cat_features = load_preprocess_task(45)


Dataset shape: (3190, 60)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 45 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9583 ± 0.0141


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 45 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.5639 ± 0.0946


# Task ID54: 125922

In [None]:
# Load and preprocess the dataset for OpenML task 125922.
X, y, cat_features = load_preprocess_task(125922)


Dataset shape: (5500, 40)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 125922 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9842 ± 0.0056


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 125922 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 11}
10-fold CV accuracy: 0.4633 ± 0.1052


# Task ID55: 9960

In [None]:
# Load and preprocess the dataset for OpenML task 9960.
X, y, cat_features = load_preprocess_task(9960)


Dataset shape: (5456, 24)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9960 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9941 ± 0.0086


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9960 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.7197 ± 0.2254


# Task ID56: 9964

In [None]:
# Load and preprocess the dataset for OpenML task 9964.
X, y, cat_features = load_preprocess_task(9964)


Dataset shape: (1593, 256)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9964 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9272 ± 0.0229


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9964 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3132 ± 0.0912


# Task ID57: 22

In [None]:
# Load and preprocess the dataset for OpenML task 22.
X, y, cat_features = load_preprocess_task(22)


Dataset shape: (2000, 47)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 22 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7790 ± 0.0221


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 22 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3195 ± 0.0914


# Task ID58: 2079

In [None]:
# Load and preprocess the dataset for OpenML task 2079.
X, y, cat_features = load_preprocess_task(2079)


Dataset shape: (736, 19)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 2079 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5601 ± 0.0555


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 2079 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 5}
10-fold CV accuracy: 0.2785 ± 0.0715


# Task ID59: 14969

In [None]:
# Load and preprocess the dataset for OpenML task 14969.
X, y, cat_features = load_preprocess_task(14969)


Dataset shape: (9873, 32)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 14969 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.4783 ± 0.0596


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 14969 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 5}
10-fold CV accuracy: 0.2969 ± 0.0562


# Task ID60: 3560

In [None]:
# Load and preprocess the dataset for OpenML task 3560.
X, y, cat_features = load_preprocess_task(3560)


Dataset shape: (797, 4)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3560 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.1896 ± 0.0429


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3560 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.1595 ± 0.0293


# Task ID61: 14952

In [None]:
# Load and preprocess the dataset for OpenML task 14952.
X, y, cat_features = load_preprocess_task(14952)


Dataset shape: (11055, 30)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 14952 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9557 ± 0.0059


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 14952 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8177 ± 0.1445


# Task ID62: 125920

In [None]:
# Load and preprocess the dataset for OpenML task 125920.
X, y, cat_features = load_preprocess_task(125920)


Dataset shape: (500, 12)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 125920 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6020 ± 0.0433


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 125920 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5620 ± 0.0745


# Task ID63: 23

In [None]:
# Load and preprocess the dataset for OpenML task 23.
X, y, cat_features = load_preprocess_task(23)


Dataset shape: (1473, 9)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 23 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5601 ± 0.0400


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 23 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4351 ± 0.0485


# Task ID64: 3904

In [None]:
# Load and preprocess the dataset for OpenML task 3904.
X, y, cat_features = load_preprocess_task(3904)


Dataset shape: (10885, 21)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3904 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8066 ± 0.0187


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3904 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7150 ± 0.0319


# Task ID65: 3022

In [None]:
# Load and preprocess the dataset for OpenML task 3022.
X, y, cat_features = load_preprocess_task(3022)


Dataset shape: (990, 12)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3022 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6545 ± 0.0697


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3022 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 11}
10-fold CV accuracy: 0.1808 ± 0.0672


# Task ID66: 9985

In [None]:
# Load and preprocess the dataset for OpenML task 9985.
X, y, cat_features = load_preprocess_task(9985)


Dataset shape: (6118, 51)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9985 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.5414 ± 0.0602


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9985 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.2517 ± 0.1198


# Task ID67: 9910

In [None]:
# Load and preprocess the dataset for OpenML task 9910.
X, y, cat_features = load_preprocess_task(9910)


Dataset shape: (3751, 1776)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 9910 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7974 ± 0.0180


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 9910 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5692 ± 0.0653


# Task ID68: 14970

In [None]:
# Load and preprocess the dataset for OpenML task 14970.
X, y, cat_features = load_preprocess_task(14970)


Dataset shape: (10299, 561)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 14970 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9555 ± 0.0293


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 14970 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 6}
10-fold CV accuracy: 0.4214 ± 0.1325


# Task ID69: 3021

In [None]:
# Load and preprocess the dataset for OpenML task 3021.
X, y, cat_features = load_preprocess_task(3021)


Dataset shape: (3772, 29)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3021 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9913 ± 0.0052


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3021 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9327 ± 0.0411


# Task ID70: 3481

In [None]:
# Load and preprocess the dataset for OpenML task 3481.
X, y, cat_features = load_preprocess_task(3481)


Dataset shape: (7797, 617)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3481 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9478 ± 0.0121


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3481 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 26}
10-fold CV accuracy: 0.2706 ± 0.0419


# Task ID71: 3573

In [None]:
# Load and preprocess the dataset for OpenML task 3573.
X, y, cat_features = load_preprocess_task(3573)


Dataset shape: (70000, 784)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 3573 data loaded above (no explicit seed passed).
# NOTE(review): no output is recorded for this cell in the saved notebook (the dataset
# shape printed above is (70000, 784)); presumably the run was not completed --
# TODO re-run or drop this task from the comparison.
run_xgboost(X, y)

In [None]:
# Run LightGBM with 10-fold cross-validation on the task 3573 data, passing seed=42.
# NOTE(review): no output is recorded for this cell in the saved notebook;
# presumably the run was not completed -- TODO re-run or drop this task.
run_lightgbm(X, y, seed=42)


# Task ID72: 146824

In [None]:
# Load and preprocess the dataset for OpenML task 146824.
X, y, cat_features = load_preprocess_task(146824)


Dataset shape: (2000, 240)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 146824 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9630 ± 0.0131


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 146824 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 10}
10-fold CV accuracy: 0.3550 ± 0.0862


# Task ID73: 146820

In [None]:
# Load and preprocess the dataset for OpenML task 146820.
X, y, cat_features = load_preprocess_task(146820)


Dataset shape: (4839, 5)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 146820 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9845 ± 0.0111


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 146820 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9653 ± 0.0158


# Task ID74: 146822

In [None]:
# Load and preprocess the dataset for OpenML task 146822.
X, y, cat_features = load_preprocess_task(146822)


Dataset shape: (2310, 16)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 146822 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9342 ± 0.0116


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 146822 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 7}
10-fold CV accuracy: 0.4701 ± 0.1103


# Task ID75: 146195

In [None]:
# Load and preprocess the dataset for OpenML task 146195.
X, y, cat_features = load_preprocess_task(146195)


Dataset shape: (67557, 42)


In [None]:
# Run XGBoost with 10-fold cross-validation on the task 146195 data loaded above (no explicit seed passed).
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6903 ± 0.0501


In [None]:
# Run LightGBM with 10-fold cross-validation on the task 146195 data, passing seed=42.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.4384 ± 0.1732


# Task ID76: 146800

In [None]:
# Load and preprocess the dataset for OpenML task 146800.
X, y, cat_features = load_preprocess_task(146800)


Dataset shape: (1080, 77)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.7093 ± 0.1207


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.3481
# vs XGBoost's 0.7093 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 8}
10-fold CV accuracy: 0.3481 ± 0.1663


# Task ID77: 146817

In [None]:
# Load and preprocess the dataset behind OpenML task 146817.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(146817)


Dataset shape: (1941, 27)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6353 ± 0.0864


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.2788
# vs XGBoost's 0.6353 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 7}
10-fold CV accuracy: 0.2788 ± 0.0751


# Task ID78: 146819

In [None]:
# Load and preprocess the dataset behind OpenML task 146819.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(146819)


Dataset shape: (540, 18)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9463 ± 0.0255


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.9370
# vs XGBoost's 0.9463 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.9370 ± 0.0343


# Task ID79: 146821

In [None]:
# Load and preprocess the dataset behind OpenML task 146821.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(146821)


Dataset shape: (1728, 6)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.8773 ± 0.0711


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.5679
# vs XGBoost's 0.8773 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 4}
10-fold CV accuracy: 0.5679 ± 0.1789


# Task ID80: 14954

In [None]:
# Load and preprocess the dataset behind OpenML task 14954.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(14954)


Dataset shape: (540, 37)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.6241 ± 0.0837


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.5648
# vs XGBoost's 0.6241 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.5648 ± 0.1022


# Task ID81: 167141

In [None]:
# Load and preprocess the dataset behind OpenML task 167141.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(167141)


Dataset shape: (5000, 20)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9518 ± 0.0099


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.7800
# vs XGBoost's 0.9518 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.7800 ± 0.0865


# Task ID82: 167140

In [None]:
# Load and preprocess the dataset behind OpenML task 167140.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(167140)


Dataset shape: (3186, 180)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9601 ± 0.0113


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.5577
# vs XGBoost's 0.9601 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'multiclass', 'num_class': 3}
10-fold CV accuracy: 0.5577 ± 0.1720


# Task ID83: 167125

In [None]:
# Load and preprocess the dataset behind OpenML task 167125.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(167125)


Dataset shape: (3279, 1558)


In [None]:
# Run XGBoost on the X, y loaded above; the recorded output reports the sampled
# params and a 10-fold CV accuracy.
# NOTE(review): no seed is passed here, unlike run_lightgbm(..., seed=42) — presumably
# run_xgboost has a default seed (recorded params are identical across tasks); confirm.
run_xgboost(X, y)

XGBoost with random params: {'max_depth': 4, 'alpha': 0.4033800832600378, 'lambda': 0.00717714192799201, 'eta': 0.11906379991333114}
10-fold CV accuracy: 0.9695 ± 0.0201


In [None]:
# Run LightGBM with seed=42 on the same X, y; the recorded output reports the
# sampled params and a 10-fold CV accuracy.
# NOTE(review): the recorded draw has learning_rate of about 1.875, and accuracy is 0.8750
# vs XGBoost's 0.9695 on this task — check the learning-rate range used by run_lightgbm.
run_lightgbm(X, y, seed=42)


LightGBM with random params: {'num_leaves': 35, 'lambda_l1': 3.6010467344475314, 'lambda_l2': 0.038720902953704145, 'learning_rate': 1.8754120723565242, 'objective': 'binary', 'num_class': None}
10-fold CV accuracy: 0.8750 ± 0.0508


# Task ID84: 167124

In [None]:
# Load and preprocess the dataset behind OpenML task 167124.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(167124)


Dataset shape: (60000, 3072)


In [None]:
# Run XGBoost on the X, y loaded above (no seed argument is passed).
# NOTE(review): no output is recorded for this cell on the (60000, 3072) dataset —
# presumably it was not run to completion; confirm before citing results for this task.
run_xgboost(X, y)

In [None]:
# Run LightGBM with seed=42 on the same X, y.
# NOTE(review): no output is recorded for this cell on the (60000, 3072) dataset —
# presumably it was not run to completion; confirm before citing results for this task.
run_lightgbm(X, y, seed=42)


# Task ID85: 167121

In [None]:
# Load and preprocess the dataset behind OpenML task 167121.
# Rebinds the notebook-level X, y and cat_features; cat_features is not passed
# to the run_xgboost / run_lightgbm calls in the next two cells.
X, y, cat_features = load_preprocess_task(167121)


Dataset shape: (92000, 1024)


In [None]:
# Run XGBoost on the X, y loaded above (no seed argument is passed).
# NOTE(review): no output is recorded for this cell on the (92000, 1024) dataset —
# presumably it was not run to completion; confirm before citing results for this task.
run_xgboost(X, y)

In [None]:
# Run LightGBM with seed=42 on the same X, y.
# NOTE(review): no output is recorded for this cell on the (92000, 1024) dataset —
# presumably it was not run to completion; confirm before citing results for this task.
run_lightgbm(X, y, seed=42)


In [None]:
pip freeze > requirements.txt