# Baselines: LR, KNN, SVM, DT, RF

In [None]:
pip install pandas scikit-learn openml

Collecting openml
  Downloading openml-0.15.0-py3-none-any.whl.metadata (9.9 kB)
Collecting liac-arff>=2.4.0 (from openml)
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting xmltodict (from openml)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting minio (from openml)
  Downloading minio-7.2.10-py3-none-any.whl.metadata (6.5 kB)
Collecting pycryptodome (from minio->openml)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading openml-0.15.0-py3-none-any.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.0/158.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading minio-7.2.10-py3-none-any.whl (93 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.9/93.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Downloading pyc

In [None]:
import os
import numpy as np
import pandas as pd
import openml
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold


# Set OpenML configuration directory to a temporary location
# NOTE(review): this assignment runs after `import openml` earlier in this cell;
# confirm that openml-python actually honours OPENML_CONFIG (and reads it lazily),
# otherwise the line has no effect.
os.environ["OPENML_CONFIG"] = "/tmp/openml_config"



# Preprocessing

In [None]:
def load_preprocess_task(task_id, task_type="classification", target_encode=None, cat_feature_encode=True):
    """
    Load the dataset behind an OpenML task and turn it into model-ready arrays.

    Numeric columns are mean-imputed and standardized. Columns with ``object`` or
    ``category`` dtype are imputed with their most frequent value and one-hot
    encoded when ``cat_feature_encode`` is True; otherwise they are left out of the
    feature matrix. Columns of any other dtype (e.g. ``bool``, datetimes) are not
    routed to either transformer and therefore do not appear in the output.

    Note:
        The preprocessor is fitted on *all* rows before any cross-validation split
        happens, so imputation/scaling statistics also see the rows that later act
        as validation folds.

    Args:
        task_id (int): OpenML task ID
        task_type (str): Either 'classification' or 'regression'
        target_encode (bool or None): Label-encode the target. ``None`` (default)
            means "encode unless ``task_type`` is 'regression'".
        cat_feature_encode (bool): Whether to one-hot encode categorical features (default: True)
    Returns:
        X_preprocessed (ndarray or scipy sparse matrix): Preprocessed feature set.
            One-hot encoding can make the ColumnTransformer return a sparse matrix.
        y (ndarray): Target values (integer codes when label-encoded, raw values otherwise)
    """
    # Load task from OpenML using the task ID
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, _, _ = dataset.get_data(target=task.target_name)

    print(f"Dataset shape: {X.shape}")

    # Infer and apply target encoding based on task type and target_encode flag
    is_regression = (task_type == "regression")
    if (target_encode is None and not is_regression) or target_encode:
        y = LabelEncoder().fit_transform(y)
    else:
        # Always hand back an ndarray (as documented) so that positional indexing
        # such as y[train_index] behaves the same on both code paths.
        y = np.asarray(y)

    # Detect categorical and numeric features by dtype
    categorical_cols = X.select_dtypes(include=['object', 'category']).columns
    numeric_cols = X.select_dtypes(include=['number']).columns

    # Preprocessing pipelines for numeric and categorical features
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())])
    transformers = [('num', numeric_transformer, numeric_cols)]

    if cat_feature_encode:
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])
        transformers.append(('cat', categorical_transformer, categorical_cols))

    preprocessor = ColumnTransformer(transformers=transformers)

    # Apply transformations
    X_preprocessed = preprocessor.fit_transform(X)

    return X_preprocessed, y



# Cross Validation

In [None]:
def cross_validate_model(model, X, y, task_type="classification", n_folds=10):
    """
    Cross-validate model based on task type (classification or regression).

    Classification uses shuffled StratifiedKFold and reports accuracy; regression
    uses shuffled KFold and reports RMSE. Both splitters use ``random_state=42``.
    ``model`` is re-fitted in place on every fold, so after the call it holds the
    fit from the last fold.

    Args:
        model: Machine learning model to train and evaluate (needs ``fit``/``predict``)
        X (ndarray, sparse matrix or DataFrame): Feature matrix
        y (array-like): Target vector
        task_type (str): 'classification' or 'regression'
        n_folds (int): Number of cross-validation folds
    Returns:
        avg_score (float): Average cross-validated score (accuracy or RMSE)
    Raises:
        ValueError: If ``task_type`` is neither 'classification' nor 'regression'.
    """
    if task_type == "classification":
        cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = accuracy_score
        metric_name = "Accuracy"
    elif task_type == "regression":
        cv = KFold(n_splits=n_folds, shuffle=True, random_state=42)
        scoring_func = mean_squared_error
        metric_name = "RMSE"
    else:
        raise ValueError("Invalid task type. Use 'classification' or 'regression'.")

    # The splitter yields *positional* indices. Plain X[idx] on a DataFrame would be
    # a column lookup and y[idx] on a Series a label lookup, so select rows through
    # .iloc when it exists and work on an ndarray copy of the target.
    X_rows = X.iloc if hasattr(X, "iloc") else X
    y = np.asarray(y)

    scores = []
    for train_index, test_index in cv.split(X, y):
        model.fit(X_rows[train_index], y[train_index])
        y_pred = model.predict(X_rows[test_index])

        # Accuracy for classification; square root of the MSE (RMSE) for regression
        score = scoring_func(y[test_index], y_pred)
        if task_type == "regression":
            score = np.sqrt(score)

        scores.append(score)

    avg_score = np.mean(scores)
    print(f"Average {metric_name}: {avg_score:.4f}")

    return avg_score



# Logistic Regression

In [None]:
def linear_model_trial(trial, task_type="classification"):
    """
    Build the linear baseline estimator for the requested task type.

    Args:
        trial: Accepted for interface compatibility; not used by this function.
        task_type (str): Only 'classification' is supported.
    Returns:
        An unfitted LogisticRegression configured with ``n_jobs=-1``.
    Raises:
        NotImplementedError: For any task type other than 'classification'.
    """
    # Guard clause: there is no linear regression baseline yet.
    if task_type != "classification":
        raise NotImplementedError("Linear regression not implemented.")
    return LogisticRegression(n_jobs=-1)

def run_linear_model(X, y, task_type="classification", cv=10):
    """
    Cross-validate the linear baseline and print mean ± std of the fold scores.

    Args:
        X: Feature matrix.
        y: Target vector.
        task_type (str): 'classification' scores accuracy. Any other value selects
            RMSE scoring, but ``linear_model_trial`` currently raises
            NotImplementedError for it before any scoring happens.
        cv: Passed straight to ``cross_val_score``. The default ``10`` keeps the
            previous behaviour. An integer makes scikit-learn build *unshuffled*
            folds, which gives unstable scores on datasets stored in sorted order;
            pass e.g. ``StratifiedKFold(n_splits=10, shuffle=True, random_state=42)``
            in that case.
    """
    model = linear_model_trial(None, task_type)
    scoring = 'accuracy' if task_type == "classification" else 'neg_root_mean_squared_error'
    scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
    # One score per fold, so len(scores) is the real fold count for the label.
    print(f"{len(scores)}-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")


# KNN

In [None]:
def get_random_knn_parameters(seed, task_type="classification"):
    """
    Sample one KNN hyper-parameter configuration, reproducibly from ``seed``.

    Args:
        seed (int): Seed for the private RandomState used for sampling.
        task_type (str): Accepted for signature symmetry with the other samplers;
            it does not influence the sampled values.
    Returns:
        dict: ``n_neighbors`` (odd int in [3, 41]), ``knn_alg`` ('kd_tree' or
        'ball_tree') and ``leaf_size`` (one of 30, 50, 70, 100, 300), all as
        built-in Python types.
    """
    rs = np.random.RandomState(seed)
    # The draw order (neighbors, algorithm, leaf size) determines which values a
    # given seed maps to, so it must not be rearranged.
    n_neighbors = 1 + 2 * rs.randint(1, 21)
    knn_alg = rs.choice(["kd_tree", "ball_tree"])
    leaf_size = rs.choice([30, 50, 70, 100, 300])
    # rs.choice returns NumPy scalars (np.str_, np.int64); convert to native types
    # so the printed/stored parameter dict stays clean across NumPy versions.
    params = {
        "n_neighbors": int(n_neighbors),
        "knn_alg": str(knn_alg),
        "leaf_size": int(leaf_size),
    }
    return params

def run_knn(X, y, seed=42, task_type="classification", cv=10):
    """
    Cross-validate a KNN model with randomly sampled hyper-parameters.

    Prints the sampled parameters and the mean ± std of the fold scores.

    Args:
        X: Feature matrix.
        y: Target vector.
        seed (int): Seed used to sample the hyper-parameters.
        task_type (str): 'classification' uses KNeighborsClassifier with accuracy;
            any other value uses KNeighborsRegressor with negated RMSE.
        cv: Passed straight to ``cross_val_score``. The default ``10`` keeps the
            previous behaviour. An integer makes scikit-learn build *unshuffled*
            folds, which gives unstable scores on datasets stored in sorted order;
            pass e.g. ``StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)``
            in that case.
    """
    params = get_random_knn_parameters(seed, task_type)
    if task_type == "classification":
        knn = KNeighborsClassifier(n_neighbors=params["n_neighbors"],
                                   algorithm=params["knn_alg"],
                                   leaf_size=params["leaf_size"],
                                   n_jobs=-1)
        scoring = 'accuracy'
    else:
        knn = KNeighborsRegressor(n_neighbors=params["n_neighbors"],
                                  algorithm=params["knn_alg"],
                                  leaf_size=params["leaf_size"],
                                  n_jobs=-1)
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(knn, X, y, cv=cv, scoring=scoring)
    print(f"KNN with random params: {params}")
    # One score per fold, so len(scores) is the real fold count for the label.
    print(f"{len(scores)}-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")

# SVM

In [None]:
def get_random_svm_parameters(seed: int):
    """
    Sample a random SVM regularization strength, reproducibly from ``seed``.

    ``C`` is drawn log-uniformly: an exponent is sampled uniformly from
    [-10, 10) and ``C`` is 10 raised to that exponent.

    Returns:
        dict: ``{"C": <float>}``
    """
    rng = np.random.RandomState(seed)
    exponent = rng.uniform(-10, 10)
    return {"C": np.power(10, exponent)}


def run_svm(X, y, seed=42, task_type="classification", cv=10):
    """
    Cross-validate an SVM with a randomly sampled ``C``.

    Prints the sampled parameters and the mean ± std of the fold scores.

    Args:
        X: Feature matrix.
        y: Target vector.
        seed (int): Seed used to sample ``C``.
        task_type (str): 'classification' uses SVC with accuracy; any other value
            uses SVR with negated RMSE.
        cv: Passed straight to ``cross_val_score``. The default ``10`` keeps the
            previous behaviour. An integer makes scikit-learn build *unshuffled*
            folds, which gives unstable scores on datasets stored in sorted order;
            pass e.g. ``StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)``
            in that case.
    """
    params = get_random_svm_parameters(seed)

    if task_type == "classification":
        # Accuracy scoring only calls predict(), which does not use probability
        # estimates. probability=True would add an internal 5-fold calibration
        # to every fit without changing the score, so it is left off.
        svm_model = SVC(C=params["C"])
        scoring = 'accuracy'
    else:
        svm_model = SVR(C=params["C"])
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(svm_model, X, y, cv=cv, scoring=scoring)
    # Report the sampled configuration like the other run_* helpers do.
    print(f"SVM with random params: {params}")
    # One score per fold, so len(scores) is the real fold count for the label.
    print(f"{len(scores)}-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")

# Decision Tree

In [None]:
def get_random_decision_tree_parameters(seed, task_type="classification"):
    """
    Sample a random decision-tree depth, reproducibly from ``seed``.

    The depth is drawn log-uniformly: an exponent is sampled uniformly from
    [1, log2(12)), 2 is raised to it, and the result is rounded to an integer,
    giving a ``max_depth`` between 2 and 12.

    Args:
        seed (int): Seed for the private RandomState used for sampling.
        task_type (str): Not used when sampling.
    Returns:
        dict: ``{"max_depth": <int>}``
    """
    rng = np.random.RandomState(seed)
    log2_depth = rng.uniform(1, np.log2(12))
    depth = np.power(2, log2_depth)
    return {"max_depth": int(np.round(depth))}

def run_decision_tree(X, y, seed=42, task_type="classification", cv=10):
    """
    Cross-validate a decision tree with a randomly sampled ``max_depth``.

    Prints the sampled parameters and the mean ± std of the fold scores.

    Args:
        X: Feature matrix.
        y: Target vector.
        seed (int): Seeds both the hyper-parameter sampling and the tree itself.
        task_type (str): 'classification' uses DecisionTreeClassifier with accuracy;
            any other value uses DecisionTreeRegressor with negated RMSE.
        cv: Passed straight to ``cross_val_score``. The default ``10`` keeps the
            previous behaviour. An integer makes scikit-learn build *unshuffled*
            folds, which gives unstable scores on datasets stored in sorted order;
            pass e.g. ``StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)``
            in that case.
    """
    params = get_random_decision_tree_parameters(seed, task_type)
    if task_type == "classification":
        dt = DecisionTreeClassifier(max_depth=params["max_depth"], random_state=seed)
        scoring = 'accuracy'
    else:
        dt = DecisionTreeRegressor(max_depth=params["max_depth"], random_state=seed)
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(dt, X, y, cv=cv, scoring=scoring)
    print(f"Decision Tree with random params: {params}")
    # One score per fold, so len(scores) is the real fold count for the label.
    print(f"{len(scores)}-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")


# Random Forest

In [None]:
def get_random_forest_parameters(seed, task_type="classification"):
    """
    Sample random-forest hyper-parameters, reproducibly from ``seed``.

    Both values are drawn log-uniformly and rounded to integers:
    ``max_depth`` lies between 2 and 12, ``n_estimators`` between 5 and 100.

    Args:
        seed (int): Seed for the private RandomState used for sampling.
        task_type (str): Not used when sampling.
    Returns:
        dict: ``{"max_depth": <int>, "n_estimators": <int>}``
    """
    rng = np.random.RandomState(seed)

    # First draw: depth exponent in base 2, uniform over [1, log2(12)).
    depth_exponent = rng.uniform(1, np.log2(12))
    max_depth = int(np.round(np.power(2, depth_exponent)))

    # Second draw: tree-count exponent in base 5, uniform over [1, log5(100)).
    log5_of_100 = np.log2(100) / np.log2(5)
    trees_exponent = rng.uniform(1, log5_of_100)
    n_estimators = int(np.round(np.power(5, trees_exponent)))

    return {"max_depth": max_depth, "n_estimators": n_estimators}

def run_random_forest(X, y, seed=42, task_type="classification", cv=10):
    """
    Cross-validate a random forest with randomly sampled hyper-parameters.

    Prints the sampled parameters and the mean ± std of the fold scores.

    Args:
        X: Feature matrix.
        y: Target vector.
        seed (int): Seeds both the hyper-parameter sampling and the forest itself.
        task_type (str): 'classification' uses RandomForestClassifier with accuracy;
            any other value uses RandomForestRegressor with negated RMSE.
        cv: Passed straight to ``cross_val_score``. The default ``10`` keeps the
            previous behaviour. An integer makes scikit-learn build *unshuffled*
            folds, which gives unstable scores on datasets stored in sorted order;
            pass e.g. ``StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)``
            in that case.
    """
    params = get_random_forest_parameters(seed, task_type)
    if task_type == "classification":
        rf = RandomForestClassifier(n_estimators=params["n_estimators"],
                                    max_depth=params["max_depth"],
                                    random_state=seed,
                                    n_jobs=-1)
        scoring = 'accuracy'
    else:
        rf = RandomForestRegressor(n_estimators=params["n_estimators"],
                                   max_depth=params["max_depth"],
                                   random_state=seed,
                                   n_jobs=-1)
        scoring = 'neg_root_mean_squared_error'

    scores = cross_val_score(rf, X, y, cv=cv, scoring=scoring)
    print(f"Random Forest with random params: {params}")
    # One score per fold, so len(scores) is the real fold count for the label.
    print(f"{len(scores)}-fold CV {scoring}: {scores.mean():.4f} ± {scores.std():.4f}")

# Task ID1: 14965

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14965)  # Replace with an actual task ID


Dataset shape: (45211, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8142 ± 0.1492


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8398 ± 0.0424


In [None]:
run_svm(X, y, seed=42)


10-fold CV accuracy: 0.7432 ± 0.1386


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8235 ± 0.1123


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8769 ± 0.0159


# Task ID2: 9977

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9977)  # Replace with an actual task ID


Dataset shape: (34465, 118)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.9493


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.9274 ± 0.0276


In [None]:
run_svm(X, y, seed=42)


10-fold CV Accuracy: 0.9408 ± 0.0296


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.8235 ± 0.1123


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.8769 ± 0.0159


# Task ID3: 34539

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(34539)  # Replace with an actual task ID


Dataset shape: (32769, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.9472


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.9460 ± 0.0018


In [None]:
run_svm(X, y, seed=42)


10-fold CV Accuracy: 0.9482 ± 0.0011


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.9437 ± 0.0009


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.9421 ± 0.0001


# Task ID4: 146606

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146606)  # Replace with an actual task ID


Dataset shape: (98050, 28)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.6410


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6434 ± 0.0053


In [None]:
run_svm(X, y, seed=42)


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.6586 ± 0.0030


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.6645 ± 0.0038


# Task ID5: 7592



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(7592)  # Replace with an actual task ID


Dataset shape: (48842, 14)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.8516


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.8434 ± 0.0033


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.8432 ± 0.0032


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.8257 ± 0.0040


# Task ID6: 146195



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146195)  # Replace with an actual task ID


Dataset shape: (67557, 42)


In [None]:
# Choose and run classification models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.7571


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6227 ± 0.0459


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.5900 ± 0.1039


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.6601 ± 0.0034


# Task ID7: 167119



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167119)  # Replace with an actual task ID


Dataset shape: (44819, 6)


In [None]:
# Choose and run classification models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6539 ± 0.0666


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7219 ± 0.0975


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6412 ± 0.0753


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7051 ± 0.0600


# Task ID8: 167120



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(167120)  # Replace with an actual task ID


Dataset shape: (96320, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.5234


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.5088 ± 0.0045


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.5177 ± 0.0036


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.5193 ± 0.0034


# Task ID10: 3945



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3945)  # Replace with an actual task ID


Dataset shape: (50000, 230)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.9806


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.9822 ± 0.0000


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.9821 ± 0.0004


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.9822 ± 0.0000


# Task ID11: 168331



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168331)  # Replace with an actual task ID


Dataset shape: (58310, 180)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.5827


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6704 ± 0.0050


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.4678 ± 0.0045


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.4994 ± 0.0040


# Task ID12: 168330



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168330)  # Replace with an actual task ID


Dataset shape: (83733, 54)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

Average Accuracy: 0.6436


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV Accuracy: 0.6266 ± 0.0041


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV Accuracy: 0.6169 ± 0.0041


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV Accuracy: 0.6377 ± 0.0032





# Task ID13: 168335



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168335)  # Replace with an actual task ID


Dataset shape: (130064, 50)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8829 ± 0.0028


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8966 ± 0.0025


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8779 ± 0.0030


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8846 ± 0.0039


# Task ID16: 146212



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146212)  # Replace with an actual task ID


Dataset shape: (58000, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9665 ± 0.0021


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9981 ± 0.0004


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9979 ± 0.0004


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9971 ± 0.0003


# Task ID19: 168868



In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(168868)  # Replace with an actual task ID


Dataset shape: (76000, 170)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9910 ± 0.0014


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9892 ± 0.0010


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9888 ± 0.0011


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9896 ± 0.0008


# OpenML-CC18 Curated Classification benchmark

# Task ID20: 31

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(31)  # Replace with an actual task ID


Dataset shape: (1000, 20)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7500 ± 0.0498


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7320 ± 0.0232


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7080 ± 0.0199


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7190 ± 0.0255


# Task ID21: 10101

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(10101)  # Replace with an actual task ID


Dataset shape: (748, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7687 ± 0.0119


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7301 ± 0.0881


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7488 ± 0.1405


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7674 ± 0.0698


# Task ID22: 3913

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3913)  # Replace with an actual task ID


Dataset shape: (522, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8406 ± 0.0733


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8216 ± 0.0625


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7851 ± 0.0811


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8272 ± 0.0782


# Task ID23: 3

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3)  # Replace with an actual task ID


Dataset shape: (3196, 36)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9393 ± 0.0471


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7844 ± 0.0701


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9409 ± 0.0568


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9324 ± 0.0642


# Task ID24: 3917

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3917)  # Replace with an actual task ID


Dataset shape: (2109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8521 ± 0.0241


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8379 ± 0.0212


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8312 ± 0.0342


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8492 ± 0.0202


# Task ID25: 9957

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9957)  # Replace with an actual task ID


Dataset shape: (1055, 41)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8606 ± 0.0387


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8454 ± 0.0580


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7809 ± 0.0668


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8170 ± 0.0715


# Task ID26: 9946

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9946)  # Replace with an actual task ID


Dataset shape: (569, 30)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9807 ± 0.0146


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9613 ± 0.0233


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9158 ± 0.0449


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9596 ± 0.0342


# Task ID27: 3918

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3918)  # Replace with an actual task ID


Dataset shape: (1109, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9297 ± 0.0184


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9270 ± 0.0102


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9315 ± 0.0128


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9297 ± 0.0053


# Task ID28: 3903

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3903)  # Replace with an actual task ID


Dataset shape: (1563, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9021 ± 0.0112


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8938 ± 0.0109


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8925 ± 0.0080


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8957 ± 0.0042


# Task ID29: 37

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(37)  # Replace with an actual task ID


Dataset shape: (768, 8)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7722 ± 0.0362


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7474 ± 0.0413


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7226 ± 0.0444


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7643 ± 0.0380


# Task ID30: 9971

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9971)  # Replace with an actual task ID


Dataset shape: (583, 10)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7222 ± 0.0191


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6724 ± 0.0427


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6810 ± 0.0272


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7084 ± 0.0337


# Task ID31: 9952

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9952)  # Replace with an actual task ID


Dataset shape: (5404, 5)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7513 ± 0.0092


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8618 ± 0.0150


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7966 ± 0.0105


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8168 ± 0.0105


# Task ID32: 3902

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3902)  # Replace with an actual task ID


Dataset shape: (1458, 37)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9095 ± 0.0147


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8937 ± 0.0140


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8944 ± 0.0192


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8923 ± 0.0061


# Task ID33: 49

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(49)  # Replace with an actual task ID


Dataset shape: (958, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9759 ± 0.0492


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8613 ± 0.0866


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6870 ± 0.1060


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7464 ± 0.0594


# Task ID34: 43

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(43)  # OpenML task 43; task_type defaults to "classification"


Dataset shape: (4601, 57)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9170 ± 0.0287


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8935 ± 0.0367


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8907 ± 0.0374


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9178 ± 0.0314


# Task ID35: 9978

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9978)  # OpenML task 9978; task_type defaults to "classification"


Dataset shape: (2534, 72)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9357 ± 0.0217


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9369 ± 0.0058


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9037 ± 0.0376


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9317 ± 0.0168


# Task ID36: 10093

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(10093)  # OpenML task 10093; task_type defaults to "classification"


Dataset shape: (1372, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9810 ± 0.0099


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9993 ± 0.0022


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9519 ± 0.0199


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9723 ± 0.0112


# Task ID37: 219

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(219)  # OpenML task 219; task_type defaults to "classification"


Dataset shape: (45312, 8)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7427 ± 0.0722


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7250 ± 0.0611


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7227 ± 0.0958


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7346 ± 0.0932


# Task ID38: 9976

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9976)  # OpenML task 9976; task_type defaults to "classification"


Dataset shape: (2600, 500)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.5519 ± 0.0359


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5723 ± 0.0309


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7327 ± 0.0382


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6446 ± 0.0194


# Task ID39: 6

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(6)  # OpenML task 6; task_type defaults to "classification"


Dataset shape: (20000, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7728 ± 0.0109


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9352 ± 0.0066


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.2533 ± 0.0039


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6069 ± 0.0110


# Task ID40: 53

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(53)  # OpenML task 53; task_type defaults to "classification"


Dataset shape: (846, 18)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7991 ± 0.0267


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7011 ± 0.0452


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6631 ± 0.0390


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7116 ± 0.0266


# Task ID41: 11

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(11)  # OpenML task 11; task_type defaults to "classification"


Dataset shape: (625, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8497 ± 0.0556


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8369 ± 0.0542


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6932 ± 0.0861


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6800 ± 0.1023


# Task ID42: 15

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(15)  # OpenML task 15; task_type defaults to "classification"


Dataset shape: (699, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9671 ± 0.0300


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9671 ± 0.0293


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9428 ± 0.0325


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9686 ± 0.0246


# Task ID43: 16

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(16)  # OpenML task 16; task_type defaults to "classification"


Dataset shape: (2000, 64)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9490 ± 0.0118


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9420 ± 0.0121


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5880 ± 0.0308


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9135 ± 0.0204


# Task ID44: 14

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14)  # OpenML task 14; task_type defaults to "classification"


Dataset shape: (2000, 76)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8210 ± 0.0143


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8040 ± 0.0219


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5405 ± 0.0430


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7790 ± 0.0258


# Task ID45: 32

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(32)  # OpenML task 32; task_type defaults to "classification"


Dataset shape: (10992, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9503 ± 0.0105


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9881 ± 0.0039


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7196 ± 0.0143


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8659 ± 0.0170


# Task ID46: 3549

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3549)  # OpenML task 3549; task_type defaults to "classification"


Dataset shape: (841, 70)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9964 ± 0.0054


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9941 ± 0.0079


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9133 ± 0.0492


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9834 ± 0.0109


# Task ID47: 12

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(12)  # OpenML task 12; task_type defaults to "classification"


Dataset shape: (2000, 216)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9785 ± 0.0078


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9600 ± 0.0105


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5235 ± 0.0166


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9255 ± 0.0175


# Task ID48: 9981

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9981)  # OpenML task 9981; task_type defaults to "classification"


Dataset shape: (1080, 856)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9509 ± 0.0227


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8343 ± 0.0322


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4278 ± 0.0301


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8806 ± 0.0232


# Task ID49: 18

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(18)  # OpenML task 18; task_type defaults to "classification"


Dataset shape: (2000, 6)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7360 ± 0.0206


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7170 ± 0.0125


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6355 ± 0.0584


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7040 ± 0.0211


# Task ID50: 28

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(28)  # OpenML task 28; task_type defaults to "classification"


Dataset shape: (5620, 64)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9635 ± 0.0065


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9676 ± 0.0054


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5235 ± 0.0325


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9034 ± 0.0192


# Task ID51: 2074

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(2074)  # OpenML task 2074; task_type defaults to "classification"


Dataset shape: (6430, 36)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8575 ± 0.0111


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8994 ± 0.0089


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7876 ± 0.0145


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8375 ± 0.0076


# Task ID52: 29

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(29)  # OpenML task 29; task_type defaults to "classification"


Dataset shape: (690, 15)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8420 ± 0.1574


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8406 ± 0.1126


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8377 ± 0.1391


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8536 ± 0.1334


# Task ID53: 45

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(45)  # OpenML task 45; task_type defaults to "classification"


Dataset shape: (3190, 60)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9411 ± 0.0174


In [None]:
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8379 ± 0.0260


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9210 ± 0.0135


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9088 ± 0.0332


# Task ID54: 125922

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(125922)  # OpenML task 125922; task_type defaults to "classification"


Dataset shape: (5500, 40)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9955 ± 0.0017


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9735 ± 0.0068


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5316 ± 0.0084


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8138 ± 0.0123


# Task ID55: 9960

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9960)  # OpenML task 9960; task_type defaults to "classification"


Dataset shape: (5456, 24)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6692 ± 0.0426


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7572 ± 0.0626


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9062 ± 0.0403


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9256 ± 0.0394


# Task ID56: 9964

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9964)  # OpenML task 9964; task_type defaults to "classification"


Dataset shape: (1593, 256)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9178 ± 0.0191


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9021 ± 0.0295


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5776 ± 0.0403


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8173 ± 0.0345


# Task ID57: 22

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(22)  # OpenML task 22; task_type defaults to "classification"


Dataset shape: (2000, 47)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8245 ± 0.0199


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8040 ± 0.0237


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4935 ± 0.0535


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6935 ± 0.0265


# Task ID58: 2079

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(2079)  # OpenML task 2079; task_type defaults to "classification"


Dataset shape: (736, 19)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.4828 ± 0.0909


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.4245 ± 0.1145




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5655 ± 0.0622


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.4557 ± 0.0925


# Task ID59: 14969

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14969)  # OpenML task 14969; task_type defaults to "classification"


Dataset shape: (9873, 32)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.4598 ± 0.0498


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.4234 ± 0.0623


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4444 ± 0.0496


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.4620 ± 0.0466


# Task ID60: 3560

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3560)  # OpenML task 3560; task_type defaults to "classification"


Dataset shape: (797, 4)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.2034 ± 0.0436


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.1895 ± 0.0194




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.1969 ± 0.0485


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.1997 ± 0.0395


# Task ID61: 14952

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14952)  # OpenML task 14952; task_type defaults to "classification"


Dataset shape: (11055, 30)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9372 ± 0.0068


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9438 ± 0.0119


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9174 ± 0.0077


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9233 ± 0.0099


# Task ID62: 125920

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(125920)  # OpenML task 125920; task_type defaults to "classification"


Dataset shape: (500, 12)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.6060 ± 0.0664


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5900 ± 0.0694




In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5840 ± 0.0578


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6120 ± 0.0421


# Task ID63: 23

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(23)  # OpenML task 23; task_type defaults to "classification"


Dataset shape: (1473, 9)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.5098 ± 0.0369


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5153 ± 0.0181


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5520 ± 0.0307


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.5268 ± 0.0260


# Task ID64: 3904

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3904)  # OpenML task 3904; task_type defaults to "classification"


Dataset shape: (10885, 21)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8100 ± 0.0211


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7984 ± 0.0167


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7962 ± 0.0164


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8084 ± 0.0126


# Task ID65: 3022

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3022)  # OpenML task 3022; task_type defaults to "classification"


Dataset shape: (990, 12)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.5495 ± 0.0663


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6071 ± 0.0481


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4222 ± 0.0952


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.5808 ± 0.0713


# Task ID66: 9985

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9985)  # OpenML task 9985; task_type defaults to "classification"


Dataset shape: (6118, 51)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.4580 ± 0.0622


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5031 ± 0.0568


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.4333 ± 0.0421


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.4706 ± 0.0492


# Task ID67: 9910

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(9910)  # OpenML task 9910; task_type defaults to "classification"


Dataset shape: (3751, 1776)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7337 ± 0.0228


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7438 ± 0.0349


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7643 ± 0.0167


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7254 ± 0.0181


# Task ID68: 14970

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(14970)  # OpenML task 14970; task_type defaults to "classification"


Dataset shape: (10299, 561)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9581 ± 0.0266


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9054 ± 0.0171


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8490 ± 0.0343


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8851 ± 0.0166


# Task ID69: 3021

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3021)  # OpenML task 3021; task_type defaults to "classification"


Dataset shape: (3772, 29)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9669 ± 0.0057


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9642 ± 0.0101


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9849 ± 0.0063


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9443 ± 0.0060


# Task ID70: 3481

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3481)  # OpenML task 3481; task_type defaults to "classification"


Dataset shape: (7797, 617)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9556 ± 0.0138


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9151 ± 0.0279


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.3926 ± 0.0204


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7816 ± 0.0239


# Task ID71: 3573

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(3573)  # OpenML task 3573; task_type defaults to "classification"


Dataset shape: (70000, 784)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9173 ± 0.0069


In [None]:
# NOTE(review): no output is recorded for this cell in the saved notebook (dataset shape is (70000, 784)) — confirm whether it was run to completion.
run_knn(X, y, seed=42)


In [None]:
# NOTE(review): no output is recorded for this cell in the saved notebook (dataset shape is (70000, 784)) — confirm whether it was run to completion.
run_decision_tree(X, y, seed=42)


In [None]:
# NOTE(review): no output is recorded for this cell in the saved notebook (dataset shape is (70000, 784)) — confirm whether it was run to completion.
run_random_forest(X, y, seed=42)

# Task ID72: 146824

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146824)  # OpenML task 146824; task_type defaults to "classification"


Dataset shape: (2000, 240)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9640 ± 0.0080


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9695 ± 0.0123


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7735 ± 0.0281


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9405 ± 0.0149


# Task ID73: 146820

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146820)  # OpenML task 146820; task_type defaults to "classification"


Dataset shape: (4839, 5)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.9465 ± 0.0321


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9599 ± 0.0096


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9777 ± 0.0107


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9702 ± 0.0088


# Task ID74: 146822

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146822)  # OpenML task 146822; task_type defaults to "classification"


Dataset shape: (2310, 16)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8745 ± 0.0099


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8831 ± 0.0190


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7558 ± 0.0116


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8485 ± 0.0130


# Task ID75: 146195

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146195)  # OpenML task 146195; task_type defaults to "classification"


Dataset shape: (67557, 42)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.7244 ± 0.0440


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6227 ± 0.0459


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5900 ± 0.1039


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6601 ± 0.0034


# Task ID76: 146800

In [None]:
# Load and preprocess data using OpenML Task ID
X, y = load_preprocess_task(146800)  # OpenML task 146800; task_type defaults to "classification"


Dataset shape: (1080, 77)


In [None]:
# Choose and run models with 10-fold cross-validation
run_linear_model(X, y)

10-fold CV accuracy: 0.8407 ± 0.0767


In [None]:
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.5741 ± 0.1253


In [None]:
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5139 ± 0.1086


In [None]:
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6426 ± 0.1076


# Task ID77: 146817

In [None]:
# Load and preprocess the data for OpenML task 146817 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(146817)


Dataset shape: (1941, 27)


In [None]:
# Linear-model baseline on the current X, y; the cell reports 10-fold CV accuracy
run_linear_model(X, y)

10-fold CV accuracy: 0.6322 ± 0.0939


In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6132 ± 0.1021


In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.5534 ± 0.0802


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.5931 ± 0.0807


# Task ID78: 146819

In [None]:
# Load and preprocess the data for OpenML task 146819 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(146819)


Dataset shape: (540, 18)


In [None]:
# Linear-model baseline on the current X, y; the cell reports 10-fold CV accuracy
run_linear_model(X, y)

10-fold CV accuracy: 0.9611 ± 0.0175


In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9148 ± 0.0091


In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9259 ± 0.0166


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
# NOTE(review): the recorded score (0.9148 ± 0.0091) is identical to the KNN cell for this task —
# possibly both models fall back to predicting the majority class; worth checking.
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9148 ± 0.0091


# Task ID79: 146821

In [None]:
# Load and preprocess the data for OpenML task 146821 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(146821)


Dataset shape: (1728, 6)


In [None]:
# Linear-model baseline on the current X, y; the cell reports 10-fold CV accuracy
run_linear_model(X, y)

10-fold CV accuracy: 0.8327 ± 0.0780


In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
run_knn(X, y, seed=42)




KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.7617 ± 0.0899




In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.7326 ± 0.0674


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.7344 ± 0.0587


# Task ID80: 14954

In [None]:
# Load and preprocess the data for OpenML task 14954 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(14954)


Dataset shape: (540, 37)


In [None]:
# Linear-model baseline on the current X, y; the cell reports 10-fold CV accuracy
run_linear_model(X, y)

10-fold CV accuracy: 0.6019 ± 0.1229


In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.6278 ± 0.0770




In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.6111 ± 0.1356


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.6444 ± 0.1239


# Task ID81: 167141

In [None]:
# Load and preprocess the data for OpenML task 167141 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(167141)


Dataset shape: (5000, 20)


In [None]:
# Linear-model baseline on the current X, y; the cell reports 10-fold CV accuracy
run_linear_model(X, y)

10-fold CV accuracy: 0.8546 ± 0.0073


In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8778 ± 0.0050


In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9264 ± 0.0088


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8892 ± 0.0077


# Task ID82: 167140

In [None]:
# Load and preprocess the data for OpenML task 167140 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(167140)


Dataset shape: (3186, 180)


In [None]:
# Linear-model baseline on the current X, y; the cell reports 10-fold CV accuracy
run_linear_model(X, y)

10-fold CV accuracy: 0.9466 ± 0.0115


In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.8528 ± 0.0167


In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.8939 ± 0.0155


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.9005 ± 0.0119


# Task ID83: 167125

In [None]:
# Load and preprocess the data for OpenML task 167125 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(167125)


Dataset shape: (3279, 1558)


In [None]:
# Linear-model baseline on the current X, y; the cell reports 10-fold CV accuracy
run_linear_model(X, y)

10-fold CV accuracy: 0.9634 ± 0.0248


In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
run_knn(X, y, seed=42)


KNN with random params: {'n_neighbors': 15, 'knn_alg': 'ball_tree', 'leaf_size': 300}
10-fold CV accuracy: 0.9341 ± 0.0289


In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
run_decision_tree(X, y, seed=42)


Decision Tree with random params: {'max_depth': 4}
10-fold CV accuracy: 0.9558 ± 0.0231


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
run_random_forest(X, y, seed=42)

Random Forest with random params: {'max_depth': 4, 'n_estimators': 86}
10-fold CV accuracy: 0.8990 ± 0.0204


# Task ID84: 167124

In [None]:
# Load and preprocess the data for OpenML task 167124 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(167124)


Dataset shape: (60000, 3072)


In [None]:
# Linear-model baseline on the current X, y (10-fold cross-validation)
# NOTE(review): no output is recorded for this cell — presumably it was not run to
# completion on the (60000, 3072) dataset; confirm and record the result or mark it skipped.
run_linear_model(X, y)

In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
# NOTE(review): no output is recorded for this cell — presumably not run to completion
# on the (60000, 3072) dataset; confirm.
run_knn(X, y, seed=42)


In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
# NOTE(review): no output is recorded for this cell — presumably not run to completion
# on the (60000, 3072) dataset; confirm.
run_decision_tree(X, y, seed=42)


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
# NOTE(review): no output is recorded for this cell — presumably not run to completion
# on the (60000, 3072) dataset; confirm.
run_random_forest(X, y, seed=42)

# Task ID85: 167121

In [None]:
# Load and preprocess the data for OpenML task 167121 (default task_type="classification").
# X and y are rebound here, so the model cells that follow evaluate this task.
X, y = load_preprocess_task(167121)


Dataset shape: (92000, 1024)


In [None]:
# Linear-model baseline on the current X, y (10-fold cross-validation)
# NOTE(review): no output is recorded for this cell — presumably it was not run to
# completion on the (92000, 1024) dataset; confirm and record the result or mark it skipped.
run_linear_model(X, y)

In [None]:
# KNN baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_knn)
# NOTE(review): no output is recorded for this cell — presumably not run to completion
# on the (92000, 1024) dataset; confirm.
run_knn(X, y, seed=42)


In [None]:
# Decision-tree baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_decision_tree)
# NOTE(review): no output is recorded for this cell — presumably not run to completion
# on the (92000, 1024) dataset; confirm.
run_decision_tree(X, y, seed=42)


In [None]:
# Random-forest baseline; seed=42 presumably pins the random hyperparameter draw (verify in run_random_forest)
# NOTE(review): no output is recorded for this cell — presumably not run to completion
# on the (92000, 1024) dataset; confirm.
run_random_forest(X, y, seed=42)

In [None]:
# Write the currently installed package versions to requirements.txt
pip freeze > requirements.txt