In [34]:
from sklearn.model_selection import cross_val_score, KFold
import numpy as np

In [52]:
from sklearn.datasets import load_iris, load_digits, load_wine, load_breast_cancer

# Registry of benchmark datasets: short display name -> zero-argument loader.
# Insertion order is the row order of the leaderboard.
loaders = dict(
    iris=load_iris,
    digits=load_digits,
    wine=load_wine,
    breast_cancer=load_breast_cancer,
)

In [60]:
from sklearn.tree import DecisionTreeClassifier

# Estimators to benchmark, keyed by the column title shown in the leaderboard.
# random_state is pinned so repeated runs build the same tree: scikit-learn
# randomly permutes the candidate features at each split, so an unseeded
# tree may differ from one run to the next.
models = {
    "DecisionTree": DecisionTreeClassifier(criterion="entropy", random_state=0),
}

In [64]:
def benchmark_dataset(ds_name, loader, model, n_splits=5, random_state=0):
    """Return the mean cross-validated score of ``model`` on one dataset.

    Parameters
    ----------
    ds_name : str
        Label of the dataset. It is not used in the computation; the
        parameter is kept so existing callers keep working.
    loader : callable
        Zero-argument callable returning a mapping that exposes the feature
        matrix under ``"data"`` and the labels under ``"target"``.
    model : estimator
        Estimator handed unchanged to ``cross_val_score``.
    n_splits : int, default 5
        Number of folds used by ``KFold``.
    random_state : int or None, default 0
        Seed for the fold shuffle, so that repeated calls produce the same
        splits (and therefore comparable scores). Pass ``None`` to get the
        previous unseeded behaviour.

    Returns
    -------
    The arithmetic mean of the per-fold scores returned by ``cross_val_score``.
    """
    dataset = loader()
    X, y = dataset["data"], dataset["target"]
    # Shuffle before splitting, but with a fixed seed: an unseeded shuffle
    # makes every run of the notebook report different numbers.
    folds = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    cv_scores = cross_val_score(model, X, y, cv=folds)
    return np.mean(cv_scores)


In [67]:
from prettytable import PrettyTable

def benchmark(loaders, models):
    """Print a table of scores with one row per dataset and one column per model.

    ``loaders`` maps a dataset name to its loader and ``models`` maps a model
    name to an estimator. Every (model, dataset) pair is scored with
    ``benchmark_dataset`` and the result is shown with three decimals.
    Nothing is returned; the table is written with ``print``.
    """
    names = [*loaders.keys()]

    table = PrettyTable()
    table.add_column("Dataset", names)

    for label, estimator in models.items():
        column = []
        for name in names:
            score = benchmark_dataset(name, loaders[name], estimator)
            column.append(f"{score:.3f}")
        table.add_column(label, column)

    print(table)

In [68]:
# Score every registered model on every registered dataset and print the table.
benchmark(loaders, models)

+---------------+--------------+
|    Dataset    | DecisionTree |
+---------------+--------------+
|      iris     |    0.960     |
|     digits    |    0.862     |
|      wine     |    0.894     |
| breast_cancer |    0.919     |
+---------------+--------------+
