In [20]:
import numpy as np
import openml
import pandas as pd

from sklearn.base import clone
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

In [55]:
# Registry of candidate models, keyed by display name.
# Each value is a 2-tuple: (estimator, grid), where `grid` maps one
# hyper-parameter name to the list of values to try for it.
classifiers = dict(
    KNeighborsClassifier=(KNeighborsClassifier, dict(n_neighbors=[1, 2, 3, 4, 5])),
    GaussianNB=(GaussianNB, dict(var_smoothing=[1e-9, 1e-5, 1e-1])),
    # Disabled candidates, kept for reference.
    # NOTE(review): the LogisticRegression and MLPClassifier entries hold
    # configured *instances* rather than classes; whatever consumes this
    # registry must be able to handle instances before they are enabled.
    # LogisticRegression=(LogisticRegression(max_iter=1000), dict(C=[0.1, 0.5, 1.0, 2.0, 5.0])),
    # DecisionTreeClassifier=(DecisionTreeClassifier, dict(max_depth=[1, 3, 5, 8, 10])),
    # GradientBoostingClassifier=(GradientBoostingClassifier, dict(n_estimators=[1, 3, 5, 8, 10])),
    # RandomForestClassifier=(RandomForestClassifier, dict(n_estimators=[1, 3, 5, 8, 10])),
    # MLPClassifier=(MLPClassifier(max_iter=1000), dict(alpha=[1e-5, 1e-3, 0.1, 10])),
)

In [19]:
# Unpack the k-NN registry entry into (estimator, grid) and keep the grid.
# Despite its name, `n_neighbors_list` is the whole grid dict
# ({'n_neighbors': [...]}) rather than the bare list of k values.
_knn_estimator, n_neighbors_list = classifiers['KNeighborsClassifier']

print(n_neighbors_list)

{'n_neighbors': [1, 2, 3, 4, 5]}


In [24]:
# Fetch each benchmark dataset from OpenML by numeric id and keep only the
# first element of what `get_data()` returns, keyed by a readable label.
# Requires network access (or a populated OpenML cache).
datasets = {
    label: openml.datasets.get_dataset(openml_id).get_data()[0]
    for label, openml_id in (
        ('steel-plates-fault', 1504),
        ('ionosphere', 59),
        ('banknote-authentication', 1462),
    )
}

  'steel-plates-fault': openml.datasets.get_dataset(1504).get_data()[0],
  'ionosphere': openml.datasets.get_dataset(59).get_data()[0],
  'banknote-authentication': openml.datasets.get_dataset(1462).get_data()[0]


In [62]:
# Repeated hold-out evaluation of every (dataset, classifier, parameter value).
#
# Result layout:
#   results[dataset_name][clf_name] -> list of (param, value, accuracies)
# where `accuracies` holds one test-set accuracy per seeded 50/50 split.
results = {}

for dataset_name, data in datasets.items():
    # Assumes the target is the last column of the frame — TODO confirm for
    # every dataset in `datasets`.
    X = data.iloc[:, :-1]  # all columns except the last are features
    y = data.iloc[:, -1]   # the last column is the target

    # The splits depend only on the dataset and the seed, so build the 50
    # seeded 50/50 splits once per dataset instead of once per parameter
    # value. Seeding makes the partitions identical either way.
    splits = [
        train_test_split(X, y, test_size=0.5, random_state=seed)
        for seed in range(50)
    ]

    results[dataset_name] = {}
    for clf_name, (clf_class, param_grid) in classifiers.items():
        results[dataset_name][clf_name] = []
        for param, values in param_grid.items():
            for value in values:
                accuracies = []

                for X_train, X_test, y_train, y_test in splits:
                    # A registry entry may be an estimator class or an already
                    # configured instance (e.g. LogisticRegression(max_iter=1000)).
                    # Classes are instantiated with defaults; instances are
                    # cloned so each run starts from a fresh, unfitted model
                    # that keeps the preset parameters.
                    if isinstance(clf_class, type):
                        clf = clf_class()
                    else:
                        clf = clone(clf_class)
                    clf.set_params(**{param: value})
                    clf.fit(X_train, y_train)
                    y_pred = clf.predict(X_test)
                    accuracies.append(accuracy_score(y_test, y_pred))

                results[dataset_name][clf_name].append((param, value, accuracies))
