# Load the OpenML100 benchmark suite

In [3]:
import os
import warnings

import openml
import sklearn
import sklearn.impute
import sklearn.pipeline
import sklearn.preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# NOTE(review): this silences every warning for the rest of the session,
# including library deprecation notices -- consider narrowing the filter.
warnings.filterwarnings('ignore')

# Fetch the 'OpenML100' benchmark suite; the cells below iterate over its `.tasks`.
benchmark_suite = openml.study.get_suite('OpenML100')  # obtain the benchmark suite

# DecisionTreeClassifier

In [5]:
# Build the model once: impute missing values, then fit a decision tree.
# SimpleImputer replaces sklearn.preprocessing.Imputer, which was removed in
# scikit-learn 0.22; both default to mean imputation.
clf = sklearn.pipeline.make_pipeline(sklearn.impute.SimpleImputer(), DecisionTreeClassifier())

# Read the OpenML API key from the environment instead of hardcoding it in the
# notebook. When OPENML_APIKEY is unset, openml.config.apikey is left untouched.
# NOTE(review): the key that used to be committed here should be regenerated.
api_key = os.environ.get('OPENML_APIKEY')
if api_key:
    openml.config.apikey = api_key

for task_id in benchmark_suite.tasks:  # iterate over all tasks in the suite
    try:
        task = openml.tasks.get_task(task_id)  # download the OpenML task
        run = openml.runs.run_model_on_task(clf, task)  # run the classifier on the task
        score = run.get_metric_fn(accuracy_score)  # accuracy scores for the run; averaged below
        print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name, score.mean()))
        # To share the result, call run.publish() here (needs network access and an API key).
    except Exception as exc:
        # Best effort: report the failing task and carry on with the remaining ones.
        # Catching Exception rather than using a bare except keeps a kernel
        # interrupt (KeyboardInterrupt) able to stop the loop.
        print('Error in task %s: %s' % (task_id, exc))
# runs = openml.runs.list_runs(task=benchmark_suite.tasks, limit=1000)

Data set: kr-vs-kp; Accuracy: 1.00
Data set: letter; Accuracy: 0.88
Data set: balance-scale; Accuracy: 0.77
Data set: mfeat-factors; Accuracy: 0.89
Data set: mfeat-fourier; Accuracy: 0.74
Data set: breast-w; Accuracy: 0.95
Data set: mfeat-karhunen; Accuracy: 0.82
Data set: mfeat-morphological; Accuracy: 0.65
Data set: mfeat-pixel; Accuracy: 0.86
Data set: car; Accuracy: 0.98
Data set: mfeat-zernike; Accuracy: 0.67
Data set: cmc; Accuracy: 0.48
Data set: mushroom; Accuracy: 1.00
Data set: optdigits; Accuracy: 0.90
Data set: credit-approval; Accuracy: 0.81
Data set: credit-g; Accuracy: 0.70
Data set: pendigits; Accuracy: 0.96
Data set: segment; Accuracy: 0.96
Data set: diabetes; Accuracy: 0.70
Data set: soybean; Accuracy: 0.93
Data set: spambase; Accuracy: 0.92
Data set: splice; Accuracy: 0.91
Data set: tic-tac-toe; Accuracy: 0.88
Data set: vehicle; Accuracy: 0.70
Data set: waveform-5000; Accuracy: 0.75
Data set: electricity; Accuracy: 0.89
Data set: satimage; Accuracy: 0.86
Data set: eu

# RandomForestClassifier

In [6]:
# Build the model once: impute missing values, then fit a random forest.
# SimpleImputer replaces sklearn.preprocessing.Imputer, which was removed in
# scikit-learn 0.22; both default to mean imputation.
clf = sklearn.pipeline.make_pipeline(sklearn.impute.SimpleImputer(),
                                     RandomForestClassifier())

# Read the OpenML API key from the environment instead of hardcoding it in the
# notebook. When OPENML_APIKEY is unset, openml.config.apikey is left untouched.
# NOTE(review): the key that used to be committed here should be regenerated.
api_key = os.environ.get('OPENML_APIKEY')
if api_key:
    openml.config.apikey = api_key

for task_id in benchmark_suite.tasks:  # iterate over all tasks in the suite
    try:
        task = openml.tasks.get_task(task_id)  # download the OpenML task
        run = openml.runs.run_model_on_task(clf, task)  # run the classifier on the task
        score = run.get_metric_fn(accuracy_score)  # accuracy scores for the run; averaged below
        print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name, score.mean()))
    except Exception as exc:
        # Best effort: report the failing task and carry on with the remaining ones.
        # Catching Exception rather than using a bare except keeps a kernel
        # interrupt (KeyboardInterrupt) able to stop the loop.
        print('Error in task %s: %s' % (task_id, exc))

Data set: kr-vs-kp; Accuracy: 0.99
Data set: letter; Accuracy: 0.94
Data set: balance-scale; Accuracy: 0.83
Data set: mfeat-factors; Accuracy: 0.95
Data set: mfeat-fourier; Accuracy: 0.79
Data set: breast-w; Accuracy: 0.96
Data set: mfeat-karhunen; Accuracy: 0.91
Data set: mfeat-morphological; Accuracy: 0.70
Data set: mfeat-pixel; Accuracy: 0.96
Data set: car; Accuracy: 0.97
Data set: mfeat-zernike; Accuracy: 0.75
Data set: cmc; Accuracy: 0.52
Data set: mushroom; Accuracy: 1.00
Data set: optdigits; Accuracy: 0.97
Data set: credit-approval; Accuracy: 0.86
Data set: credit-g; Accuracy: 0.74
Data set: pendigits; Accuracy: 0.99
Data set: segment; Accuracy: 0.98
Data set: diabetes; Accuracy: 0.74
Data set: soybean; Accuracy: 0.94
Data set: spambase; Accuracy: 0.94
Data set: splice; Accuracy: 0.94
Data set: tic-tac-toe; Accuracy: 0.93
Data set: vehicle; Accuracy: 0.74
Data set: waveform-5000; Accuracy: 0.81
Data set: electricity; Accuracy: 0.89
Data set: satimage; Accuracy: 0.90
Data set: eu