# Linear vs. Non-linear classifiers: An extensive benchmark on OpenML

In [None]:
import collections
import matplotlib.pyplot as plt
import numpy as np
import openml
import sklearn
%matplotlib inline

In [None]:
# Examplar setup ids per classifier family. Each pair is ordered
# (linear, non-linear): later cells read index 0 as the linear setup
# and index 1 as the non-linear one.
examplar_setup_ids_svm = (7130285, 7130286)
examplar_setup_ids_ann = (7130157, 7130159)
examplar_setup_ids_dt = (7130853, 7130854)

# Keys double as plot titles in the plotting cells.
examplar_setup_ids_all = {
    'Support Vector Machines': examplar_setup_ids_svm,
    'Neural Networks': examplar_setup_ids_ann,
    'Decision Trees': examplar_setup_ids_dt,
}

# Evaluation measure to fetch, and the dataset qualities used as the
# scatterplot axes.
measure = 'predictive_accuracy'
meta_feature_x_axis = 'NumberOfInstances'
meta_feature_y_axis = 'NumberOfFeatures'

# The study supplies the list of tasks (study.tasks) used by the later cells.
study_id = 123
tag = 'study_%d' % study_id
study = openml.study.get_study(study_id, 'tasks')

In [None]:
# Collect the dataset qualities (meta-features) of every task in the study,
# keyed by task id.
# Note: this will download (and cache) all involved datasets, so it might
# take a while. The result is only needed for the scatterplots.
task_metafeatures = {}
for task_id in study.tasks:
    dataset = openml.tasks.get_task(task_id).get_dataset()
    task_metafeatures[task_id] = dataset.qualities


In [None]:
# Obtain the evaluations, stored as
# setup_task_evaluation[examplar setup id][task id] -> value of `measure`.
setup_task_evaluation = collections.defaultdict(dict)
for classifier_family, examplars in examplar_setup_ids_all.items():
    for setup_id in examplars:
        examplar_setup = openml.setups.get_setup(setup_id)
        # NOTE(review): presumably these are the hyperparameters that may
        # differ from the examplar when matching setups -- confirm against
        # list_partial_setups.
        parameter_names = ['categorical_features', 'random_state']
        all_setups = openml.setups.list_partial_setups(examplar_setup, parameter_names)
        evaluations = openml.evaluations.list_evaluations(
            measure,
            flow=[examplar_setup.flow_id],
            setup=all_setups.keys(),
            task=study.tasks,
        )
        # One score is kept per task: if several evaluations share a task id,
        # the one iterated last overwrites the earlier ones.
        scores_per_task = setup_task_evaluation[setup_id]
        for evaluation in evaluations.values():
            scores_per_task[evaluation.task_id] = evaluation.value
        print('[%s] Obtained %d evaluations for examplar setup_id %d'
              % (classifier_family, len(scores_per_task), setup_id))


# Difference plots
Produces the difference plots as in the paper. These give an idea of how often a given type (linear / non-linear) is better, and by how much. We expect the non-linear classifier to be better in most cases. 

In [None]:
# S-Plot: for each classifier family, plot the sorted per-task score
# difference between the non-linear and the linear examplar setup.
for classifier_family, examplars in examplar_setup_ids_all.items():
    id_linear = examplars[0]
    id_nonlinear = examplars[1]
    scores_linear = setup_task_evaluation[id_linear]
    scores_nonlinear = setup_task_evaluation[id_nonlinear]
    # Only tasks evaluated under both setups can be compared.
    # The difference is non-linear minus linear, so that positive values mean
    # the non-linear setup scored higher -- this is what the y-axis label states.
    differences = sorted(
        scores_nonlinear[task_id] - scores_linear[task_id]
        for task_id in scores_linear
        if task_id in scores_nonlinear
    )
    fig, ax = plt.subplots()
    ax.plot(range(len(differences)), differences)
    ax.set_title(classifier_family)
    ax.set_xlabel('Dataset (sorted)')
    ax.set_ylabel('perf non-linear - perf linear')
    ax.grid(linestyle='--', axis='y')

# Scatter Plots 
This script produces scatterplots similar to the ones presented in the paper. Note that there is a big difference: the scatterplots in the paper are based on a statistical test, whereas the scatterplots in this notebook are based on the raw difference in score (i.e., which of the two classifiers scored higher on a task, however small the margin). 

In [None]:
# Scatterplots: for each classifier family, place every task at its
# (meta_feature_x_axis, meta_feature_y_axis) coordinates and colour it by
# which examplar setup obtained the higher score on that task.
colors = ['blue', 'gray', 'red']
labels = ['linear', 'equal', 'non-linear']

for classifier_family, examplars in examplar_setup_ids_all.items():
    id_linear = examplars[0]
    id_nonlinear = examplars[1]
    scores_linear = setup_task_evaluation[id_linear]
    scores_nonlinear = setup_task_evaluation[id_nonlinear]
    all_results = collections.defaultdict(list)

    for task_id in scores_linear:
        # Only tasks evaluated under both setups can be compared.
        if task_id not in scores_nonlinear:
            continue
        task_coords = [task_metafeatures[task_id][meta_feature_x_axis],
                       task_metafeatures[task_id][meta_feature_y_axis]]
        score_linear = scores_linear[task_id]
        score_nonlinear = scores_nonlinear[task_id]

        # fill the all_results dict using the keys in labels
        if score_linear > score_nonlinear:
            all_results['linear'].append(task_coords)
        elif score_nonlinear > score_linear:
            all_results['non-linear'].append(task_coords)
        else:
            all_results['equal'].append(task_coords)

    fig, ax = plt.subplots()
    for label, color in zip(labels, colors):
        coords = all_results[label]
        # Force an (n, 2) array so that column slicing also works when a
        # category has no tasks; np.array([]) alone is 1-D and `[:, 0]`
        # would raise IndexError.
        points = np.array(coords).reshape(len(coords), 2)
        # Draw on this figure's own axes, consistent with the ax.* calls below.
        ax.scatter(points[:, 0], points[:, 1], c=color, alpha=0.5, label=label)
    ax.set_title(classifier_family)
    ax.set_xlabel(meta_feature_x_axis)
    ax.set_ylabel(meta_feature_y_axis)
    ax.legend()
    ax.set_xscale('log')
    ax.set_yscale('log')