In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from vorace import Vorace
import pandas as pd
import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
import warnings
import time
import json
warnings.filterwarnings("ignore")

In [2]:
DATA_PATH = "data"

def pd_read(path, class_="class"):
    """Read a CSV file and split it into features and labels.

    Parameters
    ----------
    path : str or file-like
        Forwarded unchanged to ``pd.read_csv``.
    class_ : str, default "class"
        Name of the column holding the labels; every other column is
        treated as a feature.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The feature columns as a 2-D array and the label column as a
        1-D array, in that order.
    """
    table = pd.read_csv(path)
    features = table.drop(columns=class_)
    labels = table[class_]
    return np.array(features), np.array(labels)

In [3]:
# https://archive.ics.uci.edu/ml/datasets/wine
# wine_classes = 3
# X_wine, y_wine = pd_read(os.path.join(DATA_PATH, "wine.csv"))
# y_wine = y_wine - 1 # ensure onehot starts at zero
# y_hot_wine = to_categorical(y_wine, num_classes=wine_classes)

In [4]:
# Dermatology dataset (UCI): https://archive.ics.uci.edu/ml/datasets/dermatology
derm_classes = 6
derm_csv = os.path.join(DATA_PATH, "dermatology.csv")
X_derm, y_derm = pd_read(derm_csv)
# Shift labels down by one so the one-hot encoding starts at class 0
# (presumably the CSV labels are 1-based -- verify against the data file).
y_derm = y_derm - 1
y_hot_derm = to_categorical(y_derm, num_classes=derm_classes)

In [5]:
def run_experiments(X, y, y_hot, voting_rule, n_classes, num_models, scale=False, n_exp=10, n_folds=10,
                    random_state=None):
    """Run repeated stratified k-fold cross-validation of a Vorace ensemble.

    For each of ``n_exp`` repetitions a new ``Vorace`` object is built and a
    shuffled ``StratifiedKFold`` split is drawn. Within every fold the
    ensemble is reset, fitted on the training part and evaluated on the
    held-out part with a micro-averaged F1 score. Only the ``predict`` call
    is timed.

    Parameters
    ----------
    X : np.ndarray
        2-D feature matrix; ``X.shape[1]`` is passed to Vorace as ``nInput``.
    y : np.ndarray
        Integer class labels, used for stratification and scoring.
    y_hot : np.ndarray
        One-hot encoding of ``y``, indexed with the same row indices.
    voting_rule : str
        Forwarded to ``Vorace.predict`` as ``voting``.
    n_classes : int
        Number of classes, forwarded to Vorace.
    num_models : int
        Forwarded to Vorace as ``n_models``.
    scale : bool, default False
        If True, standardize features with a ``StandardScaler`` fitted on
        the training fold only and applied to the test fold.
    n_exp : int, default 10
        Number of cross-validation repetitions.
    n_folds : int, default 10
        Number of folds per repetition.
    random_state : int or None, default None
        Seed for the fold shuffling. Repetition ``i`` uses
        ``random_state + i`` so repetitions get different but reproducible
        splits. ``None`` keeps the original unseeded behavior. This seeds
        only the fold split, not anything inside Vorace.

    Returns
    -------
    tuple of (list of float, list of float)
        ``exp_scores``: mean F1 score over the folds, one entry per
        repetition. ``exp_times``: mean prediction time in seconds over the
        folds, one entry per repetition.
    """
    n_features = X.shape[1]
    batch_size = 32  # alternative considered: int(2**np.ceil(np.log2(X.shape[0] / 100)))

    exp_scores = []
    exp_times = []

    for exp_idx in range(n_exp):
        # Offset the seed per repetition; reusing one seed would repeat the
        # exact same folds in every repetition.
        fold_seed = None if random_state is None else random_state + exp_idx

        # profile_type=3 is kept from the original code; its meaning is defined by Vorace.
        vorace = Vorace(n_models=num_models, profile_type=3, nInput=n_features, nClasses=n_classes,
                        batch_size=batch_size)
        k_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=fold_seed)
        scaler = StandardScaler()
        k_scores = []
        k_times = []

        for train_idx, test_idx in k_fold.split(X, y):
            if scale:
                # Fit on the training fold only so test statistics do not leak in.
                X_train = scaler.fit_transform(X[train_idx])
                X_test = scaler.transform(X[test_idx])
            else:
                X_train = X[train_idx]
                X_test = X[test_idx]

            vorace.reset()
            vorace.fit(X_train, y[train_idx], y_hot[train_idx])

            # perf_counter is monotonic and high-resolution, which matters
            # for the millisecond-scale intervals measured here.
            start_time = time.perf_counter()
            y_pred_vorace, _ = vorace.predict(voting=voting_rule, x=X_test, nClasses=n_classes, argMax=True,
                                              tiebreak="best")
            elapsed = time.perf_counter() - start_time

            k_scores.append(f1_score(y[test_idx], y_pred_vorace, average="micro"))
            k_times.append(elapsed)

        exp_scores.append(sum(k_scores) / len(k_scores))
        exp_times.append(sum(k_times) / len(k_times))

    return exp_scores, exp_times

In [None]:
def exp(n_models, voting_rule, X, y, y_hot, n_classes, data):
    """Run one experiment configuration, record it in ``data`` and print it.

    Calls ``run_experiments`` with its default settings and stores the
    returned ``(scores, times)`` pair in ``data`` under the key
    ``"<n_models> <voting_rule>"``. It then prints the configuration,
    each list followed by its mean, and a separator line.
    """
    outcome_scores, outcome_times = run_experiments(X, y, y_hot, voting_rule, n_classes, n_models)
    label = " ".join((str(n_models), voting_rule))
    data[label] = (outcome_scores, outcome_times)

    print(n_models, voting_rule)
    for series in (outcome_scores, outcome_times):
        print(series)
        print(np.mean(series))
    print("------------")

# Ensemble sizes and voting rules to sweep over.
models = [3, 5, 10, 20, 30, 40, 50, 60]
voting_rules = ["Borda", "Copeland", "Sum", "Kemeny", "Plurality"]

# Results collected by exp(), keyed by "<n_models> <voting_rule>".
data = {}

# Single-model baseline first, then every (ensemble size, voting rule) pair.
configurations = [(1, "Plurality")]
configurations += [(m, v) for m in models for v in voting_rules]
for m, v in configurations:
    exp(m, v, X_derm, y_derm, y_hot_derm, derm_classes, data)

# Persist all collected results once the sweep has finished.
with open("data1.json", "w") as f:
    json.dump(data, f)

1 Plurality
[0.9384920634920635, 0.9298412698412699, 0.9637301587301588, 0.9356349206349208, 0.9525396825396826, 0.9442063492063493, 0.9749206349206349, 0.8269047619047619, 0.9330952380952381, 0.8515079365079365]
0.9250873015873016
[0.0008739471435546875, 0.0016165733337402343, 0.05015065670013428, 0.0016283750534057616, 0.0007111549377441407, 0.0006911516189575196, 0.04888334274291992, 0.001807856559753418, 0.0008887290954589844, 0.0017297744750976562]
0.01089815616607666
------------
3 Borda
[0.9384126984126985, 0.972063492063492, 0.9496031746031746, 0.9552380952380952, 0.9413492063492065, 0.9608730158730159, 0.9692857142857143, 0.9555555555555557, 0.9692857142857143, 0.9552380952380952]
0.9566904761904762
[0.0022203683853149413, 0.0030582666397094725, 0.02578270435333252, 0.0265824556350708, 0.0022039175033569335, 0.0024642467498779295, 0.05425050258636475, 0.002538299560546875, 0.050245237350463864, 0.0027600765228271485]
0.01721060752868652
------------
3 Copeland
[0.9637301587301

20 Sum
[0.9637301587301588, 0.9665079365079366, 0.974920634920635, 0.9694444444444444, 0.9749206349206349, 0.9776984126984127, 0.9663492063492063, 0.9720634920634922, 0.9664285714285714, 0.9692063492063492]
0.9701269841269842
[0.17078251838684083, 0.24695134162902832, 0.22304773330688477, 0.18524975776672364, 0.2727374792098999, 0.1555853843688965, 0.09563267230987549, 0.27976417541503906, 0.19949860572814943, 0.2774435758590698]
0.21066932439804079
------------
20 Kemeny
[0.9661904761904763, 0.9413492063492065, 0.9443650793650793, 0.9719841269841272, 0.9414285714285715, 0.9471428571428572, 0.8546031746031746, 0.9413492063492065, 0.9609523809523809, 0.9582539682539682]
0.9427619047619048
[0.2713937282562256, 0.2861216068267822, 0.16928136348724365, 0.27551586627960206, 0.27352108955383303, 0.3274494409561157, 0.20065760612487793, 0.26460819244384765, 0.26960666179656984, 0.36141128540039064]
0.2699566841125488
------------
20 Plurality
[0.9693650793650793, 0.9495238095238095, 0.9552380