In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from vorace import Vorace
import pandas as pd
import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
import warnings
import time
import json
warnings.filterwarnings("ignore")

In [None]:
# Directory holding the dataset CSV files; a relative path, so it is resolved
# against the notebook's current working directory.
DATA_PATH = "data"

def pd_read(path, class_="class"):
    """Load a CSV file and split it into a feature matrix and a label vector.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``.
    class_ : str, default "class"
        Name of the column that holds the target labels.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(X, y)`` where ``X`` contains every column except ``class_`` and
        ``y`` contains the ``class_`` column, both in file row order.
    """
    frame = pd.read_csv(path)
    labels = frame[class_]
    features = frame.drop(columns=class_)
    return np.array(features), np.array(labels)

In [None]:
# UCI Wine dataset: https://archive.ics.uci.edu/ml/datasets/wine
wine_classes = 3
X_wine, y_wine = pd_read(os.path.join(DATA_PATH, "wine.csv"))
y_wine = y_wine - 1 # ensure onehot starts at zero
# Guard the shift above: a label outside [0, wine_classes) would either fail
# inside to_categorical or, if negative, be used as an index counted from the
# end and silently land on the wrong class column.
assert y_wine.min() >= 0 and y_wine.max() < wine_classes, (
    "wine labels must lie in [0, %d) after shifting, got [%s, %s]"
    % (wine_classes, y_wine.min(), y_wine.max())
)
y_hot_wine = to_categorical(y_wine, num_classes=wine_classes)

In [None]:
# UCI Dermatology dataset: https://archive.ics.uci.edu/ml/datasets/dermatology
derm_classes = 6
X_derm, y_derm = pd_read(os.path.join(DATA_PATH, "dermatology.csv"))
# Shift the labels down by one so that one-hot encoding starts at index zero
# (this presumes the CSV stores 1-based class ids; the assertion below checks it).
y_derm = y_derm - 1
# A label outside [0, derm_classes) would either fail inside to_categorical or,
# if negative, be used as an index counted from the end and silently land on
# the wrong class column.
assert y_derm.min() >= 0 and y_derm.max() < derm_classes, (
    "dermatology labels must lie in [0, %d) after shifting, got [%s, %s]"
    % (derm_classes, y_derm.min(), y_derm.max())
)
y_hot_derm = to_categorical(y_derm, num_classes=derm_classes)

In [None]:
def run_experiments(X, y, y_hot, voting_rule, n_classes, num_models, scale=False, n_exp=10, n_folds=10,
                    batch_size=32, random_state=None):
    """Repeatedly cross-validate a Vorace ensemble, recording score and prediction time.

    Each of the ``n_exp`` repetitions builds a new ``Vorace`` instance and a new
    shuffled ``StratifiedKFold`` split. For every fold the ensemble is reset,
    fitted on the training part and asked to predict the held-out part; only
    the ``predict`` call is timed.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix; ``X.shape[1]`` is passed to Vorace as ``nInput``.
    y : numpy.ndarray
        Class labels, used for stratification and for scoring.
    y_hot : numpy.ndarray
        One-hot encoded labels, row-aligned with ``X``; passed to ``Vorace.fit``.
    voting_rule : str
        Forwarded to ``Vorace.predict`` as ``voting``.
    n_classes : int
        Number of classes, forwarded to Vorace.
    num_models : int
        Ensemble size, forwarded to Vorace as ``n_models``.
    scale : bool, default False
        If true, standardise features with a ``StandardScaler`` fitted on the
        training fold only and applied to both training and test folds.
    n_exp : int, default 10
        Number of independent repetitions.
    n_folds : int, default 10
        Number of cross-validation folds per repetition.
    batch_size : int, default 32
        Forwarded to the Vorace constructor.
    random_state : int or None, default None
        Seed for the fold shuffling. When given, repetition ``i`` uses
        ``random_state + i`` so repetitions differ from each other but the
        whole run is repeatable. This seeds the splits only; any randomness
        inside Vorace itself is not controlled here. ``None`` keeps the
        unseeded behaviour.

    Returns
    -------
    exp_scores : list of float
        Per repetition, the mean micro-averaged F1 score over the folds.
    exp_times : list of float
        Per repetition, the mean time in seconds spent in ``Vorace.predict``
        per fold.
    """
    exp_scores = []
    exp_times = []
    n_features = X.shape[1]

    for rep in range(n_exp):
        vorace = Vorace(n_models=num_models, profile_type=3, nInput=n_features, nClasses=n_classes,
                        batch_size=batch_size)
        fold_seed = None if random_state is None else random_state + rep
        k_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=fold_seed)
        scaler = StandardScaler()
        k_scores = []
        k_times = []

        for train_idx, test_idx in k_fold.split(X, y):
            if scale:
                # Fit on the training fold only so no test statistics leak in.
                X_train = scaler.fit_transform(X[train_idx])
                X_test = scaler.transform(X[test_idx])
            else:
                X_train = X[train_idx]
                X_test = X[test_idx]

            # Reuse the same Vorace object across folds, resetting it before each fit.
            vorace.reset()
            vorace.fit(X_train, y[train_idx], y_hot[train_idx])

            # perf_counter is a monotonic, high-resolution clock intended for
            # measuring short intervals; time.time() can jump with clock adjustments.
            start_time = time.perf_counter()
            y_pred_vorace, _ = vorace.predict(voting=voting_rule, x=X_test, nClasses=n_classes, argMax=True,
                                              tiebreak="best")
            elapsed = time.perf_counter() - start_time

            k_scores.append(f1_score(y[test_idx], y_pred_vorace, average="micro"))
            k_times.append(elapsed)

        exp_scores.append(sum(k_scores) / len(k_scores))
        exp_times.append(sum(k_times) / len(k_times))

    return exp_scores, exp_times

In [None]:
def exp(n_models, voting_rule, X, y, y_hot, n_classes, data):
    """Run one ensemble-size / voting-rule configuration, record it and print a summary.

    Calls ``run_experiments`` with its default settings, stores the returned
    ``(scores, times)`` pair in ``data`` under the key
    ``"<n_models> <voting_rule>"`` and prints, in order: the configuration,
    the scores, their mean, the times, their mean, and a separator line.

    Parameters
    ----------
    n_models : int
        Ensemble size.
    voting_rule : str
        Name of the voting rule.
    X, y, y_hot : numpy.ndarray
        Features, labels and one-hot labels, passed through to ``run_experiments``.
    n_classes : int
        Number of classes.
    data : dict
        Result store; mutated in place.
    """
    f1_per_run, secs_per_run = run_experiments(X, y, y_hot, voting_rule, n_classes, n_models)
    key = " ".join((str(n_models), voting_rule))
    data[key] = (f1_per_run, secs_per_run)

    print(n_models, voting_rule)
    # Scores first, then times: each series is followed by its mean.
    for series in (f1_per_run, secs_per_run):
        print(series)
        print(np.mean(series))
    print("------------")

# Ensemble sizes and voting rules to evaluate; every (size, rule) pair is run.
models = [3, 5, 10, 20, 30, 40, 50, 60]
voting_rules = ["Borda", "Copeland", "Sum", "Kemeny", "Plurality"]


def _save_results(results, path="data.json"):
    """Write the accumulated results dict to ``path`` as JSON, overwriting it."""
    with open(path, "w") as f:
        json.dump(results, f)


# Filled in by exp(): maps "<n_models> <voting_rule>" to (scores, times).
data = dict()

# Single-model run, presumably serving as the no-ensemble baseline.
exp(1, "Plurality", X_derm, y_derm, y_hot_derm, derm_classes, data)
_save_results(data)

for m in models:
    for v in voting_rules:
        exp(m, v, X_derm, y_derm, y_hot_derm, derm_classes, data)
        # Checkpoint after every configuration so an interrupted run keeps
        # what has been computed so far; the final file is the same as
        # writing once at the end.
        _save_results(data)