In [7]:
from vorace import Vorace
import dataload
import os
import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import warnings 

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and convergence
# warnings raised by any library used below will be hidden as well.
warnings.filterwarnings("ignore")

In [8]:
# Read the features and labels from data/lymphography.csv through the
# project-local `dataload.pd_read` helper.
X, y = dataload.pd_read(os.path.join("data", "lymphography.csv"))
# Shift every label down by one.
# NOTE(review): presumably the CSV stores 1-based class ids and this makes
# them 0-based for the encoder below — confirm against the data file.
y = y - 1
# Build the categorical encoding of the shifted labels, fixed at 4 classes.
y_hot = to_categorical(y, num_classes=4)

In [None]:

def run_experiments(X, y, y_hot, voting_rule, n_classes, n_exp=10, n_folds=10, random_state=None):
    """Run repeated stratified k-fold evaluation of a Vorace ensemble.

    For each of ``n_exp`` repetitions a new ``Vorace`` instance is created,
    the data is split into ``n_folds`` shuffled stratified folds, and the
    ensemble is reset, fitted and scored on every fold. The score of a
    repetition is the mean micro-averaged F1 over its folds.

    Parameters
    ----------
    X : array-like
        Feature matrix, indexed by row with the fold index arrays.
        Assumed to be 2-D with features on axis 1 (its ``shape[1]`` is
        passed to ``Vorace`` as ``nInput``).
    y : array-like
        Integer class labels, used for stratification and for scoring.
    y_hot : array-like
        Categorical encoding of ``y``, passed to ``Vorace.fit``.
    voting_rule : str
        Name of the voting rule forwarded to ``Vorace.predict``.
    n_classes : int
        Number of classes, used both to build the ensemble and to predict.
    n_exp : int, default 10
        Number of repetitions of the whole cross-validation.
    n_folds : int, default 10
        Number of folds per repetition.
    random_state : int or None, default None
        Base seed for the fold shuffling. ``None`` keeps the splits
        unseeded. With an integer, repetition ``i`` uses
        ``random_state + i`` so repetitions get different but repeatable
        splits. Only the fold assignment is seeded here; any randomness
        inside ``Vorace`` is not controlled by this argument.

    Returns
    -------
    list
        One mean F1 score per repetition, in execution order.
    """
    # Take the input width from the data instead of a dataset-specific constant.
    n_features = X.shape[1]
    exp_scores = []

    for exp in tqdm(range(n_exp)):
        # Offset the seed per repetition; a shared seed would make every
        # repetition reuse exactly the same folds.
        split_seed = None if random_state is None else random_state + exp

        # Build the ensemble with the same class count that predict() receives,
        # so the two cannot disagree.
        vorace = Vorace(n_models=3, profile_type=3, nInput=n_features, nClasses=n_classes, batch_size=16)
        k_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=split_seed)
        k_scores = []

        for train_idx, test_idx in k_fold.split(X, y):
            # Reset before each fit so a fold does not start from the
            # state left by the previous fold.
            vorace.reset()
            vorace.fit(X[train_idx], y[train_idx], y_hot[train_idx])
            y_pred_vorace, _ = vorace.predict(voting=voting_rule, x=X[test_idx], nClasses=n_classes, argMax=True, tiebreak="best")
            f1score = f1_score(y[test_idx], y_pred_vorace, average="micro")
            k_scores.append(f1score)

        exp_scores.append(sum(k_scores) / len(k_scores))

    return exp_scores

# Evaluate the ensemble under the "Plurality" voting rule on the 4-class
# data, leaving the repetition and fold counts at their defaults.
scores = run_experiments(X, y, y_hot, voting_rule="Plurality", n_classes=4)

# Show the per-repetition scores first, then their overall average.
print(scores)
print(np.mean(scores))

 50%|██████████████████████                      | 5/10 [00:06<00:05,  1.15s/it]