## Test implementation
----

Import necessary libraries

In [None]:
import warnings
warnings.filterwarnings("ignore")

from CFW import *
from mdlp.discretization import MDLP

import pandas as pd
import numpy as np
from time import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score

Load the MONK's-1 training set, fit the model on it, and inspect the computed parameters (relevance, redundancy, and feature weights)

In [None]:
# Load the MONK's-1 training split: space-separated values, no header row.
data = pd.read_csv('./data/monks-1.train', sep=' ', header=None)

# The first column is used as the target; the inputs are every column
# between the first and the last one (the last column is left out).
x = data.iloc[:, 1:-1]
y = data.iloc[:, 0]

# Fit the model on the whole training split.
model = CBFW()
model.fit(x, y)

# Report the fitted quantities, rounded to four decimals. Attributes are
# looked up one at a time so the output order matches the listing below.
for label, attribute in (
    ('relevance: ', 'NIAC'),
    ('redundancy: ', 'NIAA'),
    ('weights: ', 'W'),
):
    print(label, np.round(getattr(model, attribute), 4))

Define a routine that trains and evaluates the model on a given dataset using stratified K-fold cross-validation

In [None]:
def k_fold_train(dataset, K=10, n_runs=10):
    """Evaluate ``CBFW`` on one dataset with repeated stratified K-fold CV.

    Parameters
    ----------
    dataset : callable
        Zero-argument loader returning ``(X, y, continuous)``: the feature
        matrix, the labels, and the indices of the columns that are
        discretized with MDLP before fitting. ``X`` is indexed with integer
        index arrays and with ``[:, columns]``, so it is presumably a 2-D
        NumPy array -- TODO confirm against the dataset loaders.
    K : int, default 10
        Number of folds.
    n_runs : int, default 10
        Number of times the whole K-fold procedure is repeated.

    Returns
    -------
    tuple
        ``((acc_mean, acc_std), (auc_mean, auc_std), (time_mean, time_std))``.
        Accuracy and AUC are percentages rounded to 2 decimals; time is the
        duration of ``model.fit`` in seconds rounded to 4 decimals. A pair
        is ``(nan, nan)`` when no value could be collected for that metric.
    """
    X, y, continuous = dataset()
    # Convert all label values to integers 0..n_classes-1.
    y = LabelEncoder().fit_transform(y)

    # Train and test folds together always cover the full label set, so the
    # class count and the one-hot lookup table are computed once up front.
    nb_classes = len(np.unique(y))
    one_hot = np.eye(nb_classes)

    # NOTE(review): the splitter is created without shuffling, so every run
    # iterates over the same folds; repeated runs then differ only in timing
    # and in whatever randomness the model itself has. Confirm this is the
    # intended protocol.
    skf = StratifiedKFold(n_splits=K)

    accs, aucs, times = [], [], []
    for _ in range(n_runs):
        for train_index, test_index in skf.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Quantize the continuous columns with MDLP; the cut points are
            # learned on the training fold only and reused on the test fold.
            if len(continuous) > 0:
                discretizer = MDLP()
                discretizer.fit(X_train[:, continuous], y_train)
                X_train[:, continuous] = discretizer.transform(X_train[:, continuous])
                X_test[:, continuous] = discretizer.transform(X_test[:, continuous])

            # Fit the model; only the fit call is timed.
            model = CBFW()
            init = time()
            model.fit(X_train, y_train)
            times.append(time() - init)

            # One-hot encode targets and predictions for the metrics.
            y_true = one_hot[y_test]
            y_pred = one_hot[model.predict(X_test)]
            # The model returns one probability column per class seen during
            # training; they are written into the leading columns and the
            # remaining columns stay at zero.
            # NOTE(review): this assumes any class missing from the training
            # fold has the highest encoded label -- confirm.
            y_score = np.zeros_like(y_true)
            y_score[:, :len(np.unique(y_train))] = model.predict(X_test, proba=True)

            accs.append(accuracy_score(y_true, y_pred))

            # AUC is best-effort: a fold for which it is undefined is skipped
            # instead of aborting the evaluation. Only ValueError is treated
            # as "undefined" so that genuine failures and KeyboardInterrupt
            # are no longer swallowed.
            try:
                auc = roc_auc_score(y_true, y_score, multi_class='ovo')
            except ValueError:
                continue
            # Skip non-finite scores as well so one fold cannot turn the
            # aggregated mean into NaN.
            if np.isfinite(auc):
                aucs.append(auc)

    return (_mean_std(accs, scale=100, decimals=2),
            _mean_std(aucs, scale=100, decimals=2),
            _mean_std(times, scale=1, decimals=4))


def _mean_std(values, scale=1, decimals=2):
    """Return ``(mean, std)`` of *values*, scaled and rounded; NaNs if empty."""
    if not values:
        return np.nan, np.nan
    return (np.round(np.mean(values) * scale, decimals),
            np.round(np.std(values) * scale, decimals))

Iterate over all datasets in order to evaluate them

In [None]:
# Header row of the tab-separated report.
print('\t'.join(['Dataset:', 'Accuracy:', 'AUC:', 'Elapsed time:']))

# One row per dataset; every metric is rendered as "mean±std".
for name, data in datasets.items():
    acc, auc, eltime = k_fold_train(data)
    row = [f'{name}'] + [f'{mean}±{std}' for mean, std in (acc, auc, eltime)]
    print('\t'.join(row))