## Test implementation
----

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
#import warnings
#warnings.filterwarnings("ignore")

#import sys
#np.set_printoptions(threshold=sys.maxsize)

from CBFW import CBFW
from mdlp.discretization import MDLP

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from data_utils import datasets

In [3]:
# Smoke test: fit CBFW on the monks-1 training file and print what it learned.
data = pd.read_csv('./data/monks-1.train', sep=' ', header=None)

# Column 0 holds the target; the features are every column after it except
# the last one (presumably a sample identifier -- confirm against the file).
x, y = data.iloc[:, 1:-1], data.iloc[:, 0]

model = CBFW()
model.fit(x, y)

# Print each fitted per-feature vector, rounded to four decimals, under the
# caption used in the report.
for caption, attribute in (('relevance: ', 'NIAC'),
                           ('redundancy: ', 'NIAA'),
                           ('weights: ', 'W')):
    print(caption, np.round(getattr(model, attribute), 4))

relevance:  [1.1308 0.0916 0.0733 0.398  4.2921 0.0142]
redundancy:  [1.6902 1.2031 0.1635 1.1302 1.4003 0.4127]
weights:  [0.3637 0.2476 0.4775 0.3247 0.9474 0.4017]


In [4]:
def k_fold_train(dataset, K=10):
    """Cross-validate CBFW on one dataset with stratified K-fold splits.

    Parameters
    ----------
    dataset : callable
        Zero-argument loader returning ``(X, y, continuous)``: the feature
        matrix, the class labels, and a selector for the columns that must
        be discretized (anything supporting ``len`` and usable as
        ``X[:, continuous]``; empty when nothing needs discretizing).
    K : int, default 10
        Number of stratified folds.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-fold accuracies, expressed in percent and
        rounded to two decimals.
    """
    X, y, continuous = dataset()

    # Fold indices are applied positionally (``X[train_index]``), which only
    # works on an ndarray; this is a no-op when X already is one.
    X = np.asarray(X)

    # convert all label values to integers
    y = LabelEncoder().fit_transform(y)

    # make splits for K fold (default settings: no shuffling)
    skf = StratifiedKFold(n_splits=K)

    # iterate over folds and accumulate the accuracy
    accs = []
    for train_index, test_index in skf.split(X, y):
        # Indexing with index arrays yields copies, so the in-place
        # discretization below never modifies X itself.
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # quantize x values using MDLP algorithm; the cut points are learned
        # on the training fold only and then applied to both folds
        if len(continuous) > 0:
            discretizer = MDLP()
            discretizer.fit(X_train[:, continuous], y_train)
            X_train[:, continuous] = discretizer.transform(X_train[:, continuous])
            X_test[:, continuous] = discretizer.transform(X_test[:, continuous])

        # fit model
        model = CBFW()
        model.fit(X_train, y_train)

        # predict and evaluate model
        y_pred = model.predict(X_test)
        accs.append(accuracy_score(y_test, y_pred))

    return np.round(np.mean(accs) * 100, 2), np.round(np.std(accs) * 100, 2)

In [5]:
# Tab-separated results table: one row per benchmark dataset with the mean and
# standard deviation of its cross-validated accuracy (both in percent).
print('Dataset:\tMean:\tStd:')
# The loop variable is deliberately not called `data`, so the DataFrame bound
# to `data` by the monks cell is not replaced by a loader callable.
for name, load_dataset in datasets.items():
    mean, std = k_fold_train(load_dataset)
    print(f'{name}\t{mean}\t{std}')

Dataset:	Mean:	Std:
audiology	69.0	10.44
balance-scale	69.73	8.89
breast-cancer	70.5	9.05
breast-cancer-w	97.28	2.25
colic	72.33	5.17
credit-a	85.51	13.0
credit-g	74.8	4.35
diabetes	65.48	3.73
glass	92.58	14.94
heart-c	58.42	6.48
heart-h	78.9	14.98
heart-statlog	82.59	6.43
hepatitis	84.42	6.13
ionosphere	90.87	6.24
iris	94.67	4.0
kr-vs-kp	87.92	7.29
letter	75.64	1.36
lymph	80.24	12.47
mushroom	95.29	9.16
primary-tumor	42.75	7.83
segmentation	88.57	5.71
sonar	70.64	11.15
waveform-5000	82.42	1.44
zoo	96.0	4.9
