In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
def load_datasets(path):
    with open(path, 'rb') as f:
        info = pickle.load(f)
    return info

def best_result(path, methods, extension = '.txt', dp = 3):
    metrics = ['ARI','FRG']
    n_meth = len(methods)
    n_metr = len(metrics)
    results = np.zeros((n_meth,n_metr))
    
    for k in range(n_meth):
        mdl = methods[k]
        dir = path + '/' + mdl+ extension
        X = pd.read_csv(dir)

        names_columns = list(X.columns)
        for mtr in ['FRG']:
            if mtr not in names_columns:
                X[mtr] = np.nan

        idx_min = np.argmin(X['J'])
        Y = (X.loc[:,metrics]).to_numpy()
        results[k] = Y[idx_min]

    rs = pd.DataFrame(results, index = methods, columns = metrics)

    return round(rs, dp)



     

In [3]:
list_of_datasets = ['wdbc', 'vehicle', 'chemical_composition','vertebral_column_2C', 
                    'vertebral_column', 'breast_tissue', 'abalone',  'fruit', 'gtzan', 'tox_171']

list_of_methods = ['fdk','wfdk','gkfdk','wgkfdk','dwgkfdk']

In [6]:
dtsets = list_of_datasets
rs = np.zeros((len(dtsets), 3))
for i in range(len(dtsets)):
    path = 'datasets/' + dtsets[i]
    X, y,_ = load_datasets(path)
    class_number = np.unique(y, return_counts=True)[1]
    IR = np.max(class_number)/np.min(class_number)
    N,P = X.shape
    K = len(np.unique(y))
    rs[i] = np.array([N,P,K])
rs = rs.astype('int')
rs = pd.DataFrame(rs, index = dtsets, columns=['N','P','K'])
rs


Unnamed: 0,N,P,K
wdbc,569,30,2
vehicle,846,18,4
chemical_composition,88,17,2
vertebral_column_2C,310,6,2
vertebral_column,310,6,3
breast_tissue,106,9,6
abalone,4177,8,3
fruit,898,34,7
gtzan,1000,58,10
tox_171,171,5748,4


##### ARI

In [7]:
folder_name = 'results_real'
N = len(list_of_methods)
n_dts = len(list_of_datasets)
metric = ['ARI']
avr_result = pd.DataFrame(np.zeros((N, n_dts)), index = list_of_methods, columns = list_of_datasets)
for i in range(n_dts):
    dts = list_of_datasets[i]
    path = folder_name + '/' + dts 
    rs = best_result(path, list_of_methods)
    rs = rs.loc[:,metric]
    rs = rs.loc[list_of_methods]
    avr_result.iloc[:,i] = rs
    rs.index = list_of_methods

names = ['FDK','WFDK','GKFDK','WGKFDK', 'DWGKHDK']
avr_result.index = names
avr_result.T


fdk      0.659
wfdk     0.528
gkfdk    0.694
wgkfdk   0.799
dwgkfdk  0.780' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  avr_result.iloc[:,i] = rs


Unnamed: 0,FDK,WFDK,GKFDK,WGKFDK,DWGKHDK
wdbc,0.659,0.528,0.694,0.799,0.78
vehicle,0.07,0.116,0.081,0.141,0.131
chemical_composition,0.593,1.0,0.524,0.955,1.0
vertebral_column_2C,0.111,0.063,0.122,0.136,0.142
vertebral_column,0.235,0.014,0.249,0.255,0.338
breast_tissue,0.266,0.311,0.328,0.302,0.363
abalone,0.135,0.166,0.143,0.147,0.16
fruit,0.642,0.339,0.658,0.567,0.233
gtzan,0.176,0.065,0.167,0.031,0.181
tox_171,0.153,0.073,0.038,0.067,0.153


In [8]:
np.round(avr_result.mean(axis=1),3)

FDK        0.304
WFDK       0.267
GKFDK      0.300
WGKFDK     0.340
DWGKHDK    0.348
dtype: float64

In [9]:
ranking = avr_result.T.rank(axis=1, method="min", ascending=False)
ranking

Unnamed: 0,FDK,WFDK,GKFDK,WGKFDK,DWGKHDK
wdbc,4.0,5.0,3.0,1.0,2.0
vehicle,5.0,3.0,4.0,1.0,2.0
chemical_composition,4.0,1.0,5.0,3.0,1.0
vertebral_column_2C,4.0,5.0,3.0,2.0,1.0
vertebral_column,4.0,5.0,3.0,2.0,1.0
breast_tissue,5.0,3.0,2.0,4.0,1.0
abalone,5.0,1.0,4.0,3.0,2.0
fruit,2.0,4.0,1.0,3.0,5.0
gtzan,2.0,4.0,3.0,5.0,1.0
tox_171,1.0,3.0,5.0,4.0,1.0


In [10]:
np.round(ranking.mean(axis=0),3)

FDK        3.6
WFDK       3.4
GKFDK      3.3
WGKFDK     2.8
DWGKHDK    1.7
dtype: float64

##### FRG

In [11]:
folder_name = 'results_real'
N = len(list_of_methods)
n_dts = len(list_of_datasets)
metric = ['FRG']
avr_result = pd.DataFrame(np.zeros((N, n_dts)), index = list_of_methods, columns = list_of_datasets)
for i in range(n_dts):
    dts = list_of_datasets[i]
    path = folder_name + '/' + dts 
    rs = best_result(path, list_of_methods)
    rs = rs.loc[:,metric]
    rs = rs.loc[list_of_methods]
    avr_result.iloc[:,i] = rs
    rs.index = list_of_methods

names = ['FDK','WFDK','GKFDK','WGKFDK', 'DWGKHDK']
avr_result.index = names
avr_result.T


fdk      0.827
wfdk     0.772
gkfdk    0.846
wgkfdk   0.745
dwgkfdk  0.891' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  avr_result.iloc[:,i] = rs


Unnamed: 0,FDK,WFDK,GKFDK,WGKFDK,DWGKHDK
wdbc,0.827,0.772,0.846,0.745,0.891
vehicle,0.651,0.65,0.646,0.672,0.663
chemical_composition,0.797,0.996,0.762,0.977,0.989
vertebral_column_2C,0.543,0.533,0.559,0.546,0.55
vertebral_column,0.639,0.558,0.623,0.638,0.646
breast_tissue,0.726,0.753,0.792,0.793,0.811
abalone,0.612,0.615,0.612,0.613,0.618
fruit,0.9,0.817,0.888,0.834,0.737
gtzan,0.838,0.821,0.849,0.821,0.848
tox_171,0.663,0.627,0.627,0.627,0.682


In [16]:
np.round(avr_result.mean(axis=1),3)

FDK        0.720
WFDK       0.714
GKFDK      0.720
WGKFDK     0.727
DWGKHDK    0.744
dtype: float64

In [13]:
ranking = avr_result.T.rank(axis=1, method="min", ascending=False)
ranking

Unnamed: 0,FDK,WFDK,GKFDK,WGKFDK,DWGKHDK
wdbc,3.0,4.0,2.0,5.0,1.0
vehicle,3.0,4.0,5.0,1.0,2.0
chemical_composition,4.0,1.0,5.0,3.0,2.0
vertebral_column_2C,4.0,5.0,1.0,3.0,2.0
vertebral_column,2.0,5.0,4.0,3.0,1.0
breast_tissue,5.0,4.0,3.0,2.0,1.0
abalone,4.0,2.0,4.0,3.0,1.0
fruit,1.0,4.0,2.0,3.0,5.0
gtzan,3.0,4.0,1.0,4.0,2.0
tox_171,2.0,3.0,3.0,3.0,1.0


In [15]:
np.round(ranking.mean(axis=0),3)

FDK        3.1
WFDK       3.6
GKFDK      3.0
WGKFDK     3.0
DWGKHDK    1.8
dtype: float64