In [1]:
import itertools
import os

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Show which GPU devices TensorFlow can see before any training starts
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Set seeds: seed NumPy as well as TensorFlow so that both random number
# generators imported by this notebook start from a known state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [4]:
# Paths of all datasets, grouped by feature type.
# File-name prefixes: NORM_ for the numerical group, OH_ for the categorical
# group and OH_NORM_ for the mixed group.
num_datasets = [
    "./data/NORM_BCW.csv",
    "./data/NORM_Ecoli.csv",
    "./data/NORM_Iris.csv",
    "./data/NORM_ISOLET.csv",
    "./data/NORM_SDD.csv",
    "./data/NORM_PBC.csv",
    "./data/NORM_CMSC.csv",
    "./data/NORM_MagicGT.csv",
    "./data/NORM_Wine.csv",
]
cat_datasets = [
    "./data/OH_BalanceScale.csv",
    "./data/OH_CarEvaluation.csv",
    "./data/OH_HayesRoth.csv",
    "./data/OH_Chess.csv",
    "./data/OH_Lymphography.csv",
    "./data/OH_Nursery.csv",
    "./data/OH_SoybeanSmall.csv",
    "./data/OH_TicTacToe.csv",
]
mix_datasets = [
    "./data/OH_NORM_DefaultOfCCC.csv",
    "./data/OH_NORM_StudentPerf.csv",
    "./data/OH_NORM_Adult.csv",
    "./data/OH_NORM_InternetAdv.csv",
    "./data/OH_NORM_StatlogGC.csv",
]

In [5]:
def data_splitter(df, classes):
    """ Split a binary-labelled DataFrame into train, validation and test parts
    (60 / 20 / 20). Each class is split on its own and the per-class pieces are
    concatenated afterwards, so every part keeps the class proportions of `df`.

    Args:
        df (DataFrame): Data to split; the label must be in the column 'output'
        classes (list): Labels present in `df`; only 0 and 1 are accepted
    Returns:
        (DataFrame, DataFrame, DataFrame, Series, Series, Series): Train_X,
            Validation_X, Test_X, Train_y, Validation_y, Test_y, in that order.
            Rows are grouped by class, following the order of `classes`.
    Raises:
        AssertionError: If `classes` contains anything other than 0 or 1

    """

    # Only binary labels are supported
    assert not (set(classes) - {0, 1})

    # One bucket per returned part; key order is the return order
    parts = {key: [] for key in ("X_train", "X_validation", "X_test",
                                 "y_train", "y_validation", "y_test")}

    for label in classes:
        subset = df[df['output']==label]
        features = subset.drop(columns=['output'])
        target = subset['output']

        # First cut: 60% train, 40% held out
        X_tr, X_held, y_tr, y_held = train_test_split(
            features, target, test_size=0.4, random_state=42)

        # Second cut: the held-out 40% is halved into test and validation
        X_te, X_va, y_te, y_va = train_test_split(
            X_held, y_held, test_size=0.5, random_state=42)

        for key, piece in zip(parts, (X_tr, X_va, X_te, y_tr, y_va, y_te)):
            parts[key].append(piece)

    return tuple(pd.concat(parts[key]) for key in parts)

In [6]:
import itertools

import pandas as pd

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def _dataset_name(path_dataset):
    """Return the text after the last '_' of the file name, extension removed
    (e.g. './data/NORM_BCW.csv' -> 'BCW')."""
    stem = os.path.splitext(os.path.basename(path_dataset))[0]
    return stem.split('_')[-1]


def _hidden_layer_sizes(n_features):
    """Return the distinct hidden-layer widths i/5 * (2*n_features + 1), i = 1..5,
    each clamped to at least one unit."""
    sizes = []
    for i in range(1, 6):
        # max(1, ...) avoids a zero-unit layer when there is a single feature
        size = max(1, int((n_features*2+1)*i/5))
        if size not in sizes:
            sizes.append(size)
    return sizes


def _save_index(index, path):
    """Write `index` to `path` as a one-column CSV (default column 0 renamed to
    'index'), creating the target folder first if it does not exist."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    pd.DataFrame(index).rename(columns={0: 'index'}).to_csv(path, index=False)


def generate_models(paths_datasets):
    """ Model generator for the datasets in the given paths. For every dataset it
    binarizes the 'output' column (majority class -> 0, everything else -> 1),
    splits the rows with `data_splitter`, and grid-searches a one-hidden-layer
    softmax network over learning rate, number of epochs and hidden-layer size.
    The model with the highest validation AUC is kept.

    Files written per dataset (folders are created if missing):
        ./idxstrain/<name>.csv, ./idxsvalidation/<name>.csv, ./idxstest/<name>.csv:
            row indexes of each split
        ./all_params.txt: one appended line per grid point (name, validation AUC, params)
        ./best_params.txt: one appended line with the best AUC and its params
        ./models/<name>.h5: the best model

    Args:
        paths_datasets (list): A list containing the paths for each dataset to be
            analyzed. Each CSV must have a column named 'output'.
    Returns:
        (None)

    """
    for path_dataset in paths_datasets:
        # Generate df
        df = pd.read_csv(path_dataset)

        # Define the majority class as 0 and the other classes as 1, binarization
        most_common_class = df['output'].value_counts().index[0]
        df['output'] = (df['output'] != most_common_class).astype(int)

        # Get the name of the dataset (works for any path, not only './x/y.csv')
        ds_name = _dataset_name(path_dataset)

        # Get the possible classes of DS
        classes = list(df['output'].unique())

        # Split DataFrame to train, validation and test
        X_train, X_validation, X_test, y_train, y_validation, y_test = data_splitter(df, classes)

        # One-hot style target for the 2-unit softmax output:
        # column 0 is the binarized label (1 = not the majority class),
        # column 1 is its complement (1 = majority class)
        y_train = pd.concat([y_train, y_train.map({0:1, 1:0})], axis=1)
        y_validation = pd.concat([y_validation, y_validation.map({0:1, 1:0})], axis=1)
        y_test = pd.concat([y_test, y_test.map({0:1, 1:0})], axis=1)

        print('##########################################')
        print(f'Size train: {len(X_train)}, {len(y_train)} \n')
        print(f'Size validation: {len(X_validation)}, {len(y_validation)} \n')
        print(f'Size test: {len(X_test)}, {len(y_test)} \n')
        print('##########################################')

        # Save the row indexes of each split
        _save_index(y_train.index, f'./idxstrain/{ds_name}.csv')
        _save_index(y_validation.index, f'./idxsvalidation/{ds_name}.csv')
        _save_index(y_test.index, f'./idxstest/{ds_name}.csv')

        # GridSearch Parameters
        learning_rates = [0.01, 0.001, 0.0001]
        epoch_numbers = [50, 100, 500]
        nn_sizes = _hidden_layer_sizes(X_train.shape[1])
        comb_param = list(itertools.product(learning_rates, epoch_numbers, nn_sizes))

        # Values that do not depend on the grid point are computed once
        y_train_true = np.argmax(y_train.values, axis=1)
        y_validation_true = np.argmax(y_validation.values, axis=1)
        y_test_true = np.argmax(y_test.values, axis=1)
        class_balance = y_train.sum()/y_train.shape[0]

        # best scores placeholders
        best_model = None
        best_params = []
        best_auc = 0.0

        for params in comb_param:

            # Get parameters
            lr, epoch, nn_size = params

            # Create model
            model = keras.Sequential(
                [layers.Dense(nn_size, activation="relu", name="layer1"),
                 layers.Dense(2, activation="softmax", name="outputLayer"),
                ])

            # Configure optimizer
            opt = tf.keras.optimizers.RMSprop(learning_rate=lr, name='RMSprop')

            # Compile
            model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])

            # Train
            model.fit(X_train, y_train, epochs=epoch, verbose=0)

            # Get predictions for the train, validation and test sets
            y_train_pred = model.predict(X_train)
            y_validation_pred = model.predict(X_validation)
            y_test_pred = model.predict(X_test)

            # Get Accuracies (test accuracy is only reported, never used for selection)
            train_acc = accuracy_score(y_train_true, np.argmax(y_train_pred, axis=1))
            validation_acc = accuracy_score(y_validation_true, np.argmax(y_validation_pred, axis=1))
            test_acc = accuracy_score(y_test_true, np.argmax(y_test_pred, axis=1))

            print(f"\n\nModel for {ds_name}:\nTrain Accuracy:{train_acc}\nValidation Accuracy:{validation_acc}\nTest Accuracy: {test_acc}\nClass Balance={class_balance}\n\n")

            # Validation AUC: target column 1 against the score of output unit 1
            fpr_validation, tpr_validation, _ = roc_curve(
                y_validation.iloc[:, 1], y_validation_pred[:, 1])
            auc_m_validation = auc(fpr_validation, tpr_validation)

            # Report auc for the params
            with open('./all_params.txt', 'a') as f:
                f.write(f'{ds_name} {auc_m_validation} {params} \n')

            # A NaN AUC never wins; the first valid model is always accepted so
            # that a model exists even if every AUC is 0.0
            if not np.isnan(auc_m_validation) and (
                    best_model is None or auc_m_validation > best_auc):
                best_auc = auc_m_validation
                best_model = model
                best_params = params

            # Release GPU memory
            tf.keras.backend.clear_session()
            del model

        # Report the best params and auc score for the dataset and class
        with open('./best_params.txt', 'a') as f:
            f.write(f'{ds_name} {best_auc} {best_params} \n')

        # Nothing to save when no grid point produced a valid AUC
        if best_model is None:
            print(f'No model with a valid validation AUC for {ds_name}; nothing saved')
            continue

        # Save
        os.makedirs('./models', exist_ok=True)
        best_model.save("./models/"+ds_name+".h5")

In [7]:
# Create models for each dataset family in turn:
# numerical first, then categorical, then mixed
for dataset_paths in (num_datasets, cat_datasets, mix_datasets):
    generate_models(dataset_paths)

##########################################
Size train: 118, 118 

Size validation: 41, 41 

Size test: 39, 39 

##########################################


Model for BCW:
Train Accuracy:1.0
Validation Accuracy:0.8536585365853658
Test Accuracy: 0.7948717948717948
Class Balance=output    0.237288
output    0.762712
dtype: float64




Model for BCW:
Train Accuracy:1.0
Validation Accuracy:0.8292682926829268
Test Accuracy: 0.7948717948717948
Class Balance=output    0.237288
output    0.762712
dtype: float64




Model for BCW:
Train Accuracy:1.0
Validation Accuracy:0.7804878048780488
Test Accuracy: 0.7435897435897436
Class Balance=output    0.237288
output    0.762712
dtype: float64




Model for BCW:
Train Accuracy:0.940677966101695
Validation Accuracy:0.7073170731707317
Test Accuracy: 0.6153846153846154
Class Balance=output    0.237288
output    0.762712
dtype: float64




Model for BCW:
Train Accuracy:1.0
Validation Accuracy:0.7804878048780488
Test Accuracy: 0.717948717948718
Class Balan



Model for BCW:
Train Accuracy:0.9322033898305084
Validation Accuracy:0.8292682926829268
Test Accuracy: 0.717948717948718
Class Balance=output    0.237288
output    0.762712
dtype: float64


##########################################
Size train: 200, 200 

Size validation: 68, 68 

Size test: 68, 68 

##########################################


Model for Ecoli:
Train Accuracy:0.975
Validation Accuracy:0.9558823529411765
Test Accuracy: 0.9705882352941176
Class Balance=output    0.575
output    0.425
dtype: float64




Model for Ecoli:
Train Accuracy:0.98
Validation Accuracy:0.9558823529411765
Test Accuracy: 0.9705882352941176
Class Balance=output    0.575
output    0.425
dtype: float64




Model for Ecoli:
Train Accuracy:0.98
Validation Accuracy:0.9558823529411765
Test Accuracy: 0.9705882352941176
Class Balance=output    0.575
output    0.425
dtype: float64




Model for Ecoli:
Train Accuracy:0.975
Validation Accuracy:0.9411764705882353
Test Accuracy: 0.9705882352941176
Class Balance=



Model for Iris:
Train Accuracy:0.9555555555555556
Validation Accuracy:0.9333333333333333
Test Accuracy: 0.9333333333333333
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Iris:
Train Accuracy:1.0
Validation Accuracy:0.9333333333333333
Test Accuracy: 0.9666666666666667
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Iris:
Train Accuracy:1.0
Validation Accuracy:0.9
Test Accuracy: 0.9666666666666667
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Iris:
Train Accuracy:1.0
Validation Accuracy:0.9
Test Accuracy: 0.9
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Iris:
Train Accuracy:1.0
Validation Accuracy:0.9
Test Accuracy: 0.9
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Iris:
Train Accuracy:1.0
Validation Accuracy:0.9333333333333333
Test Accuracy: 0.9666666666666667
Class Balance=output    0.666667
output    0.333333
dty



Model for ISOLET:
Train Accuracy:1.0
Validation Accuracy:0.9993589743589744
Test Accuracy: 0.9993585631815266
Class Balance=output    0.961522
output    0.038478
dtype: float64




Model for ISOLET:
Train Accuracy:1.0
Validation Accuracy:0.9987179487179487
Test Accuracy: 0.9967928159076331
Class Balance=output    0.961522
output    0.038478
dtype: float64




Model for ISOLET:
Train Accuracy:1.0
Validation Accuracy:0.9961538461538462
Test Accuracy: 0.9967928159076331
Class Balance=output    0.961522
output    0.038478
dtype: float64




Model for ISOLET:
Train Accuracy:1.0
Validation Accuracy:0.9967948717948718
Test Accuracy: 0.9980756895445798
Class Balance=output    0.961522
output    0.038478
dtype: float64




Model for ISOLET:
Train Accuracy:1.0
Validation Accuracy:0.9987179487179487
Test Accuracy: 0.9980756895445798
Class Balance=output    0.961522
output    0.038478
dtype: float64




Model for ISOLET:
Train Accuracy:1.0
Validation Accuracy:0.9974358974358974
Test Accuracy: 0.



Model for SDD:
Train Accuracy:1.0
Validation Accuracy:0.9998290890446078
Test Accuracy: 0.9998290890446078
Class Balance=output    0.909101
output    0.090899
dtype: float64




Model for SDD:
Train Accuracy:1.0
Validation Accuracy:0.9998290890446078
Test Accuracy: 0.9997436335669116
Class Balance=output    0.909101
output    0.090899
dtype: float64




Model for SDD:
Train Accuracy:1.0
Validation Accuracy:0.9998290890446078
Test Accuracy: 0.9998290890446078
Class Balance=output    0.909101
output    0.090899
dtype: float64




Model for SDD:
Train Accuracy:1.0
Validation Accuracy:0.9998290890446078
Test Accuracy: 0.9998290890446078
Class Balance=output    0.909101
output    0.090899
dtype: float64




Model for SDD:
Train Accuracy:1.0
Validation Accuracy:0.9998290890446078
Test Accuracy: 0.9998290890446078
Class Balance=output    0.909101
output    0.090899
dtype: float64




Model for SDD:
Train Accuracy:1.0
Validation Accuracy:0.9998290890446078
Test Accuracy: 0.9996581780892155
C



Model for PBC:
Train Accuracy:0.9643618641486446
Validation Accuracy:0.9598173515981735
Test Accuracy: 0.9643835616438357
Class Balance=output    0.102345
output    0.897655
dtype: float64




Model for PBC:
Train Accuracy:0.9719768504416693
Validation Accuracy:0.9643835616438357
Test Accuracy: 0.9625570776255707
Class Balance=output    0.102345
output    0.897655
dtype: float64




Model for PBC:
Train Accuracy:0.9728906487968322
Validation Accuracy:0.971689497716895
Test Accuracy: 0.9689497716894977
Class Balance=output    0.102345
output    0.897655
dtype: float64




Model for PBC:
Train Accuracy:0.9701492537313433
Validation Accuracy:0.9643835616438357
Test Accuracy: 0.9643835616438357
Class Balance=output    0.102345
output    0.897655
dtype: float64




Model for PBC:
Train Accuracy:0.9731952482485532
Validation Accuracy:0.9634703196347032
Test Accuracy: 0.9707762557077626
Class Balance=output    0.102345
output    0.897655
dtype: float64




Model for PBC:
Train Accuracy:0.97



Model for PBC:
Train Accuracy:0.9728906487968322
Validation Accuracy:0.971689497716895
Test Accuracy: 0.9662100456621004
Class Balance=output    0.102345
output    0.897655
dtype: float64


##########################################
Size train: 323, 323 

Size validation: 109, 109 

Size test: 108, 108 

##########################################


Model for CMSC:
Train Accuracy:1.0
Validation Accuracy:0.9357798165137615
Test Accuracy: 0.9259259259259259
Class Balance=output    0.083591
output    0.916409
dtype: float64




Model for CMSC:
Train Accuracy:1.0
Validation Accuracy:0.9541284403669725
Test Accuracy: 0.9351851851851852
Class Balance=output    0.083591
output    0.916409
dtype: float64




Model for CMSC:
Train Accuracy:1.0
Validation Accuracy:0.926605504587156
Test Accuracy: 0.9259259259259259
Class Balance=output    0.083591
output    0.916409
dtype: float64




Model for CMSC:
Train Accuracy:1.0
Validation Accuracy:0.944954128440367
Test Accuracy: 0.9166666666666666
Clas



Model for CMSC:
Train Accuracy:0.9969040247678018
Validation Accuracy:0.9541284403669725
Test Accuracy: 0.9259259259259259
Class Balance=output    0.083591
output    0.916409
dtype: float64




Model for CMSC:
Train Accuracy:0.9969040247678018
Validation Accuracy:0.9541284403669725
Test Accuracy: 0.9351851851851852
Class Balance=output    0.083591
output    0.916409
dtype: float64


##########################################
Size train: 11411, 11411 

Size validation: 3805, 3805 

Size test: 3804, 3804 

##########################################


Model for MagicGT:
Train Accuracy:0.8610113048812549
Validation Accuracy:0.8530880420499343
Test Accuracy: 0.8593585699263933
Class Balance=output    0.351591
output    0.648409
dtype: float64




Model for MagicGT:
Train Accuracy:0.8700376829375164
Validation Accuracy:0.8620236530880421
Test Accuracy: 0.8633017875920084
Class Balance=output    0.351591
output    0.648409
dtype: float64




Model for MagicGT:
Train Accuracy:0.8740688809043



Model for MagicGT:
Train Accuracy:0.8652177723249496
Validation Accuracy:0.8525624178712221
Test Accuracy: 0.8625131440588854
Class Balance=output    0.351591
output    0.648409
dtype: float64




Model for MagicGT:
Train Accuracy:0.8574182806064324
Validation Accuracy:0.845203679369251
Test Accuracy: 0.8559411146161935
Class Balance=output    0.351591
output    0.648409
dtype: float64




Model for MagicGT:
Train Accuracy:0.8695995092454649
Validation Accuracy:0.8601839684625493
Test Accuracy: 0.867770767613039
Class Balance=output    0.351591
output    0.648409
dtype: float64




Model for MagicGT:
Train Accuracy:0.8712645692752607
Validation Accuracy:0.8636005256241787
Test Accuracy: 0.862776025236593
Class Balance=output    0.351591
output    0.648409
dtype: float64




Model for MagicGT:
Train Accuracy:0.8743317851196214
Validation Accuracy:0.8612352168199737
Test Accuracy: 0.8646161934805467
Class Balance=output    0.351591
output    0.648409
dtype: float64




Model for MagicG



Model for Wine:
Train Accuracy:0.6886792452830188
Validation Accuracy:0.6756756756756757
Test Accuracy: 0.6571428571428571
Class Balance=output    0.603774
output    0.396226
dtype: float64




Model for Wine:
Train Accuracy:0.9716981132075472
Validation Accuracy:0.9459459459459459
Test Accuracy: 1.0
Class Balance=output    0.603774
output    0.396226
dtype: float64




Model for Wine:
Train Accuracy:0.9905660377358491
Validation Accuracy:0.972972972972973
Test Accuracy: 1.0
Class Balance=output    0.603774
output    0.396226
dtype: float64




Model for Wine:
Train Accuracy:1.0
Validation Accuracy:0.972972972972973
Test Accuracy: 1.0
Class Balance=output    0.603774
output    0.396226
dtype: float64




Model for Wine:
Train Accuracy:1.0
Validation Accuracy:0.972972972972973
Test Accuracy: 1.0
Class Balance=output    0.603774
output    0.396226
dtype: float64


##########################################
Size train: 374, 374 

Size validation: 126, 126 

Size test: 125, 125 

#######



Model for BalanceScale:
Train Accuracy:0.9812834224598931
Validation Accuracy:0.9523809523809523
Test Accuracy: 0.936
Class Balance=output    0.540107
output    0.459893
dtype: float64




Model for BalanceScale:
Train Accuracy:0.9973262032085561
Validation Accuracy:0.9603174603174603
Test Accuracy: 0.96
Class Balance=output    0.540107
output    0.459893
dtype: float64




Model for BalanceScale:
Train Accuracy:0.9973262032085561
Validation Accuracy:0.9682539682539683
Test Accuracy: 0.96
Class Balance=output    0.540107
output    0.459893
dtype: float64




Model for BalanceScale:
Train Accuracy:1.0
Validation Accuracy:0.9682539682539683
Test Accuracy: 0.952
Class Balance=output    0.540107
output    0.459893
dtype: float64




Model for BalanceScale:
Train Accuracy:1.0
Validation Accuracy:0.9761904761904762
Test Accuracy: 0.976
Class Balance=output    0.540107
output    0.459893
dtype: float64


##########################################
Size train: 1036, 1036 

Size validation: 34



Model for CarEvaluation:
Train Accuracy:0.972972972972973
Validation Accuracy:0.953757225433526
Test Accuracy: 0.9508670520231214
Class Balance=output    0.299228
output    0.700772
dtype: float64




Model for CarEvaluation:
Train Accuracy:0.9700772200772201
Validation Accuracy:0.9421965317919075
Test Accuracy: 0.9393063583815029
Class Balance=output    0.299228
output    0.700772
dtype: float64




Model for CarEvaluation:
Train Accuracy:0.9777992277992278
Validation Accuracy:0.9566473988439307
Test Accuracy: 0.9624277456647399
Class Balance=output    0.299228
output    0.700772
dtype: float64




Model for CarEvaluation:
Train Accuracy:0.9913127413127413
Validation Accuracy:0.9710982658959537
Test Accuracy: 0.9710982658959537
Class Balance=output    0.299228
output    0.700772
dtype: float64




Model for CarEvaluation:
Train Accuracy:0.9913127413127413
Validation Accuracy:0.9797687861271677
Test Accuracy: 0.9653179190751445
Class Balance=output    0.299228
output    0.700772
dtyp



Model for HayesRoth:
Train Accuracy:0.6794871794871795
Validation Accuracy:0.6071428571428571
Test Accuracy: 0.7307692307692307
Class Balance=output    0.615385
output    0.384615
dtype: float64




Model for HayesRoth:
Train Accuracy:0.5128205128205128
Validation Accuracy:0.5357142857142857
Test Accuracy: 0.5384615384615384
Class Balance=output    0.615385
output    0.384615
dtype: float64




Model for HayesRoth:
Train Accuracy:0.5
Validation Accuracy:0.6071428571428571
Test Accuracy: 0.5
Class Balance=output    0.615385
output    0.384615
dtype: float64




Model for HayesRoth:
Train Accuracy:0.5
Validation Accuracy:0.4642857142857143
Test Accuracy: 0.7307692307692307
Class Balance=output    0.615385
output    0.384615
dtype: float64




Model for HayesRoth:
Train Accuracy:0.6410256410256411
Validation Accuracy:0.5357142857142857
Test Accuracy: 0.5384615384615384
Class Balance=output    0.615385
output    0.384615
dtype: float64




Model for HayesRoth:
Train Accuracy:0.6153846153



Model for Chess:
Train Accuracy:0.9853858492247372
Validation Accuracy:0.9802209550962224
Test Accuracy: 0.9825343076100517
Class Balance=output    0.099626
output    0.900374
dtype: float64




Model for Chess:
Train Accuracy:0.9888908691261213
Validation Accuracy:0.9853884533143265
Test Accuracy: 0.9880591694885047
Class Balance=output    0.099626
output    0.900374
dtype: float64




Model for Chess:
Train Accuracy:0.9921582605596151
Validation Accuracy:0.9882394868139701
Test Accuracy: 0.9893067189449296
Class Balance=output    0.099626
output    0.900374
dtype: float64




Model for Chess:
Train Accuracy:0.9688706707063506
Validation Accuracy:0.9618674269422666
Test Accuracy: 0.9638210657636785
Class Balance=output    0.099626
output    0.900374
dtype: float64




Model for Chess:
Train Accuracy:0.9906730826352997
Validation Accuracy:0.9907341411261582
Test Accuracy: 0.9896631616467653
Class Balance=output    0.099626
output    0.900374
dtype: float64




Model for Chess:
Train 



Model for Lymphography:
Train Accuracy:0.5795454545454546
Validation Accuracy:0.6451612903225806
Test Accuracy: 0.4482758620689655
Class Balance=output    0.454545
output    0.545455
dtype: float64




Model for Lymphography:
Train Accuracy:0.7954545454545454
Validation Accuracy:0.6451612903225806
Test Accuracy: 0.7586206896551724
Class Balance=output    0.454545
output    0.545455
dtype: float64




Model for Lymphography:
Train Accuracy:0.7840909090909091
Validation Accuracy:0.6129032258064516
Test Accuracy: 0.7586206896551724
Class Balance=output    0.454545
output    0.545455
dtype: float64




Model for Lymphography:
Train Accuracy:0.7840909090909091
Validation Accuracy:0.7741935483870968
Test Accuracy: 0.6206896551724138
Class Balance=output    0.454545
output    0.545455
dtype: float64




Model for Lymphography:
Train Accuracy:0.8295454545454546
Validation Accuracy:0.7096774193548387
Test Accuracy: 0.6551724137931034
Class Balance=output    0.454545
output    0.545455
dtype: 



Model for Nursery:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Nursery:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Nursery:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Nursery:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Nursery:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Nursery:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.666667
output    0.333333
dtype: float64




Model for Nursery:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class



Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Ac



Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Ac



Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:0.4642857142857143
Validation Accuracy:0.6
Test Accuracy: 0.5555555555555556
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:0.6428571428571429
Validation Accuracy:0.5
Test Accuracy: 0.4444444444444444
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:0.7857142857142857
Validation Accuracy:0.9
Test Accuracy: 0.6666666666666666
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:0.7142857142857143
Validation Accuracy:0.6
Test Accuracy: 0.6666666666666666
Class B



Model for SoybeanSmall:
Train Accuracy:0.9642857142857143
Validation Accuracy:0.8
Test Accuracy: 0.7777777777777778
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:0.9642857142857143
Validation Accuracy:0.8
Test Accuracy: 0.8888888888888888
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:0.9
Test Accuracy: 0.7777777777777778
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143
dtype: float64




Model for SoybeanSmall:
Train Accuracy:1.0
Validation Accuracy:1.0
Test Accuracy: 1.0
Class Balance=output    0.642857
output    0.357143




Model for TicTacToe:
Train Accuracy:1.0
Validation Accuracy:0.9740932642487047
Test Accuracy: 0.9738219895287958
Class Balance=output    0.34669
output    0.65331
dtype: float64




Model for TicTacToe:
Train Accuracy:1.0
Validation Accuracy:0.9740932642487047
Test Accuracy: 0.9947643979057592
Class Balance=output    0.34669
output    0.65331
dtype: float64




Model for TicTacToe:
Train Accuracy:1.0
Validation Accuracy:0.9844559585492227
Test Accuracy: 0.9842931937172775
Class Balance=output    0.34669
output    0.65331
dtype: float64




Model for TicTacToe:
Train Accuracy:1.0
Validation Accuracy:0.9844559585492227
Test Accuracy: 0.9738219895287958
Class Balance=output    0.34669
output    0.65331
dtype: float64




Model for TicTacToe:
Train Accuracy:1.0
Validation Accuracy:0.9792746113989638
Test Accuracy: 0.9895287958115183
Class Balance=output    0.34669
output    0.65331
dtype: float64




Model for TicTacToe:
Train Accuracy:1.0
Validation Accuracy:0.9844559585492227
Test Accu



Model for DefaultOfCCC:
Train Accuracy:0.8291016167564865
Validation Accuracy:0.8091984669221797
Test Accuracy: 0.8085
Class Balance=output    0.221179
output    0.778821
dtype: float64




Model for DefaultOfCCC:
Train Accuracy:0.8306017000944497
Validation Accuracy:0.807032161306449
Test Accuracy: 0.8068333333333333
Class Balance=output    0.221179
output    0.778821
dtype: float64




Model for DefaultOfCCC:
Train Accuracy:0.8308239346630368
Validation Accuracy:0.8203632727878687
Test Accuracy: 0.8151666666666667
Class Balance=output    0.221179
output    0.778821
dtype: float64




Model for DefaultOfCCC:
Train Accuracy:0.8320462247902661
Validation Accuracy:0.8178636893851025
Test Accuracy: 0.8141666666666667
Class Balance=output    0.221179
output    0.778821
dtype: float64




Model for DefaultOfCCC:
Train Accuracy:0.8382132340685594
Validation Accuracy:0.8120313281119813
Test Accuracy: 0.8083333333333333
Class Balance=output    0.221179
output    0.778821
dtype: float64




M



Model for StudentPerf:
Train Accuracy:1.0
Validation Accuracy:0.7099236641221374
Test Accuracy: 0.7
Class Balance=output    0.463918
output    0.536082
dtype: float64




Model for StudentPerf:
Train Accuracy:1.0
Validation Accuracy:0.6870229007633588
Test Accuracy: 0.6692307692307692
Class Balance=output    0.463918
output    0.536082
dtype: float64




Model for StudentPerf:
Train Accuracy:1.0
Validation Accuracy:0.6870229007633588
Test Accuracy: 0.6307692307692307
Class Balance=output    0.463918
output    0.536082
dtype: float64




Model for StudentPerf:
Train Accuracy:1.0
Validation Accuracy:0.6946564885496184
Test Accuracy: 0.6615384615384615
Class Balance=output    0.463918
output    0.536082
dtype: float64




Model for StudentPerf:
Train Accuracy:1.0
Validation Accuracy:0.7251908396946565
Test Accuracy: 0.6692307692307692
Class Balance=output    0.463918
output    0.536082
dtype: float64




Model for StudentPerf:
Train Accuracy:1.0
Validation Accuracy:0.7099236641221374
Te



Model for StudentPerf:
Train Accuracy:0.9381443298969072
Validation Accuracy:0.7404580152671756
Test Accuracy: 0.7
Class Balance=output    0.463918
output    0.536082
dtype: float64




Model for StudentPerf:
Train Accuracy:0.9716494845360825
Validation Accuracy:0.732824427480916
Test Accuracy: 0.7153846153846154
Class Balance=output    0.463918
output    0.536082
dtype: float64


##########################################
Size train: 19536, 19536 

Size validation: 6513, 6513 

Size test: 6512, 6512 

##########################################


Model for Adult:
Train Accuracy:0.8567260442260443
Validation Accuracy:0.8360202671579917
Test Accuracy: 0.8341523341523341
Class Balance=output    0.240786
output    0.759214
dtype: float64




Model for Adult:
Train Accuracy:0.8582104832104832
Validation Accuracy:0.8374021188392445
Test Accuracy: 0.8349201474201474
Class Balance=output    0.240786
output    0.759214
dtype: float64




Model for Adult:
Train Accuracy:0.8678337428337428
Vali



Model for Adult:
Train Accuracy:0.8706490581490581
Validation Accuracy:0.8578228159066482
Test Accuracy: 0.8579545454545454
Class Balance=output    0.240786
output    0.759214
dtype: float64




Model for Adult:
Train Accuracy:0.8805282555282555
Validation Accuracy:0.8515277138031629
Test Accuracy: 0.8528869778869779
Class Balance=output    0.240786
output    0.759214
dtype: float64




Model for Adult:
Train Accuracy:0.883087633087633
Validation Accuracy:0.8470750806080147
Test Accuracy: 0.8511977886977887
Class Balance=output    0.240786
output    0.759214
dtype: float64




Model for Adult:
Train Accuracy:0.8874897624897625
Validation Accuracy:0.8486104713649624
Test Accuracy: 0.8504299754299754
Class Balance=output    0.240786
output    0.759214
dtype: float64




Model for Adult:
Train Accuracy:0.8955262080262081
Validation Accuracy:0.8476892369107938
Test Accuracy: 0.8487407862407862
Class Balance=output    0.240786
output    0.759214
dtype: float64


##########################



Model for InternetAdv:
Train Accuracy:0.9978783592644979
Validation Accuracy:0.9640591966173362
Test Accuracy: 0.9661016949152542
Class Balance=output    0.161245
output    0.838755
dtype: float64




Model for InternetAdv:
Train Accuracy:0.9978783592644979
Validation Accuracy:0.9619450317124736
Test Accuracy: 0.9661016949152542
Class Balance=output    0.161245
output    0.838755
dtype: float64




Model for InternetAdv:
Train Accuracy:0.9985855728429985
Validation Accuracy:0.9619450317124736
Test Accuracy: 0.9661016949152542
Class Balance=output    0.161245
output    0.838755
dtype: float64




Model for InternetAdv:
Train Accuracy:0.9992927864214993
Validation Accuracy:0.9619450317124736
Test Accuracy: 0.961864406779661
Class Balance=output    0.161245
output    0.838755
dtype: float64




Model for InternetAdv:
Train Accuracy:0.9992927864214993
Validation Accuracy:0.9619450317124736
Test Accuracy: 0.961864406779661
Class Balance=output    0.161245
output    0.838755
dtype: float64



Model for StatlogGC:
Train Accuracy:0.9966666666666667
Validation Accuracy:0.745
Test Accuracy: 0.695
Class Balance=output    0.3
output    0.7
dtype: float64




Model for StatlogGC:
Train Accuracy:0.995
Validation Accuracy:0.76
Test Accuracy: 0.705
Class Balance=output    0.3
output    0.7
dtype: float64


