## Introduction

In [90]:
# Reload imported modules automatically before each cell is executed, so edits
# to the project's own modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [91]:
import sys

sys.path.append('./src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate

from model import kchain
from data_functions import make_adversarial, make_spirals, standardize, normalize, load_cross_validation_data, load_complete_data
from experiment_functions import *

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier  

In [81]:
def pretty_print_results(res: list, order: list) -> None:
    """Print a table of ``mean +- std`` summaries, one row per result entry.

    Parameters
    ----------
    res : list
        One mapping per row of the form
        ``{metric_name: {'mean': number, 'std': number}}``. The metric names of
        the first entry are used as the column headers.
    order : list
        Row labels; ``order[i]`` is printed in front of ``res[i]``.

    Returns
    -------
    None
        The table is written to stdout. Nothing is printed when ``res`` is
        empty.

    Raises
    ------
    IndexError
        If ``order`` has fewer entries than ``res``.
    """
    # The header row is taken from res[0], so an empty list has nothing to
    # tabulate; return quietly instead of failing on the lookup below.
    if not res:
        return

    table = [
        [order[i]] + [f"{stats['mean']:.3f} +- {stats['std']:.3f}" for stats in metrics.values()]
        for i, metrics in enumerate(res)
    ]

    # Only the metric names are passed as headers; the leading label column is
    # intentionally left without one.
    print(tabulate(table, headers=list(res[0].keys())))
        

# def print_model_results(res, model_name):

#     print(f"   {model_name} results")

#     print("==========================")
#     print(f"train acc: {res[1]['train_acc']['mean']:.3f} +- {res[1]['train_acc']['std']:.3f}")
#     print(f"test acc: {res[1]['test_acc']['mean']:.3f} +- {res[1]['test_acc']['std']:.3f}")
#     print("--------------------------\n")

In [82]:
## load the data as provided by Wu et al. (from email contact)

# 'car' and 'cifar10' are omitted because they are too large for the kchain.
(
    random,
    adversarial,
    cancer,
    divorce,
    face,
    spiral,
    wine,
) = (
    load_cross_validation_data(dataset_name)
    for dataset_name in ('random', 'adversarial', 'cancer', 'divorce', 'face', 'spiral', 'wine')
)

In [83]:
# Keep only the second element of each pair returned by load_complete_data
# (stored as *_Y); the first element is not needed in this notebook section.
(
    random_Y,
    adversarial_Y,
    cancer_Y,
    divorce_Y,
    face_Y,
    spiral_Y,
    wine_Y,
) = (
    load_complete_data(dataset_name)[1]
    for dataset_name in ('random', 'adversarial', 'cancer', 'divorce', 'face', 'spiral', 'wine')
)


In [84]:
# List the distinct values found in each dataset's Y array.
for dataset_label, y_values in (
    ("Random", random_Y),
    ("Adversarial", adversarial_Y),
    ("Cancer", cancer_Y),
    ("Divorce", divorce_Y),
    ("Face", face_Y),
    ("Spiral", spiral_Y),
    ("Wine", wine_Y),
):
    print(f"{dataset_label} unique Y's: {np.unique(y_values)}")

Random unique Y's: [0 1]
Adversarial unique Y's: [0 1]
Cancer unique Y's: [2 4]
Divorce unique Y's: [0 1]
Face unique Y's: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
Spiral unique Y's: [1 2 3]
Wine unique Y's: [1 2 3]


In [108]:
# Report the number of rows of X_train for entry 1 of every dataset.
# NOTE(review): the printed label says "each fold", but only entry 1 is
# inspected — confirm that all folds have the same training size.
for dataset_label, folds in (
    ("Random", random),
    ("Adversarial", adversarial),
    ("Cancer", cancer),
    ("Divorce", divorce),
    ("Face", face),
    ("Spiral", spiral),
    ("Wine", wine),
):
    heading = f"{dataset_label} # samples in each fold:"
    # Pad to the width of the longest heading so the counts line up.
    print(f"{heading:<35} {folds[1]['X_train'].shape[0]}")

Random # samples in each fold:      54
Adversarial # samples in each fold: 72
Cancer # samples in each fold:      553
Divorce # samples in each fold:     153
Face # samples in each fold:        561
Spiral # samples in each fold:      1080
Wine # samples in each fold:        160


## Standardizing the data: mapping to [-1, 1]

In [92]:
# Cross-validate the kchain on every dataset with the 'standardize' transform.
# Only the second element of each return value is kept; the first element is
# indexed away instead of being bound to `_`, so no throwaway object stays
# alive in the kernel and IPython's last-output `_` is not shadowed.
(
    kchain_random_results,
    kchain_adversarial_results,
    kchain_cancer_results,
    kchain_divorce_results,
    kchain_face_results,
    kchain_spiral_results,
    kchain_wine_results,
) = (
    cv_experiment_kchain(dataset, kchain, fit_kwargs={'verbose': False}, data_transform='standardize')[1]
    for dataset in (random, adversarial, cancer, divorce, face, spiral, wine)
)


In [93]:
# Row labels, in the same order as the kchain result entries collected below.
order = ['random', 'adversarial', 'cancer', 'divorce', 'face', 'spiral', 'wine']
res = [
    kchain_random_results,
    kchain_adversarial_results,
    kchain_cancer_results,
    kchain_divorce_results,
    kchain_face_results,
    kchain_spiral_results,
    kchain_wine_results,
]

pretty_print_results(res, order)

             time             layers          acc_train       acc_test        f1_train        f1_test         mse_train       mse_test         hsic_train         hsic_test        knn_acc_train    knn_acc_test    gnb_acc_train    gnb_acc_test
-----------  ---------------  --------------  --------------  --------------  --------------  --------------  --------------  ---------------  -----------------  ---------------  ---------------  --------------  ---------------  --------------
random       0.183 +- 0.081   4.400 +- 1.428  0.944 +- 0.059  0.517 +- 0.252  0.944 +- 0.059  0.513 +- 0.268  0.056 +- 0.059  0.483 +- 0.252   8.524 +- 4.010     0.289 +- 0.163   1.000 +- 0.000   0.583 +- 0.250  0.946 +- 0.046   0.517 +- 0.263
adversarial  0.287 +- 0.157   5.000 +- 1.000  0.879 +- 0.107  0.338 +- 0.224  0.879 +- 0.108  0.324 +- 0.224  0.121 +- 0.107  0.662 +- 0.224   10.021 +- 5.679    0.425 +- 0.443   1.000 +- 0.000   0.338 +- 0.177  0.883 +- 0.110   0.338 +- 0.210
cancer       4.234 +- 0.56

In [103]:
# Cross-validate the baseline classifiers on every dataset with the
# 'standardize' transform. Calls run in the same order as before (all KNN, then
# GNB, then SVM, then MLP). Only the second element of each return value is
# kept; nothing is bound to `_`, so the discarded object is not kept alive and
# IPython's last-output `_` is not shadowed.
baseline_datasets = (random, adversarial, cancer, divorce, face, spiral, wine)

# 1-nearest-neighbour
(
    knn_random_res,
    knn_adversarial_res,
    knn_cancer_res,
    knn_divorce_res,
    knn_face_res,
    knn_spiral_res,
    knn_wine_res,
) = (
    cv_experiment(dataset, KNeighborsClassifier, model_kwargs={'n_neighbors': 1}, data_transform='standardize')[1]
    for dataset in baseline_datasets
)

# Gaussian naive Bayes (default hyper-parameters)
(
    gnb_random_res,
    gnb_adversarial_res,
    gnb_cancer_res,
    gnb_divorce_res,
    gnb_face_res,
    gnb_spiral_res,
    gnb_wine_res,
) = (
    cv_experiment(dataset, GaussianNB, data_transform='standardize')[1]
    for dataset in baseline_datasets
)

# Support vector classifier (default hyper-parameters)
(
    svm_random_res,
    svm_adversarial_res,
    svm_cancer_res,
    svm_divorce_res,
    svm_face_res,
    svm_spiral_res,
    svm_wine_res,
) = (
    cv_experiment(dataset, SVC, data_transform='standardize')[1]
    for dataset in baseline_datasets
)

# Multi-layer perceptron; the iteration cap is listed per dataset.
(
    mlp_random_res,
    mlp_adversarial_res,
    mlp_cancer_res,
    mlp_divorce_res,
    mlp_face_res,
    mlp_spiral_res,
    mlp_wine_res,
) = (
    cv_experiment(dataset, MLPClassifier, data_transform='standardize', model_kwargs={'max_iter': max_iter})[1]
    for dataset, max_iter in (
        (random, 10000),
        (adversarial, 10000),
        (cancer, 10000),
        (divorce, 10000),
        (face, 10000),
        # NOTE(review): spiral is capped at 1000 iterations while every other
        # dataset gets 10000 — value carried over unchanged; confirm it is intentional.
        (spiral, 1000),
        (wine, 10000),
    )
)

In [104]:
# Group the per-dataset summaries by classifier, in the row order given by `order`.
knn_res = [knn_random_res, knn_adversarial_res, knn_cancer_res, knn_divorce_res, knn_face_res, knn_spiral_res, knn_wine_res]
gnb_res = [gnb_random_res, gnb_adversarial_res, gnb_cancer_res, gnb_divorce_res, gnb_face_res, gnb_spiral_res, gnb_wine_res]
svm_res = [svm_random_res, svm_adversarial_res, svm_cancer_res, svm_divorce_res, svm_face_res, svm_spiral_res, svm_wine_res]
mlp_res = [mlp_random_res, mlp_adversarial_res, mlp_cancer_res, mlp_divorce_res, mlp_face_res, mlp_spiral_res, mlp_wine_res]

# Print one titled table per classifier, with a separator between consecutive tables.
for position, (title, model_res) in enumerate((
    ("KNN results", knn_res),
    ("GNB results", gnb_res),
    ("SVM results", svm_res),
    ("MLP results", mlp_res),
)):
    if position > 0:
        print("\n====================================\n")
    print(title)
    pretty_print_results(model_res, order)

KNN results
             train_acc       test_acc
-----------  --------------  --------------
random       1.000 +- 0.000  0.550 +- 0.150
adversarial  1.000 +- 0.000  0.037 +- 0.080
cancer       1.000 +- 0.000  0.958 +- 0.016
divorce      1.000 +- 0.000  0.976 +- 0.039
face         1.000 +- 0.000  0.992 +- 0.011
spiral       1.000 +- 0.000  1.000 +- 0.000
wine         1.000 +- 0.000  0.955 +- 0.035


GNB results
             train_acc       test_acc
-----------  --------------  --------------
random       0.596 +- 0.031  0.533 +- 0.180
adversarial  0.528 +- 0.016  0.275 +- 0.094
cancer       0.968 +- 0.003  0.968 +- 0.023
divorce      0.980 +- 0.005  0.971 +- 0.054
face         1.000 +- 0.000  0.938 +- 0.032
spiral       0.880 +- 0.004  0.879 +- 0.035
wine         0.986 +- 0.004  0.977 +- 0.037


SVM results
             train_acc       test_acc
-----------  --------------  --------------
random       0.722 +- 0.035  0.583 +- 0.171
adversarial  0.525 +- 0.014  0.225 +- 0.109
cancer    

## Normalizing the data: mapping to [0, 1]

In [96]:
# Cross-validate the kchain on every dataset with the 'normalize' transform.
# Only the second element of each return value is kept; the first element is
# indexed away instead of being bound to `_`, so no throwaway object stays
# alive in the kernel and IPython's last-output `_` is not shadowed.
(
    kchain_random_results,
    kchain_adversarial_results,
    kchain_cancer_results,
    kchain_divorce_results,
    kchain_face_results,
    kchain_spiral_results,
    kchain_wine_results,
) = (
    cv_experiment_kchain(dataset, kchain, fit_kwargs={'verbose': False}, data_transform='normalize')[1]
    for dataset in (random, adversarial, cancer, divorce, face, spiral, wine)
)

In [97]:
# Row labels, in the same order as the kchain result entries collected below.
order = ['random', 'adversarial', 'cancer', 'divorce', 'face', 'spiral', 'wine']
res = [
    kchain_random_results,
    kchain_adversarial_results,
    kchain_cancer_results,
    kchain_divorce_results,
    kchain_face_results,
    kchain_spiral_results,
    kchain_wine_results,
]

pretty_print_results(res, order)

             time             layers          acc_train       acc_test        f1_train        f1_test         mse_train        mse_test          hsic_train        hsic_test        knn_acc_train    knn_acc_test    gnb_acc_train    gnb_acc_test
-----------  ---------------  --------------  --------------  --------------  --------------  --------------  ---------------  ----------------  ----------------  ---------------  ---------------  --------------  ---------------  --------------
random       0.105 +- 0.018   5.100 +- 0.831  0.952 +- 0.035  0.400 +- 0.111  0.952 +- 0.036  0.386 +- 0.122  0.048 +- 0.035   0.600 +- 0.111    9.475 +- 3.157    0.214 +- 0.114   1.000 +- 0.000   0.467 +- 0.194  0.950 +- 0.039   0.350 +- 0.157
adversarial  0.214 +- 0.067   4.000 +- 1.949  0.751 +- 0.028  0.275 +- 0.146  0.750 +- 0.029  0.261 +- 0.150  0.249 +- 0.028   0.725 +- 0.146    2.662 +- 1.261    0.397 +- 0.287   1.000 +- 0.000   0.312 +- 0.140  0.744 +- 0.032   0.325 +- 0.115
cancer       3.526 +- 

In [98]:

# Cross-validate the baseline classifiers on every dataset with the
# 'normalize' transform. Calls run in the same order as before (all KNN, then
# GNB, then SVM, then MLP). Only the second element of each return value is
# kept; nothing is bound to `_`, so the discarded object is not kept alive and
# IPython's last-output `_` is not shadowed.
baseline_datasets = (random, adversarial, cancer, divorce, face, spiral, wine)

# 1-nearest-neighbour
(
    knn_random_res,
    knn_adversarial_res,
    knn_cancer_res,
    knn_divorce_res,
    knn_face_res,
    knn_spiral_res,
    knn_wine_res,
) = (
    cv_experiment(dataset, KNeighborsClassifier, model_kwargs={'n_neighbors': 1}, data_transform='normalize')[1]
    for dataset in baseline_datasets
)

# Gaussian naive Bayes (default hyper-parameters)
(
    gnb_random_res,
    gnb_adversarial_res,
    gnb_cancer_res,
    gnb_divorce_res,
    gnb_face_res,
    gnb_spiral_res,
    gnb_wine_res,
) = (
    cv_experiment(dataset, GaussianNB, data_transform='normalize')[1]
    for dataset in baseline_datasets
)

# Support vector classifier (default hyper-parameters)
(
    svm_random_res,
    svm_adversarial_res,
    svm_cancer_res,
    svm_divorce_res,
    svm_face_res,
    svm_spiral_res,
    svm_wine_res,
) = (
    cv_experiment(dataset, SVC, data_transform='normalize')[1]
    for dataset in baseline_datasets
)

# Multi-layer perceptron; the iteration cap is listed per dataset.
(
    mlp_random_res,
    mlp_adversarial_res,
    mlp_cancer_res,
    mlp_divorce_res,
    mlp_face_res,
    mlp_spiral_res,
    mlp_wine_res,
) = (
    cv_experiment(dataset, MLPClassifier, data_transform='normalize', model_kwargs={'max_iter': max_iter})[1]
    for dataset, max_iter in (
        (random, 10000),
        (adversarial, 10000),
        (cancer, 10000),
        (divorce, 10000),
        (face, 10000),
        # NOTE(review): spiral is capped at 1000 iterations while every other
        # dataset gets 10000 — value carried over unchanged; confirm it is intentional.
        (spiral, 1000),
        (wine, 10000),
    )
)


In [99]:
# Group the per-dataset summaries by classifier, in the row order given by `order`.
knn_res = [knn_random_res, knn_adversarial_res, knn_cancer_res, knn_divorce_res, knn_face_res, knn_spiral_res, knn_wine_res]
gnb_res = [gnb_random_res, gnb_adversarial_res, gnb_cancer_res, gnb_divorce_res, gnb_face_res, gnb_spiral_res, gnb_wine_res]
svm_res = [svm_random_res, svm_adversarial_res, svm_cancer_res, svm_divorce_res, svm_face_res, svm_spiral_res, svm_wine_res]
mlp_res = [mlp_random_res, mlp_adversarial_res, mlp_cancer_res, mlp_divorce_res, mlp_face_res, mlp_spiral_res, mlp_wine_res]

# Print one titled table per classifier, with a separator between consecutive tables.
for position, (title, model_res) in enumerate((
    ("KNN results", knn_res),
    ("GNB results", gnb_res),
    ("SVM results", svm_res),
    ("MLP results", mlp_res),
)):
    if position > 0:
        print("\n====================================\n")
    print(title)
    pretty_print_results(model_res, order)

KNN results
             train_acc       test_acc
-----------  --------------  --------------
random       1.000 +- 0.000  0.533 +- 0.163
adversarial  1.000 +- 0.000  0.325 +- 0.127
cancer       1.000 +- 0.000  0.968 +- 0.023
divorce      1.000 +- 0.000  0.982 +- 0.027
face         1.000 +- 0.000  0.998 +- 0.005
spiral       1.000 +- 0.000  0.823 +- 0.023
wine         1.000 +- 0.000  0.859 +- 0.078


GNB results
             train_acc       test_acc
-----------  --------------  --------------
random       0.598 +- 0.035  0.517 +- 0.157
adversarial  0.522 +- 0.017  0.338 +- 0.148
cancer       0.970 +- 0.002  0.967 +- 0.021
divorce      0.976 +- 0.004  0.971 +- 0.054
face         1.000 +- 0.000  0.949 +- 0.029
spiral       0.851 +- 0.005  0.851 +- 0.032
wine         0.903 +- 0.006  0.899 +- 0.054


SVM results
             train_acc       test_acc
-----------  --------------  --------------
random       0.700 +- 0.025  0.617 +- 0.183
adversarial  0.532 +- 0.023  0.300 +- 0.100
cancer    