In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import matplotlib
import matplotlib.pyplot as plt
import time
import random

from sklearn.metrics import confusion_matrix, plot_confusion_matrix, accuracy_score, plot_roc_curve,\
                             precision_recall_curve, plot_precision_recall_curve, f1_score, average_precision_score,\
                             hinge_loss, precision_score, recall_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, KFold
from sklearn.metrics import make_scorer, accuracy_score, average_precision_score, f1_score,\
                            log_loss, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import label_binarize, LabelBinarizer, LabelEncoder, OneHotEncoder
from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import resample

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import RMSprop
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical

from catboost import CatBoostClassifier

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Data load

In [2]:
fmnist = fetch_openml("Fashion-MNIST", data_home="./fmnist", cache=True)
classes = [str(x) for x in range(0, 10)]
num_classes = len(classes)

def mk_dataset(total, fmnist=fmnist, classes=classes):
    samples = int(fmnist.data.shape[0]*total)
    return resample(fmnist.data, fmnist.target, n_samples=samples)

def plot_imgs(x, y, w=28, h=28):
    plt.figure(figsize=(10,10))
    for i in range(min(25, x.shape[0])):
        plt.subplot(5,5,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        img = x[i]
        img = img.reshape((w, h))
        plt.imshow(img)
        plt.xlabel(y[i])
    plt.show()    

# Model evaluation

In [10]:
def binarized_scorer(metric, **kwargs):
    lb = LabelBinarizer()
    def score(y_test, y_pred, metric=metric, lb=lb, kwargs=kwargs):
        lb.fit(y_test)
        y_test = lb.transform(y_test)
        y_pred = lb.transform(y_pred)
        return metric(y_test, y_pred, **kwargs)
    return score

def mk_test(clf, name, mangling=0.0):
    def run_test(X, Y, clf=clf, name=name, mangling=mangling):
        scoring = {
            "accuracy":     binarized_scorer(accuracy_score), 
            "f1_score":     binarized_scorer(f1_score, average='macro'), 
            "log_loss":     binarized_scorer(log_loss), 
            "precision":    binarized_scorer(precision_score, average='macro'), 
            "recall":       binarized_scorer(recall_score, average='macro'), 
            "roc_auc":      binarized_scorer(roc_auc_score, average='macro'),
            # to je pole pod Precision-Recall, albo jakaś średnia. nie wiem.
            "prec_rec_auc": binarized_scorer(average_precision_score, average='macro') 
        }
        
        '''
        # Ta implementacja jest potwornie wolna dla modeli szkolonych na CPU
        # ale przynajmniej pozwala na wprowadzenie szumu do danych treningowych
        scores = {}
        Y_org = np.array(Y, copy=True)
        for train_index, test_index in KFold(n_splits=5).split(X):
            Y = np.array(Y_org, copy=True)
            to_mangle = int(len(train_index)*mangling)
            for idx in train_index[:to_mangle]:
                Y[idx] = random.choice(classes)
            clf.fit(X[train_index], Y[train_index])
            Y_true = Y[test_index]
            Y_pred = clf.predict(X[test_index])
            for (key, scorer) in scoring.items():
                if key in scores:
                    scores[key] += [scorer(Y_true, Y_pred)]
                else:
                    scores[key] = [scorer(Y_true, Y_pred)]
        '''
        
        # Bez kross-walidacji.
        scores = {}
        x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
        to_mangle = int(len(x_train)*mangling)
        for idx in range(to_mangle):
            y_train[idx] = random.choice(classes)
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        for (key, scorer) in scoring.items():
            scores[key] = [scorer(y_test, y_pred)]
    
        scores = {k: (1.0*sum(v))/len(v) for k, v in scores.items()}
        df = pd.DataFrame(scores, index=[0])
        df.insert(loc=0, column='Name', value=name)
        return df
    return run_test

# Models

In [49]:
def mk_adaboost(depth=5, n=100, seed=1):
    return AdaBoostClassifier(
        base_estimator=DecisionTreeClassifier(max_depth=depth),
        n_estimators=n,
        random_state=seed)

def mk_catboost():
    return CatBoostClassifier(iterations=1000, task_type="GPU")

class MyLittleKerasClassifier(KerasClassifier):
    # predict() nie zwracal 
    def predict(self, X):
        y_pred = KerasClassifier.predict(self, X)
        return to_categorical(y_pred, num_classes)

def mk_mlp(epochs=10):
    def build():
        model = Sequential()
        model.add(Dense(64, activation='relu', input_shape=(28*28,)))
        model.add(Dropout(0.2))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model
    build().summary()
    return MyLittleKerasClassifier(build_fn=build, epochs=epochs)

def mk_cnn(epochs=10):
    def build():
        model = Sequential()
        model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(28,28, 1), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(32, kernel_size = 5, strides=2, padding='same', activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))
        model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(64, kernel_size = 5, strides=2, padding='same', activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))
        model.add(Conv2D(128, kernel_size = 4, activation='relu'))
        model.add(BatchNormalization())
        model.add(Flatten())
        model.add(Dropout(0.4))
        model.add(Dense(10, activation='softmax'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model
    build().summary()
    return MyLittleKerasClassifier(build_fn=build, epochs=epochs)

# Pomiar metryk w zależności od ilości dostępnych danych

In [52]:
def test_cnn_data(data_sz):
    name = 'CNN | {}% of data'.format(data_sz*100.0)
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    x = x.reshape((x.shape[0], 28, 28, 1))
    return mk_test(mk_cnn(epochs=10), name)(x, y)

def test_mlp_data(data_sz):
    name = 'MLP | {}% of data'.format(data_sz*100.0)
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    return mk_test(mk_mlp(epochs=10), name)(x, y)

# Pomiar z użyciem time.time() to nie jest czas CPU
# ale już tam nic to. timeit() było zbyt irytujące
def test_adaboost_data(data_sz):
    print("Ada SZ: %d" % data_sz)
    name = 'AdaBoost | {}% of data'.format(data_sz*100.0)
    return mk_test(mk_adaboost(n=100), name)(*mk_dataset(data_sz))

In [53]:
tests_data = pd.concat([method(data_sz) for method in [test_adaboost_data, test_mlp_data, test_cnn_data] for data_sz in [0.1, 0.3, 0.5, 0.6, 0.8, 1.0]])
tests_data

Ada SZ: 0
Ada SZ: 0
Ada SZ: 0
Ada SZ: 0
Ada SZ: 0
Ada SZ: 1
Model: "sequential_55"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_199 (Dense)            (None, 64)                50240     
_________________________________________________________________
dropout_220 (Dropout)        (None, 64)                0         
_________________________________________________________________
dense_200 (Dense)            (None, 128)               8320      
_________________________________________________________________
dropout_221 (Dropout)        (None, 128)               0         
_________________________________________________________________
dense_201 (Dense)            (None, 256)               33024     
_________________________________________________________________
dropout_222 (Dropout)        (None, 256)               0         
___________________________________________________________

Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_61"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_235 (Dense)            (None, 64)                50240     
_________________________________________________________________
dropout_250 (Dropout)        (None, 64)                0         
_________________________________________________________________
dense_236 (Dense)            (None, 128)               8320      
_________________________________________________________________
dropout_251 (Dropout)        (None, 128)               0         
_________________________________________________________________
dense_237 (Dense)            (None, 256)               33024     
_________________________________________________________________
dropout_252 (Dropout)        (None, 256)               0         
_________________________________________________________________
dense_23

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_67"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_176 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_176 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_177 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_177 (Bat (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_178 (Conv2D)          (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_178 (Bat (None, 12, 12, 32)        128       
______________________________

Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_71"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_204 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_204 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_205 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_205 (Bat (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_206 (Conv2D)          (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_206 (Bat (None, 12, 12, 32)        128       
_______________________________________________________________

Epoch 9/10
Epoch 10/10
Model: "sequential_75"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_232 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_232 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_233 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_233 (Bat (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_234 (Conv2D)          (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_234 (Bat (None, 12, 12, 32)        128       
_________________________________________________________________
dropout_304 (Dropout)        (

Unnamed: 0,Name,accuracy,f1_score,log_loss,precision,recall,roc_auc,prec_rec_auc
0,AdaBoost | 10.0% of data,0.739286,0.74801,9.004752,0.753317,0.74701,0.858962,0.618273
0,AdaBoost | 30.0% of data,0.74619,0.748015,8.76627,0.75244,0.745637,0.85873,0.6242
0,AdaBoost | 50.0% of data,0.710286,0.718291,10.006377,0.728679,0.715963,0.841832,0.58349
0,AdaBoost | 60.0% of data,0.716548,0.719405,9.790098,0.732555,0.717996,0.843254,0.586704
0,AdaBoost | 80.0% of data,0.737321,0.74109,9.072596,0.745271,0.739609,0.855196,0.612381
0,AdaBoost | 100.0% of data,0.690214,0.693554,10.69962,0.709862,0.690326,0.82795,0.552017
0,MLP | 10.0% of data,0.845,0.841632,5.35351,0.843769,0.84533,0.914048,0.742177
0,MLP | 30.0% of data,0.860952,0.860672,4.802535,0.862481,0.860372,0.922443,0.769402
0,MLP | 50.0% of data,0.867571,0.864721,4.573921,0.867263,0.867241,0.926259,0.773778
0,MLP | 60.0% of data,0.871071,0.869019,4.453035,0.873707,0.870241,0.927957,0.78217


In [54]:
tests_data.to_pickle('3aa.pkl')

# Pomiar metryk w zależności od czasów treningu

In [57]:
def test_cnn_time(epochs, data_sz=0.5):
    name = 'CNN | %d epochs' % epochs
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    x = x.reshape((x.shape[0], 28, 28, 1))
    return mk_test(mk_cnn(epochs=epochs), name)(x, y)

def test_mlp_time(epochs, data_sz=0.5):
    name = 'MLP | %d epochs' % epochs
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    return mk_test(mk_mlp(epochs=epochs), name)(x, y)

# Pomiar z użyciem time.time() to nie jest czas CPU
# ale już tam nic to. timeit() było zbyt irytujące
def test_adaboost_time(epochs, data_sz=0.5):
    print("AdaBoost %d" % epochs)
    start_time = time.time()
    name = 'AdaBoost'
    result = mk_test(mk_adaboost(n=epochs), name)(*mk_dataset(data_sz))
    elapsed_time = time.time() - start_time
    result['Name'] += ' | %d secs' % (elapsed_time)
    return result

## Dla Adaboosta - czas rzeczywisty

In [58]:
tests_time = [test_adaboost_time(n) for n in [10, 25, 50, 150, 500]]

AdaBoost 10
AdaBoost 25
AdaBoost 50
AdaBoost 150
AdaBoost 500


## Dla sieci - liczba epok

In [59]:
tests_time += [method(epochs) for method in [test_mlp_time, test_cnn_time] for epochs in [1, 3, 5, 15, 25, 35]]
tests_time

Model: "sequential_79"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_283 (Dense)            (None, 64)                50240     
_________________________________________________________________
dropout_316 (Dropout)        (None, 64)                0         
_________________________________________________________________
dense_284 (Dense)            (None, 128)               8320      
_________________________________________________________________
dropout_317 (Dropout)        (None, 128)               0         
_________________________________________________________________
dense_285 (Dense)            (None, 256)               33024     
_________________________________________________________________
dropout_318 (Dropout)        (None, 256)               0         
_________________________________________________________________
dense_286 (Dense)            (None, 512)             

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_87"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_331 (Dense)            (None, 64)                50240     
_________________________________________________________________
dropout_356 (Dropout)        (None, 64)                0         
_________________________________________________________________
dense_332 (Dense)            (None, 128)               8320      
_________________________________________________________________
dropout_357 (Dropout)        (None, 128)               0         
_________________________________________________________________
dense_333 (Dense)            (None, 256)               33024     
_________________________________________________________________
dropout_358 (Dropout)    

Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
Model: "sequential_91"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_260 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_260 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_261 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_261 (Bat (None, 24, 24, 32)        128       
________________

Epoch 1/3
Epoch 2/3
Epoch 3/3
Model: "sequential_95"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_288 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_288 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_289 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_289 (Bat (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_290 (Conv2D)          (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_290 (Bat (None, 12, 12, 32)        128       
_________________________________________________________________
dropout_388 (Dropout)  

Model: "sequential_99"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_316 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_316 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_317 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_317 (Bat (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_318 (Conv2D)          (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_318 (Bat (None, 12, 12, 32)        128       
_________________________________________________________________
dropout_400 (Dropout)        (None, 12, 12, 32)      

Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


[                 Name  accuracy  f1_score   log_loss  precision    recall  \
 0  AdaBoost | 41 secs  0.674143  0.660696  11.254707   0.662973  0.675028   
 
     roc_auc  prec_rec_auc  
 0  0.819431      0.517244  ,
                  Name  accuracy  f1_score   log_loss  precision    recall  \
 0  AdaBoost | 98 secs  0.673286  0.671653  11.284312   0.682087  0.675956   
 
     roc_auc  prec_rec_auc  
 0  0.819818      0.536792  ,
                   Name  accuracy  f1_score   log_loss  precision    recall  \
 0  AdaBoost | 194 secs  0.708143  0.708621  10.080389   0.720281  0.704137   
 
    roc_auc  prec_rec_auc  
 0   0.8359      0.571904  ,
                   Name  accuracy  f1_score  log_loss  precision    recall  \
 0  AdaBoost | 576 secs  0.730286  0.733719  9.315601   0.754045  0.728994   
 
     roc_auc  prec_rec_auc  
 0  0.849529      0.610027  ,
                    Name  accuracy  f1_score  log_loss  precision    recall  \
 0  AdaBoost | 1628 secs  0.771571  0.771118  7.88964

In [61]:
df = pd.concat(tests_time)
df.to_pickle('3bb.pkl')
df

Unnamed: 0,Name,accuracy,f1_score,log_loss,precision,recall,roc_auc,prec_rec_auc
0,AdaBoost | 41 secs,0.674143,0.660696,11.254707,0.662973,0.675028,0.819431,0.517244
0,AdaBoost | 98 secs,0.673286,0.671653,11.284312,0.682087,0.675956,0.819818,0.536792
0,AdaBoost | 194 secs,0.708143,0.708621,10.080389,0.720281,0.704137,0.8359,0.571904
0,AdaBoost | 576 secs,0.730286,0.733719,9.315601,0.754045,0.728994,0.849529,0.610027
0,AdaBoost | 1628 secs,0.771571,0.771118,7.889643,0.775195,0.771171,0.872892,0.648503
0,MLP | 1 epochs,0.814857,0.80775,6.394608,0.817601,0.816511,0.897969,0.701238
0,MLP | 3 epochs,0.866429,0.866259,4.613394,0.867767,0.866591,0.925871,0.774929
0,MLP | 5 epochs,0.855,0.851723,5.008123,0.853429,0.855015,0.919452,0.754478
0,MLP | 15 epochs,0.875286,0.873481,4.307479,0.876812,0.875326,0.930732,0.788716
0,MLP | 25 epochs,0.881714,0.880659,4.085444,0.881988,0.88049,0.933678,0.798737


# Moc klasyfiaktora
Tutaj trzeba przemieszać pewien % etykiet zbioru treningowego.

In [29]:
def test_cnn_rob(mangling, data_sz=0.5, epochs=10):
    name = 'CNN | {}% mangling'.format(mangling*100.0)
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    x = x.reshape((x.shape[0], 28, 28, 1))
    return mk_test(mk_cnn(epochs=epochs), name, mangling=mangling)(x, y)

def test_mlp_rob(mangling, data_sz=0.5, epochs=10):
    name = 'MLP | {}% mangling'.format(mangling*100.0)
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    return mk_test(mk_mlp(epochs=epochs), name, mangling=mangling)(x, y)

def test_adaboost_rob(mangling, data_sz=0.5, n=100):
    name = 'AdaBoost | {}% mangling'.format(mangling*100.0)
    result = mk_test(mk_adaboost(n=n), name, mangling=mangling)(*mk_dataset(data_sz))
    return result

In [51]:
#robustness_test = [method(man) for method in [test_adaboost_rob, test_mlp_rob, test_cnn_rob] for man in [0.01, 0.05, 0.15, 0.3]]
robustness_test = [method(man) for method in [test_mlp_rob, test_cnn_rob] for man in [0.0, 0.01, 0.05, 0.15, 0.3, 0.5, 1.0]]
pd.concat(robustness_test)

Model: "sequential_31"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_115 (Dense)            (None, 64)                50240     
_________________________________________________________________
dropout_124 (Dropout)        (None, 64)                0         
_________________________________________________________________
dense_116 (Dense)            (None, 128)               8320      
_________________________________________________________________
dropout_125 (Dropout)        (None, 128)               0         
_________________________________________________________________
dense_117 (Dense)            (None, 256)               33024     
_________________________________________________________________
dropout_126 (Dropout)        (None, 256)               0         
_________________________________________________________________
dense_118 (Dense)            (None, 512)             

Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_37"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_151 (Dense)            (None, 64)                50240     
_________________________________________________________________
dropout_154 (Dropout)        (None, 64)                0         
_________________________________________________________________
dense_152 (Dense)            (None, 128)               8320      
_________________________________________________________________
dropout_155 (Dropout)        (None, 128)               0         
_________________________________________________________________
dense_153 (Dense)            (None, 256)               33024     
_________________________________________________________________
dropout_156 (Dropout)        (None, 256)               0         
_________________________________________________________________
dense_154 (Dense)  

Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  _warn_prf(average, modifier, msg_start, len(result))


Model: "sequential_43"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_92 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_92 (Batc (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_93 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_93 (Batc (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_94 (Conv2D)           (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_94 (Batc (None, 12, 12, 32)        128       
_________________________________________________________________
dropout_184 (Dropout)        (None, 12, 12, 32)      

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_49"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_134 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_134 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_135 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_135 (Bat (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_136 (Conv2D)          (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_136 (Bat (None, 12, 12, 32)        128       
________

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_53"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_162 (Conv2D)          (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_162 (Bat (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_163 (Conv2D)          (None, 24, 24, 32)        9248      
_________________________________________________________________
batch_normalization_163 (Bat (None, 24, 24, 32)        128       
_________________________________________________________________
conv2d_164 (Conv2D)          (None, 12, 12, 32)        25632     
_________________________________________________________________
batch_normalization_164 (Bat (None, 12, 12, 32)        128       
____________________________________________________

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Name,accuracy,f1_score,log_loss,precision,recall,roc_auc,prec_rec_auc
0,MLP | 1.0% mangling,0.727714,0.706931,9.404415,0.735001,0.726581,0.848189,0.594932
0,MLP | 5.0% mangling,0.690714,0.675783,10.68235,0.73916,0.688164,0.826941,0.533126
0,MLP | 15.0% mangling,0.776429,0.747428,7.721884,0.737148,0.775088,0.875103,0.662083
0,MLP | 30.0% mangling,0.726429,0.694929,9.448822,0.743245,0.729286,0.849453,0.600243
0,MLP | 50.0% mangling,0.774,0.759196,7.805763,0.777235,0.773175,0.874046,0.651529
0,MLP | 100.0% mangling,0.097857,0.017827,31.15891,0.009786,0.1,0.5,0.1
0,CNN | 1.0% mangling,0.862571,0.85923,4.746615,0.862418,0.861175,0.922965,0.766735
0,CNN | 5.0% mangling,0.829714,0.818981,5.88146,0.833432,0.828163,0.904632,0.718065
0,CNN | 15.0% mangling,0.803571,0.79365,6.784403,0.807451,0.806404,0.892313,0.68542
0,CNN | 30.0% mangling,0.807429,0.805027,6.651182,0.810626,0.808755,0.893671,0.69083


In [62]:
df = pd.conrobustness_test

[                  Name  accuracy  f1_score  log_loss  precision    recall  \
 0  MLP | 1.0% mangling  0.727714  0.706931  9.404415   0.735001  0.726581   
 
     roc_auc  prec_rec_auc  
 0  0.848189      0.594932  ,
                   Name  accuracy  f1_score  log_loss  precision    recall  \
 0  MLP | 5.0% mangling  0.690714  0.675783  10.68235    0.73916  0.688164   
 
     roc_auc  prec_rec_auc  
 0  0.826941      0.533126  ,
                    Name  accuracy  f1_score  log_loss  precision    recall  \
 0  MLP | 15.0% mangling  0.776429  0.747428  7.721884   0.737148  0.775088   
 
     roc_auc  prec_rec_auc  
 0  0.875103      0.662083  ,
                    Name  accuracy  f1_score  log_loss  precision    recall  \
 0  MLP | 30.0% mangling  0.726429  0.694929  9.448822   0.743245  0.729286   
 
     roc_auc  prec_rec_auc  
 0  0.849453      0.600243  ,
                    Name  accuracy  f1_score  log_loss  precision    recall  \
 0  MLP | 50.0% mangling     0.774  0.759196  7.8