# Read Data Sample

In [1]:
import pandas as pd
import numpy as np
import os
from collections import namedtuple
pd.set_option("display.max_rows",100)
#%matplotlib inline

In [2]:
class dataset:
    kdd_train = pd.read_pickle("dataset/kdd_train.pkl")
    kdd_test = pd.read_pickle("dataset/kdd_test.pkl")

    kdd_train_label = pd.read_pickle("dataset/kdd_train_label.pkl")
    kdd_test_label = pd.read_pickle("dataset/kdd_test_label.pkl")

    

In [3]:
dataset.kdd_train.shape

(381105, 161)

In [4]:
dataset.kdd_test.shape

(350596, 161)

In [5]:
from sklearn import model_selection as ms
from sklearn import preprocessing as pp

class preprocess:
    
    output_columns_2labels = ['label']
    
    x_input = dataset.kdd_train
    y_output = dataset.kdd_train_label

    x_test_input = dataset.kdd_test
    y_test = dataset.kdd_test_label
    
    ss = pp.StandardScaler()

    x_train = ss.fit_transform(x_input)
    x_test = ss.transform(x_test_input)
    
    y_train = y_output.values
    y_test = y_test.values
    
preprocess.x_train.std()

1.0000000000000331

In [6]:
import tensorflow as tf


In [7]:
class network(object):
    
    input_dim = 161
    classes = 2
    hidden_encoder_dim = 161
    hidden_layers = 1
    latent_dim = 18

    def __init__(self, classes, hidden_layers, num_of_features):
        self.classes = classes
        self.hidden_layers = hidden_layers
        self.latent_dim = num_of_features
            
    def build_layers(self):
        tf.reset_default_graph()
        #learning_rate = tf.Variable(initial_value=0.001)

        input_dim = self.input_dim
        classes = self.classes
        hidden_encoder_dim = self.hidden_encoder_dim
        hidden_layers = self.hidden_layers
        latent_dim = self.latent_dim
        
        with tf.variable_scope("Input"):
            self.x = tf.placeholder("float", shape=[None, input_dim])
            self.y_ = tf.placeholder("float", shape=[None, classes])
            self.keep_prob = tf.placeholder("float")
            self.lr = tf.placeholder("float")
        
        with tf.variable_scope("Layer_Encoder"):

            hidden_encoder = tf.layers.dense(self.x, hidden_encoder_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
            hidden_encoder = tf.nn.dropout(hidden_encoder, self.keep_prob)
            for h in range(hidden_layers - 1):
                hidden_encoder = tf.layers.dense(hidden_encoder, latent_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
                hidden_encoder = tf.nn.dropout(hidden_encoder, self.keep_prob)
            
            #hidden_encoder = tf.layers.dense(self.x, latent_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
            #hidden_encoder = tf.nn.dropout(hidden_encoder, self.keep_prob)
            
        with tf.variable_scope("Layer_Dense_Softmax"):
            self.y = tf.layers.dense(hidden_encoder, classes, activation=tf.nn.softmax)
            
        with tf.variable_scope("Loss"):
            
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = self.y_, logits = self.y))

            #loss = tf.clip_by_value(loss, -1e-1, 1e-1)
            #loss = tf.where(tf.is_nan(loss), 1e-1, loss)
            #loss = tf.where(tf.equal(loss, -1e-1), tf.random_normal(loss.shape), loss)
            #loss = tf.where(tf.equal(loss, 1e-1), tf.random_normal(loss.shape), loss)
            
            self.regularized_loss = loss
            correct_prediction = tf.equal(tf.argmax(self.y_, 1), tf.argmax(self.y, 1))
            self.tf_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name = "Accuracy")

        with tf.variable_scope("Optimizer"):
            learning_rate=self.lr
            optimizer = tf.train.AdamOptimizer(learning_rate)
            gradients, variables = zip(*optimizer.compute_gradients(self.regularized_loss))
            gradients = [
                None if gradient is None else tf.clip_by_value(gradient, -1, 1)
                for gradient in gradients]
            self.train_op = optimizer.apply_gradients(zip(gradients, variables))
            #self.train_op = optimizer.minimize(self.regularized_loss)
            
        # add op for merging summary
        #self.summary_op = tf.summary.merge_all()
        self.pred = tf.argmax(self.y, axis = 1)
        self.actual = tf.argmax(self.y_, axis = 1)

        # add Saver ops
        self.saver = tf.train.Saver()
        

In [8]:
import collections
import time

class Train:    
    
    result = namedtuple("score", ['epoch', 'no_of_features','hidden_layers','train_score', 'test_score', 'time_taken'])

    predictions = {}
    results = []
    
    best_acc = 0
    best_acc_global = 0
    
    def train(epochs, net, h,f, lrs):
        batch_iterations = 200
        train_loss = None
        Train.best_acc = 0
        os.makedirs("dataset/tf_dense_only_nsl_kdd/hidden layers_{}_features count_{}".format(epochs,h,f),
                    exist_ok = True)
        with tf.Session() as sess:
            #summary_writer_train = tf.summary.FileWriter('./logs/kdd/VAE/training', graph=sess.graph)
            #summary_writer_valid = tf.summary.FileWriter('./logs/kdd/VAE/validation')

            sess.run(tf.global_variables_initializer())
            start_time = time.perf_counter()
            for c, lr in enumerate(lrs):
                for epoch in range(1, (epochs+1)):
                    x_train, x_valid, y_train, y_valid, = ms.train_test_split(preprocess.x_train, 
                                                                              preprocess.y_train, 
                                                                              test_size=0.1)
                    batch_indices = np.array_split(np.arange(x_train.shape[0]), 
                                               batch_iterations)

                    for i in batch_indices:

                        def train_batch():
                            nonlocal train_loss
                            _, train_loss = sess.run([net.train_op, 
                                                               net.regularized_loss, 
                                                               ], #net.summary_op
                                                              feed_dict={net.x: x_train[i,:], 
                                                                         net.y_: y_train[i,:], 
                                                                         net.keep_prob:0.5, net.lr:lr})

                        train_batch()
                        #summary_writer_train.add_summary(summary_str, epoch)
                        while((train_loss > 1e4 or np.isnan(train_loss)) and epoch > 1):
                            print("Step {} | Training Loss: {:.6f}".format(epoch, train_loss))
                            net.saver.restore(sess, 
                                              tf.train.latest_checkpoint('dataset/tf_dense_only_nsl_kdd/hidden_layers_{}_features_count_{}'
                                                                         .format(epochs,h,f)))
                            train_batch()


                    valid_accuracy = sess.run(net.tf_accuracy, #net.summary_op 
                                                          feed_dict={net.x: x_valid, 
                                                                     net.y_: y_valid, 
                                                                     net.keep_prob:1, net.lr:lr})
                    #summary_writer_valid.add_summary(summary_str, epoch)


                    accuracy, pred_value, actual_value, y_pred = sess.run([net.tf_accuracy, 
                                                                   net.pred, 
                                                                   net.actual, net.y], 
                                                                  feed_dict={net.x: preprocess.x_test, 
                                                                             net.y_: preprocess.y_test, 
                                                                             net.keep_prob:1, net.lr:lr})

                    print("Step {} | Training Loss: {:.6f} | Validation Accuracy: {:.6f}".format(epoch, train_loss, valid_accuracy))
                    print("Accuracy on Test data: {}".format(accuracy))

                    if accuracy > Train.best_acc_global:
                        Train.best_acc_global = accuracy
                        Train.pred_value = pred_value
                        Train.actual_value = actual_value

                        Train.best_parameters = "Hidden Layers:{}, Features Count:{}".format(h, f)

                    if accuracy > Train.best_acc:
                        Train.best_acc = accuracy

                        if not (np.isnan(train_loss)):
                            net.saver.save(sess, 
                                       "dataset/tf_dense_only_nsl_kdd/hidden_layers_{}_features_count_{}".format(h,f),
                                        global_step = epochs)
                        curr_pred = pd.DataFrame({"Attack_prob":y_pred[:,-2], "Normal_prob":y_pred[:, -1], "Prediction":pred_value})
                        Train.predictions.update({"{}_{}_{}".format((epoch+1)*(c+1),f,h):(curr_pred, 
                                                   Train.result((epoch+1)*(c+1), f, h, valid_accuracy, accuracy, time.perf_counter() - start_time))})

                        #Train.results.append(Train.result(epochs, f, h,valid_accuracy, accuracy))


In [9]:
import itertools

df_results = []
past_scores = []

class Hyperparameters:
#    features_arr = [2, 4, 8, 16, 32, 64, 128, 256]
#    hidden_layers_arr = [2, 4, 6, 10]

    def start_training():
        global df_results
        global past_scores
        Train.predictions = {}
        Train.results = []
    
        
        features_arr = [1, 8, 32, 122]
        hidden_layers_arr = [1, 3, 5]

        epochs = [10]
        lrs = [1e-5, 1e-5, 1e-6]
        for e, h, f in itertools.product(epochs, hidden_layers_arr, features_arr):
            print("Current Layer Attributes - epochs:{} hidden layers:{} features count:{}".format(e,h,f))
            n = network(2,h,f)
            n.build_layers()
            Train.train(e, n, h,f, lrs)
            
        dict1 = {}
        dict2 = []
        for k, (v1, v2) in Train.predictions.items():
            dict1.update({k: v1})
            dict2.append(v2)
        Train.predictions = dict1
        Train.results = dict2
        df_results = pd.DataFrame(Train.results)

        temp = df_results.set_index(['no_of_features', 'hidden_layers'])

        if not os.path.isfile('dataset/tf_dense_only_nsl_kdd_scores_all.pkl'):
            past_scores = temp
        else:
            past_scores = pd.read_pickle("dataset/tf_dense_only_nsl_kdd_scores_all.pkl")

        past_scores.append(temp).to_pickle("dataset/tf_dense_only_nsl_kdd_scores_all.pkl")

In [10]:
%%timeit -r 1
Hyperparameters.start_training()

Current Layer Attributes - epochs:10 hidden layers:1 features count:1
Step 1 | Training Loss: 0.587173 | Validation Accuracy: 0.878801
Accuracy on Test data: 0.9350563287734985
Step 2 | Training Loss: 0.548769 | Validation Accuracy: 0.892000
Accuracy on Test data: 0.9334362149238586
Step 3 | Training Loss: 0.524256 | Validation Accuracy: 0.891213
Accuracy on Test data: 0.9315251708030701
Step 4 | Training Loss: 0.510331 | Validation Accuracy: 0.892603
Accuracy on Test data: 0.9315365552902222
Step 5 | Training Loss: 0.485706 | Validation Accuracy: 0.897510
Accuracy on Test data: 0.9315422773361206
Step 6 | Training Loss: 0.477152 | Validation Accuracy: 0.896880
Accuracy on Test data: 0.93146812915802
Step 7 | Training Loss: 0.455935 | Validation Accuracy: 0.897195
Accuracy on Test data: 0.9297453761100769
Step 8 | Training Loss: 0.450861 | Validation Accuracy: 0.895411
Accuracy on Test data: 0.9297453761100769
Step 9 | Training Loss: 0.448465 | Validation Accuracy: 0.894283
Accuracy on

Step 6 | Training Loss: 0.436792 | Validation Accuracy: 0.895804
Accuracy on Test data: 0.927839994430542
Step 7 | Training Loss: 0.426808 | Validation Accuracy: 0.894624
Accuracy on Test data: 0.9278856515884399
Step 8 | Training Loss: 0.423192 | Validation Accuracy: 0.896985
Accuracy on Test data: 0.9279027581214905
Step 9 | Training Loss: 0.420419 | Validation Accuracy: 0.899635
Accuracy on Test data: 0.927919864654541
Step 10 | Training Loss: 0.427853 | Validation Accuracy: 0.899452
Accuracy on Test data: 0.9279341697692871
Step 1 | Training Loss: 0.417630 | Validation Accuracy: 0.897589
Accuracy on Test data: 0.9279341697692871
Step 2 | Training Loss: 0.418054 | Validation Accuracy: 0.897011
Accuracy on Test data: 0.9279341697692871
Step 3 | Training Loss: 0.429221 | Validation Accuracy: 0.897352
Accuracy on Test data: 0.9279341697692871
Step 4 | Training Loss: 0.421849 | Validation Accuracy: 0.896959
Accuracy on Test data: 0.9279341697692871
Step 5 | Training Loss: 0.440685 | Val

Step 1 | Training Loss: 0.628038 | Validation Accuracy: 0.876571
Accuracy on Test data: 0.6054261922836304
Step 2 | Training Loss: 0.612172 | Validation Accuracy: 0.882685
Accuracy on Test data: 0.9393176436424255
Step 3 | Training Loss: 0.594724 | Validation Accuracy: 0.889428
Accuracy on Test data: 0.941804826259613
Step 4 | Training Loss: 0.577543 | Validation Accuracy: 0.891580
Accuracy on Test data: 0.9421927332878113
Step 5 | Training Loss: 0.568255 | Validation Accuracy: 0.889822
Accuracy on Test data: 0.9421955943107605
Step 6 | Training Loss: 0.559208 | Validation Accuracy: 0.890347
Accuracy on Test data: 0.9423952102661133
Step 7 | Training Loss: 0.543535 | Validation Accuracy: 0.891003
Accuracy on Test data: 0.9423952102661133
Step 8 | Training Loss: 0.543667 | Validation Accuracy: 0.888903
Accuracy on Test data: 0.9423952102661133
Step 9 | Training Loss: 0.545873 | Validation Accuracy: 0.890425
Accuracy on Test data: 0.9423952102661133
Step 10 | Training Loss: 0.534213 | Va

Step 7 | Training Loss: 0.426209 | Validation Accuracy: 0.888851
Accuracy on Test data: 0.9423924088478088
Step 8 | Training Loss: 0.424587 | Validation Accuracy: 0.890268
Accuracy on Test data: 0.9423924088478088
Step 9 | Training Loss: 0.422928 | Validation Accuracy: 0.893863
Accuracy on Test data: 0.9423924088478088
Step 10 | Training Loss: 0.423436 | Validation Accuracy: 0.890557
Accuracy on Test data: 0.9423924088478088
Step 1 | Training Loss: 0.413036 | Validation Accuracy: 0.892367
Accuracy on Test data: 0.9423924088478088
Step 2 | Training Loss: 0.417628 | Validation Accuracy: 0.892210
Accuracy on Test data: 0.9423924088478088
Step 3 | Training Loss: 0.420000 | Validation Accuracy: 0.889140
Accuracy on Test data: 0.9423924088478088
Step 4 | Training Loss: 0.419027 | Validation Accuracy: 0.889612
Accuracy on Test data: 0.9423924088478088
Step 5 | Training Loss: 0.424428 | Validation Accuracy: 0.889559
Accuracy on Test data: 0.9423924088478088
Step 6 | Training Loss: 0.423793 | V

Step 2 | Training Loss: 0.771677 | Validation Accuracy: 0.242004
Accuracy on Test data: 0.05761047080159187
Step 3 | Training Loss: 0.744469 | Validation Accuracy: 0.653040
Accuracy on Test data: 0.06461282819509506
Step 4 | Training Loss: 0.735598 | Validation Accuracy: 0.818215
Accuracy on Test data: 0.06595911085605621
Step 5 | Training Loss: 0.712286 | Validation Accuracy: 0.854819
Accuracy on Test data: 0.19207008183002472
Step 6 | Training Loss: 0.703906 | Validation Accuracy: 0.869460
Accuracy on Test data: 0.3635922968387604
Step 7 | Training Loss: 0.692323 | Validation Accuracy: 0.885545
Accuracy on Test data: 0.39550936222076416
Step 8 | Training Loss: 0.681535 | Validation Accuracy: 0.889297
Accuracy on Test data: 0.44743236899375916
Step 9 | Training Loss: 0.670812 | Validation Accuracy: 0.892052
Accuracy on Test data: 0.9451990127563477
Step 10 | Training Loss: 0.660675 | Validation Accuracy: 0.891134
Accuracy on Test data: 0.9454500079154968
Step 1 | Training Loss: 0.6531

Step 8 | Training Loss: 0.422205 | Validation Accuracy: 0.897510
Accuracy on Test data: 0.9420700669288635
Step 9 | Training Loss: 0.420922 | Validation Accuracy: 0.896723
Accuracy on Test data: 0.941950261592865
Step 10 | Training Loss: 0.420715 | Validation Accuracy: 0.897799
Accuracy on Test data: 0.9416878819465637
Step 1 | Training Loss: 0.419609 | Validation Accuracy: 0.896355
Accuracy on Test data: 0.9416108727455139
Step 2 | Training Loss: 0.413270 | Validation Accuracy: 0.896670
Accuracy on Test data: 0.9415909051895142
Step 3 | Training Loss: 0.421535 | Validation Accuracy: 0.897195
Accuracy on Test data: 0.9415566921234131
Step 4 | Training Loss: 0.421850 | Validation Accuracy: 0.897274
Accuracy on Test data: 0.9415310025215149
Step 5 | Training Loss: 0.431131 | Validation Accuracy: 0.895594
Accuracy on Test data: 0.9414710998535156
Step 6 | Training Loss: 0.412100 | Validation Accuracy: 0.898901
Accuracy on Test data: 0.9414682388305664
Step 7 | Training Loss: 0.413879 | Va

Step 3 | Training Loss: 0.581684 | Validation Accuracy: 0.877699
Accuracy on Test data: 0.057793013751506805
Step 4 | Training Loss: 0.550572 | Validation Accuracy: 0.890242
Accuracy on Test data: 0.05779872089624405
Step 5 | Training Loss: 0.526829 | Validation Accuracy: 0.891108
Accuracy on Test data: 0.057807277888059616
Step 6 | Training Loss: 0.514268 | Validation Accuracy: 0.894466
Accuracy on Test data: 0.05782153829932213
Step 7 | Training Loss: 0.497188 | Validation Accuracy: 0.893417
Accuracy on Test data: 0.057807277888059616
Step 8 | Training Loss: 0.480563 | Validation Accuracy: 0.893941
Accuracy on Test data: 0.05781012773513794
Step 9 | Training Loss: 0.471980 | Validation Accuracy: 0.896434
Accuracy on Test data: 0.05781298130750656
Step 10 | Training Loss: 0.455577 | Validation Accuracy: 0.894309
Accuracy on Test data: 0.057807277888059616
Step 1 | Training Loss: 0.458449 | Validation Accuracy: 0.894597
Accuracy on Test data: 0.05781012773513794
Step 2 | Training Loss:

Step 8 | Training Loss: 0.596816 | Validation Accuracy: 0.888352
Accuracy on Test data: 0.9340351819992065
Step 9 | Training Loss: 0.593813 | Validation Accuracy: 0.888405
Accuracy on Test data: 0.9340465664863586
Step 10 | Training Loss: 0.593705 | Validation Accuracy: 0.889848
Accuracy on Test data: 0.934708297252655
Step 1 | Training Loss: 0.586440 | Validation Accuracy: 0.890976
Accuracy on Test data: 0.934708297252655
Step 2 | Training Loss: 0.585853 | Validation Accuracy: 0.891160
Accuracy on Test data: 0.9355383515357971
Step 3 | Training Loss: 0.584018 | Validation Accuracy: 0.891265
Accuracy on Test data: 0.9365765452384949
Step 4 | Training Loss: 0.583930 | Validation Accuracy: 0.890740
Accuracy on Test data: 0.9366193413734436
Step 5 | Training Loss: 0.593543 | Validation Accuracy: 0.892236
Accuracy on Test data: 0.9366307854652405
Step 6 | Training Loss: 0.585774 | Validation Accuracy: 0.890504
Accuracy on Test data: 0.9366307854652405
Step 7 | Training Loss: 0.578790 | Val

Step 3 | Training Loss: 0.690804 | Validation Accuracy: 0.890058
Accuracy on Test data: 0.9423924088478088
Step 4 | Training Loss: 0.689999 | Validation Accuracy: 0.890032
Accuracy on Test data: 0.9423924088478088
Step 5 | Training Loss: 0.689266 | Validation Accuracy: 0.889035
Accuracy on Test data: 0.9423924088478088
Step 6 | Training Loss: 0.688585 | Validation Accuracy: 0.890845
Accuracy on Test data: 0.9423924088478088
Step 7 | Training Loss: 0.687697 | Validation Accuracy: 0.889350
Accuracy on Test data: 0.9423924088478088
Step 8 | Training Loss: 0.686966 | Validation Accuracy: 0.891134
Accuracy on Test data: 0.9423924088478088
Step 9 | Training Loss: 0.686206 | Validation Accuracy: 0.889953
Accuracy on Test data: 0.9423924088478088
Step 10 | Training Loss: 0.685519 | Validation Accuracy: 0.890740
Accuracy on Test data: 0.9423924088478088
Step 1 | Training Loss: 0.684807 | Validation Accuracy: 0.892420
Accuracy on Test data: 0.9423924088478088
Step 2 | Training Loss: 0.683733 | V

Step 9 | Training Loss: 0.448530 | Validation Accuracy: 0.892341
Accuracy on Test data: 0.9423924088478088
Step 10 | Training Loss: 0.451908 | Validation Accuracy: 0.889533
Accuracy on Test data: 0.9423924088478088
Step 1 | Training Loss: 0.424899 | Validation Accuracy: 0.891449
Accuracy on Test data: 0.9423924088478088
Step 2 | Training Loss: 0.444370 | Validation Accuracy: 0.890242
Accuracy on Test data: 0.9423924088478088
Step 3 | Training Loss: 0.452313 | Validation Accuracy: 0.889481
Accuracy on Test data: 0.9423924088478088
Step 4 | Training Loss: 0.438309 | Validation Accuracy: 0.888536
Accuracy on Test data: 0.9423924088478088
Step 5 | Training Loss: 0.438170 | Validation Accuracy: 0.893443
Accuracy on Test data: 0.9423924088478088
Step 6 | Training Loss: 0.433253 | Validation Accuracy: 0.892367
Accuracy on Test data: 0.9423924088478088
Step 7 | Training Loss: 0.452754 | Validation Accuracy: 0.889218
Accuracy on Test data: 0.9423924088478088
Step 8 | Training Loss: 0.444480 | V

In [11]:
g = df_results.groupby(by=['no_of_features'])
idx = g['test_score'].transform(max) == df_results['test_score']
df_results[idx].sort_values(by = 'test_score', ascending = False)

Unnamed: 0,epoch,no_of_features,hidden_layers,train_score,test_score,time_taken
40,6,32,3,0.89095,0.942392,34.053331
42,3,122,3,0.892997,0.942392,20.054354
43,2,1,5,0.890137,0.942392,3.965397
64,8,32,5,0.890058,0.942392,36.775338
65,2,122,5,0.889428,0.942392,8.897665
57,20,8,5,0.892524,0.942244,82.1684


In [13]:
df_results.sort_values(by = 'test_score', ascending = False)

Unnamed: 0,epoch,no_of_features,hidden_layers,train_score,test_score,time_taken
65,2,122,5,0.889428,0.942392,8.897665
64,8,32,5,0.890058,0.942392,36.775338
43,2,1,5,0.890137,0.942392,3.965397
42,3,122,3,0.892997,0.942392,20.054354
40,6,32,3,0.89095,0.942392,34.053331
63,7,32,5,0.889822,0.94239,31.516529
41,2,122,3,0.85718,0.942352,9.713215
62,6,32,5,0.892,0.942318,26.300749
2,5,1,1,0.887644,0.942313,14.582301
1,4,1,1,0.890084,0.942307,10.903652


In [14]:
pd.Panel(Train.predictions).to_pickle("dataset/tf_dense_only_nsl_kdd_predictions.pkl")
df_results.to_pickle("dataset/tf_dense_only_nsl_kdd_scores.pkl")


In [15]:
import numpy as np
import matplotlib.pyplot as plt
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    np.set_printoptions(precision=4)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j].round(4),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def plot(actual_value, pred_value):
    from sklearn.metrics import confusion_matrix
    cm_2labels = confusion_matrix(y_pred = pred_value, y_true = actual_value)
    plt.figure(figsize=[6,6])
    plot_confusion_matrix(cm_2labels, preprocess.output_columns_2labels, normalize = True,
                         title = Train.best_parameters)

In [19]:
plot(actual_value = Train.actual_value, pred_value = Train.pred_value)

Normalized confusion matrix
[[ 0.9958  0.0042]
 [ 0.8781  0.1219]]


In [None]:
psg = past_scores.groupby(by=['no_of_features', 'hidden_layers'])
psg.test_score.mean()

In [None]:
past_scores

In [18]:
psg = past_scores.groupby(by=['no_of_features', 'hidden_layers'])
psg.mean().test_score

no_of_features  hidden_layers
1               1                0.935056
                3                0.942392
                5                0.942392
8               1                0.722690
                3                0.885555
                5                0.942386
32              1                0.716910
                3                0.941874
                5                0.353504
122             1                0.941660
                3                0.942392
                5                0.942392
Name: test_score, dtype: float64