In [9]:
# Test a deep learning model and evaluate precision, recall and f1 score for the model
import kf_ml_lib as kf
import numpy as np
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

from keras import backend as K
from keras import models
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier

def precision(y_true, y_pred):	
    """Precision metric.	
    Only computes a batch-wise average of precision. Computes the precision, a
    metric for multi-label classification of how many selected items are
    relevant.
    """	
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))	
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))	
    precision = true_positives / (predicted_positives + K.epsilon())	
    return precision

def recall(y_true, y_pred):	
    """Recall metric.	
    Only computes a batch-wise average of recall. Computes the recall, a metric
    for multi-label classification of how many relevant items are selected.	
    """	
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))	
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))	
    recall = true_positives / (possible_positives + K.epsilon())	
    return recall

def f1_score(y_true, y_pred):
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return (2 * p * r) / (p + r + K.epsilon())


dataset_path = "../Datasets/CTU-13/Pre-processed/1.csv"
dataset = kf.load_dataset(dataset_path)
X, y = kf.split_dataset(dataset, extended=False)
#X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, stratify=y)
del dataset


def make_ffnn_model():
    model = models.Sequential()
    
    model.add(Dense(5192, input_dim=6, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[precision, recall, f1_score])       
    
    return model
              
model = KerasClassifier(build_fn=make_ffnn_model, epochs=2, batch_size=500, verbose=1)

scoring = ['precision_macro', 'recall_macro']
                
results = cross_validate(model, X, y, cv=10, scoring=scoring, n_jobs=1, verbose=0)


fit_time = np.mean(results['fit_time'])
precision = np.mean(results['test_precision_macro'])
recall = np.mean(results['test_recall_macro'])
f1_score = kf.calc_f1_score(precision, recall)

print(fit_time)
print(precision)
print(recall)
print(f1_score)

  mask |= (ar1 == a)


ValueError: validation_size is not a legal parameter

In [None]:
sigmoid:
    batch size = 500:
        epochs = 2 :::: f1 =  0.6106146020685346
        epochs = 5 :::: f1 =  0.6616933852796377


In [1]:
# Test a deep learning model and evaluate precision, recall and f1 score for the model
import kf_ml_lib as kf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn import metrics

from keras import backend as K
from keras import models, callbacks
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier

def precision(y_true, y_pred):	
    """Precision metric.	
    Only computes a batch-wise average of precision. Computes the precision, a
    metric for multi-label classification of how many selected items are
    relevant.
    """	
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))	
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))	
    precision = true_positives / (predicted_positives + K.epsilon())	
    return precision

def recall(y_true, y_pred):	
    """Recall metric.	
    Only computes a batch-wise average of recall. Computes the recall, a metric
    for multi-label classification of how many relevant items are selected.	
    """	
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))	
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))	
    recall = true_positives / (possible_positives + K.epsilon())	
    return recall

def f1_score(y_true, y_pred):
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return (2 * p * r) / (p + r + K.epsilon())



def make_ffnn_model():
    model = models.Sequential()
    
    model.add(Dense(5196, input_dim=6, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc', precision, recall, f1_score])       
    
    return model

dataset_path = "../Datasets/CTU-13/Pre-processed/1.csv"
dataset = kf.load_dataset(dataset_path)
X, y = kf.split_dataset(dataset, extended=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, stratify=y)
del dataset
    

model = KerasClassifier(build_fn=make_ffnn_model, epochs=10, batch_size=2024, verbose=2)

history = model.fit(X_train, y_train, validation_split=0.2)

predictions = model.predict(X_test)

unique = []

for prediction in predictions:
    if prediction[0] not in unique:
        unique.append(prediction[0])

print(unique)

# Plot training & validation accuracy values
plt.plot(history.history['f1_score'])
plt.plot(history.history['val_f1_score'])
plt.title('Model F1-Score')
plt.ylabel('F1-Score')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()


Using TensorFlow backend.
  mask |= (ar1 == a)


Train on 1581796 samples, validate on 395449 samples
Epoch 1/10
 - 7s - loss: 4.8464 - acc: 0.9811 - precision: 0.9843 - recall: 0.9954 - f1_score: 0.9897 - val_loss: 10.4322 - val_acc: 0.9857 - val_precision: 0.9857 - val_recall: 1.0000 - val_f1_score: 0.9928
Epoch 2/10
 - 7s - loss: 2.6459 - acc: 0.9774 - precision: 0.9856 - recall: 0.9916 - f1_score: 0.9879 - val_loss: 0.3081 - val_acc: 0.9857 - val_precision: 0.9857 - val_recall: 1.0000 - val_f1_score: 0.9928
Epoch 3/10
 - 7s - loss: 3.9901 - acc: 0.9786 - precision: 0.9856 - recall: 0.9928 - f1_score: 0.9888 - val_loss: 0.4121 - val_acc: 0.9857 - val_precision: 0.9857 - val_recall: 1.0000 - val_f1_score: 0.9928
Epoch 4/10
 - 7s - loss: 3.4217 - acc: 0.9808 - precision: 0.9856 - recall: 0.9950 - f1_score: 0.9901 - val_loss: 2.7616 - val_acc: 0.9517 - val_precision: 0.9897 - val_recall: 0.9610 - val_f1_score: 0.9751
Epoch 5/10
 - 7s - loss: 9.9258 - acc: 0.9751 - precision: 0.9858 - recall: 0.9891 - f1_score: 0.9867 - val_loss: 3.57

<Figure size 640x480 with 1 Axes>

In [3]:
pred = 0
yc = 0

for prediction in predictions:
    if prediction == 'Botnet':
        pred += 1
for y in y_test:
    if y == 'Botnet':
        yc += 1
        
    
print(pred)
print(yc)

0
12288


In [57]:
y_train_botnet = 0
for item in y_train:
    if item == 'Botnet':
        y_train_botnet += 1
    
y_test_botnet = 0
for item in y_test:
    if item == 'Botnet':
        y_test_botnet += 1
        
print("y_train botnet % of train set = ", (y_train_botnet / len(y_train)))
print("y_test botnet % of test set = ", (y_test_botnet / len(y_test)))

y_train botnet % of train set =  0.00828385427982098
y_test botnet % of test set =  0.00828383310670579


In [1]:
# Test a deep learning model and evaluate precision, recall and f1 score for the model
import kf_ml_lib as kf
import numpy as np
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

from keras import backend as K
from keras import models
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier

def precision(y_true, y_pred):	
    """Precision metric.	
    Only computes a batch-wise average of precision. Computes the precision, a
    metric for multi-label classification of how many selected items are
    relevant.
    """	
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))	
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))	
    precision = true_positives / (predicted_positives + K.epsilon())	
    return precision

def recall(y_true, y_pred):	
    """Recall metric.	
    Only computes a batch-wise average of recall. Computes the recall, a metric
    for multi-label classification of how many relevant items are selected.	
    """	
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))	
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))	
    recall = true_positives / (possible_positives + K.epsilon())	
    return recall

def f1_score(y_true, y_pred):
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return (2 * p * r) / (p + r + K.epsilon())


dataset_path = "../Datasets/CTU-13/Pre-processed_Extended/3.csv"
dataset = kf.load_dataset(dataset_path)

feature_vector_columns = ['sTos','dTos','SrcWin','DstWin','sHops','dHops',
                          'sTtl','dTtl','TcpRtt','SynAck','AckDat','SrcPkts',
                          'DstPkts','SrcBytes','DstBytes','SAppBytes','DAppBytes',
                          'Dur','TotPkts','TotBytes','TotAppByte','Rate','SrcRate','DstRate']

label_vector_column = ['Label']

X = dataset.loc[:, feature_vector_columns]
y = dataset.loc[:, label_vector_column]
y = np.ravel(y)

del dataset


def make_ffnn_model():
    model = models.Sequential()
    
    model.add(Dense(5192, input_dim=24, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[precision, recall, f1_score])       
    
    return model
              
model = KerasClassifier(build_fn=make_ffnn_model, epochs=4, batch_size=100, verbose=1)

scoring = ['precision_macro', 'recall_macro']
                
results = cross_validate(model, X, y, cv=10, scoring=scoring, n_jobs=1, verbose=0)

print("\n\n\ntest_precision_macro results = ", results['test_precision_macro'])

fit_time = np.mean(results['fit_time'])
precision = np.mean(results['test_precision_macro'])
recall = np.mean(results['test_recall_macro'])
f1_score = kf.calc_f1_score(precision, recall)

print(fit_time)
print(precision)
print(recall)
print(f1_score)


Using TensorFlow backend.
  mask |= (ar1 == a)


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4



test_precision_macro results =  [0.53702748 0.53614352 0.57894631 0.98526968 0.99265086 0.49999257
 0.50880282 0.98669874 0.99532932 0.86266603]
553.6836472511292
0.7483527316435898
0.9054869001977753
0.819455021132894


In [None]:
#1
    # base dataset
        # without sport dport
        score = 0.5957670972341521
        # with sport dport
        score = 0.6820977367235456
    # extended dataset
        # without sport dport
            # batch size = 100, epochs = 6
            score = 0.8350587807579647
        # with sport dport
            # batch size = 100, epochs = 6
            score = 0.8350587807579647
            # batch size = 250, epochs = 4
            score = 0.81939769947856
            # batch size = 500, epochs = 2
            score = 0.8079480571973956
#3
    # extended
        #without sport dport
            #batch size = 100, epochs = 4