In [None]:
## Import dependencies
import numpy as np
import pickle
import pandas
import re
import glob
import datetime
import tensorflow as tf
import itertools
import math
import random
#from gensim.models.word2vec import Word2Vec
from collections import Counter
from sklearn.metrics import log_loss, auc, roc_curve
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras import backend as K
from keras.layers import *
from keras.engine.topology import Input
from keras.models import Model, Sequential
from keras.utils import np_utils, to_categorical
from keras.optimizers import TFOptimizer, RMSprop

## Seed the stdlib and NumPy global random generators so reruns repeat
random.seed(123)
np.random.seed(123)

In [None]:
##
## Modeling parameters (all tunable values live in this cell)
##

# Sequence windowing
seq_len, seq_skip = 10, 1

# w2v_* settings (presumably for the word2vec step whose import is commented
# out above -- confirm against the training notebook)
w2v_size = 25
w2v_min_count = 3
w2v_window = 10
w2v_workers = 4

# Layer sizes
embedding_a_size = 100
lstm_a_size, lstm_b_size = 25, 25
dense_size = 100

# Training loop settings
validation_split = 0.1
batch_size = 2048
epochs = 10

# Reference timestamp 2017-07-04 00:00:00 (naive datetime, no timezone)
cicids_training = datetime.datetime(2017, 7, 4, 0, 0, 0)

num_models = 1

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp

# Global text style applied to every figure drawn after this cell.
# 'sans-serif' is one of matplotlib's generic font families; the previous
# value 'normal' is not a family name, so matplotlib warned on every text
# draw and fell back to its default font.
font = {'family' : 'sans-serif',
        'weight' : 'bold',
        'size'   : 22}

plt.rc('font', **font)

def evaluate_results(Y_test, score_arrays):
    """Compute one-vs-rest ROC statistics for every label and every run.

    Parameters
    ----------
    Y_test : array-like
        Ground-truth label of each sample. Lists, NumPy arrays and pandas
        Series are accepted; the values are compared against each distinct
        label found in the data.
    score_arrays : iterable of array-like
        One score vector per model run, aligned with ``Y_test``. For the
        label ``"BENIGN"`` the scores are used as-is; for every other label
        they are negated before being passed to ``roc_curve``.

    Returns
    -------
    dict
        Maps each distinct label to a dict with the keys ``"fpr"``,
        ``"tpr"``, ``"threshold"`` and ``"auc"``. Each value is a list with
        one entry per element of ``score_arrays``, in the same order.
    """
    # Coerce once so that `labels == key` is always an element-wise mask
    # (a plain Python list would compare to a single False).
    labels = np.asarray(Y_test)

    models = {}
    for key in np.unique(labels).tolist():
        stats = {"fpr": [], "tpr": [], "threshold": [], "auc": []}
        is_positive = labels == key  # same mask for every run of this label

        for preds in score_arrays:
            scores = np.asarray(preds)
            if key != "BENIGN":
                # Flip the ranking for every label other than "BENIGN".
                scores = -scores

            fpr, tpr, thresholds = roc_curve(is_positive, scores, pos_label=1)
            stats["fpr"].append(fpr)
            stats["tpr"].append(tpr)
            stats["threshold"].append(thresholds)
            stats["auc"].append(auc(fpr, tpr))

        models[key] = stats
    return models

def make_roc_plots(result_set, attack_names, title_postfix="", file_prefix=""):
    """Draw, save and show one averaged ROC figure per requested class.

    For every ``(key, title)`` pair the per-run ROC curves stored under
    ``result_set[key]`` are interpolated onto the union of their FPR grids
    and averaged. The mean curve is drawn in black, the individual runs as
    dashed gray lines, and the AUC of the mean curve is shown in the legend.
    Each figure is written to ``'figures/' + file_prefix + key + '.pdf'``.

    Parameters
    ----------
    result_set : dict
        Mapping as returned by ``evaluate_results``: label -> dict whose
        ``"fpr"`` and ``"tpr"`` entries are lists of arrays (one per run).
    attack_names : iterable of (str, str)
        Pairs of (key into ``result_set``, plot title).
    title_postfix : str, optional
        Text appended to every plot title.
    file_prefix : str, optional
        Text inserted between ``'figures/'`` and the key in the file name.

    Notes
    -----
    Keys that are missing from ``result_set`` (or have no curves) are
    skipped silently. Any other failure while building one figure is
    reported with ``warnings.warn`` and the loop continues with the next
    class, so one bad class does not abort the remaining plots.
    """
    import os
    import warnings

    for key, value in attack_names:
        # Best-effort by design: a requested class may be absent here.
        if key not in result_set or len(result_set[key]["fpr"]) == 0:
            continue

        fig = None
        try:
            run_fprs = list(result_set[key]["fpr"])
            run_tprs = list(result_set[key]["tpr"])

            # Common grid: every FPR value that occurs in any run.
            all_fpr = np.unique(np.concatenate(run_fprs))

            # Average the runs after interpolating each onto the common grid.
            # np.interp is the function the deprecated scipy.interp aliased.
            mean_tpr = np.zeros_like(all_fpr)
            for run_fpr, run_tpr in zip(run_fprs, run_tprs):
                mean_tpr += np.interp(all_fpr, run_fpr, run_tpr)
            mean_tpr /= len(run_fprs)

            macro_auc = auc(all_fpr, mean_tpr)

            fig, ax = plt.subplots()

            # Mean curve on top of the individual runs.
            ax.plot(all_fpr, mean_tpr, color='black', linewidth=4)
            for run_fpr, run_tpr in zip(run_fprs, run_tprs):
                ax.plot(run_fpr, run_tpr, '--', color='gray', lw=2)

            # Chance diagonal.
            ax.plot([0, 1], [0, 1], 'k--', lw=1)
            ax.set_xlim([0.0, 1.0])
            ax.set_ylim([0.0, 1.05])
            ax.set_xlabel('FPR')
            ax.set_ylabel('TPR')
            ax.set_title(value + title_postfix)

            # Invisible artist whose only purpose is the AUC legend entry.
            ax.plot([], [], ' ', label='AUC = {0:0.2f}'.format(macro_auc))
            ax.legend(loc="lower right")

            # Tick marks are hidden on both axes.
            ax.set_xticks([])
            ax.set_yticks([])

            out_path = 'figures/' + file_prefix + key + ".pdf"
            out_dir = os.path.dirname(out_path)
            if out_dir and not os.path.isdir(out_dir):
                # Previously a missing directory made savefig fail silently.
                os.makedirs(out_dir)
            fig.savefig(out_path)

            plt.show()
        except Exception as exc:
            warnings.warn(
                "Could not create ROC plot for {!r}: {}".format(key, exc))
        finally:
            # Release the figure so a long loop does not accumulate them.
            if fig is not None:
                plt.close(fig)

In [None]:
# Classes to plot, as (key, title) pairs:
#   key   -- looked up in the result dict and used in the output file name
#   title -- text shown as the figure title
attacks = [
    ("BENIGN",                    "All Attacks"),
    ("Bot",                       "Botnet"),
    ("DDoS",                      "DDoS"),
    ("DoS GoldenEye",             "DoS GoldenEye"),
    ("DoS Hulk",                  "DoS Hulk"),
    ("DoS Slowhttptest",          "DoS Slow HTTP Test"),
    ("DoS slowloris",             "DoS Slow Loris"),
    ("FTPPatator",                "FTPPatator"),
    ("SSHPatator",                "SSHPatator"),
    ("Heartbleed",                "Heartbleed"),
    ("Infiltration",              "Infiltration"),
    ("PortScan",                  "Port Scan"),
    # The next three keys contain two consecutive spaces.
    ("Web Attack  Brute Force",   "Web Attack Brute Force"),
    ("Web Attack  Sql Injection", "Web Attack SQL Injection"),
    ("Web Attack  XSS",           "Web Attack XSS"),
]

In [None]:
#ports
# Build the ROC figures for the "port" predictions, once per aggregation rule.
# NOTE(security): pickle.load can execute code embedded in the file; only load
# result files that this project produced itself.
agg_rules = ["source","destination","dyad","internal","external"]
for agg in agg_rules:
    # Predictions of the three runs stored with suffixes 0, 1 and 2.
    run_preds = []
    for run in range(3):
        # `with` closes each file handle as soon as it has been read.
        with open("results/"+agg+"_"+str(run)+"_port_preds.pickle","rb") as fh:
            run_preds.append(pickle.load(fh))
    dot_0, dot_1, dot_2 = run_preds

    with open("results/"+agg+"_port_truth_L.pickle","rb") as fh:
        Y_test = pickle.load(fh)

    models_dot = evaluate_results(Y_test, [dot_0, dot_1, dot_2])
    make_roc_plots(models_dot, attacks, "", "ports_"+agg+"_")

In [None]:
#protobytes
# Build the ROC figures for the "protobytes" predictions, once per
# aggregation rule.
# NOTE(security): pickle.load can execute code embedded in the file; only load
# result files that this project produced itself.
agg_rules = ["source","destination","dyad","internal","external"]
for agg in agg_rules:
    # Predictions of the three runs stored with suffixes 0, 1 and 2.
    run_preds = []
    for run in range(3):
        # `with` closes each file handle as soon as it has been read.
        with open("results/"+agg+"_"+str(run)+"_protobytes_preds.pickle","rb") as fh:
            run_preds.append(pickle.load(fh))
    dot_0, dot_1, dot_2 = run_preds

    with open("results/"+agg+"_protobytes_truth_L.pickle","rb") as fh:
        Y_test = pickle.load(fh)

    models_dot = evaluate_results(Y_test, [dot_0, dot_1, dot_2])
    make_roc_plots(models_dot, attacks, "", "protobytes_"+agg+"_")