In [1]:
import pandas as pd
import numpy as np

from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf
tf.random.set_seed(42)
import tensorflow.keras.backend as K
import tensorflow.keras.layers as layers
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

2021-09-10 13:30:57.886139: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# Read both CICIDS exports; missing values are zero-filled in the 2018 frame only.
c17 = pd.read_csv("../input/cicids-20172018/CICIDS2017_w_o_oversample.csv")
c18 = pd.read_csv("../input/cicids-20172018/CICIDS_2018_w_o_oversample.csv").fillna(0)

In [3]:
# Map each dtype to the column labels carrying it. The 2018 mapping is taken
# before 'Timestamp' is removed, so it still lists that column.
c18_gs = c18.columns.to_series().groupby(c18.dtypes).groups
c17_gs = c17.columns.to_series().groupby(c17.dtypes).groups

# 'Timestamp' is dropped from the 2018 frame before any further processing.
c18 = c18.drop('Timestamp', axis=1)

In [4]:
# Number of distinct label values per dataset (a missing value, if present,
# counts as one value). Note the 2017 label column name has a leading space.
C17_LABELS = c17[' Label'].nunique(dropna=False)
C18_LABELS = c18['Label'].nunique(dropna=False)

In [5]:
# Replace +/-inf (and any remaining NaN) in the two rate columns with the
# column's largest finite value.
#
# This is done explicitly per column instead of flipping the global
# `pd.options.mode.use_inf_as_na` switch: that option changes NA semantics for
# every later pandas call in the kernel and is deprecated in recent pandas.
inf_Cols = ['Flow Byts/s','Flow Pkts/s']
for col in inf_Cols:
    finite = c18[col].replace([np.inf, -np.inf], np.nan)
    # max() skips NaN, so this is the largest finite value in the column.
    c18[col] = finite.fillna(finite.max())

In [6]:
def make_folds(df, x_col, target_col):
    """Tag every row of ``df`` with a stratified fold id in a 'folds' column.

    A 10-way shuffled ``StratifiedKFold`` (``random_state=42``) is run with
    ``df[target_col]`` as the stratification target; each row receives the
    index (0-9) of the split in which it falls into the test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to annotate. It is modified in place (the 'folds' column is
        added or overwritten) and also returned.
    x_col : column label
        Column handed to ``skf.split`` as X.
    target_col : column label
        Column whose values are stratified over.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the 'folds' column.
    """
    skf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)

    # Collect the fold ids in a plain array and assign the column by name.
    # Writing through `df.iloc[..., -1]` would silently overwrite some other
    # column whenever 'folds' already exists and is not the last column.
    fold_ids = np.zeros(len(df), dtype='int64')
    for fold_id, (_, test_index) in enumerate(skf.split(df[x_col], df[target_col])):
        fold_ids[test_index] = fold_id

    df['folds'] = fold_ids
    return df

In [7]:
# Tag each row with its stratified fold id (stratified on the label column),
# then one-hot encode the frame with pandas' default get_dummies behaviour.
c18 = pd.get_dummies(make_folds(c18, 'Protocol', 'Label'))
c17 = pd.get_dummies(make_folds(c17, ' Destination Port', ' Label'))

In [8]:
HOLDOUT_FOLD = 5        # fold id held out as the test split
N_PCA_COMPONENTS = 40   # keep equal to the 'num_columns' passed to create_ae_mlp

# Hold out one fold per dataset; the remaining nine folds are used for training.
train_17 = c17[c17['folds'] != HOLDOUT_FOLD]
test_17 = c17[c17['folds'] == HOLDOUT_FOLD]

train_18 = c18[c18['folds'] != HOLDOUT_FOLD]
test_18 = c18[c18['folds'] == HOLDOUT_FOLD]


# Targets: the last C*_LABELS columns. This relies on pd.get_dummies having
# placed the one-hot label columns at the end of the frame.
# Features: everything before them, minus the 'folds' bookkeeping column.
# 'folds' was added before get_dummies, so it sits just ahead of the label
# columns and would otherwise be fed to PCA as a feature (it is constant on
# the test split and never equals that value on the training split).
y_train_18 = train_18.iloc[:, -C18_LABELS:]
y_test_18 = test_18.iloc[:, -C18_LABELS:]
x_train_18 = train_18.iloc[:, :-C18_LABELS].drop(columns='folds')
x_test_18 = test_18.iloc[:, :-C18_LABELS].drop(columns='folds')

y_train_17 = train_17.iloc[:, -C17_LABELS:]
y_test_17 = test_17.iloc[:, -C17_LABELS:]
x_train_17 = train_17.iloc[:, :-C17_LABELS].drop(columns='folds')
x_test_17 = test_17.iloc[:, :-C17_LABELS].drop(columns='folds')


# One PCA per dataset, fitted on the training split only and then applied to
# the held-out split. random_state pins the result when scikit-learn picks its
# randomized SVD solver, so a Restart & Run All reproduces the same projection.
pca_18 = PCA(n_components=N_PCA_COMPONENTS, random_state=42)
pca_17 = PCA(n_components=N_PCA_COMPONENTS, random_state=42)

x_train_18 = pca_18.fit_transform(x_train_18)
x_train_17 = pca_17.fit_transform(x_train_17)

x_test_18 = pca_18.transform(x_test_18)
x_test_17 = pca_17.transform(x_test_17)

In [12]:
def create_ae_mlp(num_columns, num_labels, hidden_units, dropout_rates, ls = 1e-2, lr = 1e-3):
    """Build and compile a two-output Keras model (outputs named 'AE' and 'MLP').

    Graph, in construction order:
      * input -> BatchNormalization (``normed``)
      * encoder: GaussianNoise -> Dense(hidden_units[0]) -> BN -> swish
      * decoder: Dropout -> Dense(num_columns), layer name 'decoder'
      * 'AE' head: decoder -> Dense(hidden_units[1]) -> BN -> swish -> Dropout
        -> Dense(num_labels, sigmoid)
      * 'MLP' head: concat(normed, encoder) -> BN -> Dropout, then one
        Dense -> BN -> swish -> Dropout stage per entry of hidden_units[2:],
        -> Dense(num_labels, sigmoid)

    Parameters
    ----------
    num_columns : int
        Width of the input vector (also the decoder's output width).
    num_labels : int
        Number of units in each of the two sigmoid output layers.
    hidden_units : sequence of int
        [0] encoder width, [1] AE-head width, [2:] widths of the MLP stages.
    dropout_rates : sequence of float
        [0] GaussianNoise stddev, [1] decoder dropout, [2] AE-head dropout,
        [3] dropout after the concat, [i + 2] dropout of the MLP stage built
        from hidden_units[i] (i >= 2).
    ls : float
        Label smoothing applied to both binary cross-entropy losses.
    lr : float
        Learning rate of the Adam optimizer.

    Returns
    -------
    tf.keras.Model
        Compiled model with outputs ``[AE, MLP]``; each output uses binary
        cross-entropy and tracks an AUC metric named 'AUC'.
    """
    kl = tf.keras.layers

    def dense_bn_swish(tensor, units):
        # Dense -> BatchNormalization -> swish, the stage reused by every branch.
        tensor = kl.Dense(units)(tensor)
        tensor = kl.BatchNormalization()(tensor)
        return kl.Activation('swish')(tensor)

    inputs = kl.Input(shape=(num_columns,))
    normed = kl.BatchNormalization()(inputs)

    # Encoder
    noisy = kl.GaussianNoise(dropout_rates[0])(normed)
    encoded = dense_bn_swish(noisy, hidden_units[0])

    # Decoder
    decoded = kl.Dropout(dropout_rates[1])(encoded)
    decoded = kl.Dense(num_columns, name='decoder')(decoded)

    # Classification head fed by the decoder output
    ae_branch = dense_bn_swish(decoded, hidden_units[1])
    ae_branch = kl.Dropout(dropout_rates[2])(ae_branch)
    out_ae = kl.Dense(num_labels, activation='sigmoid', name='AE')(ae_branch)

    # Multi-layer perceptron on the normalized input joined with the encoding
    mlp = kl.Concatenate()([normed, encoded])
    mlp = kl.BatchNormalization()(mlp)
    mlp = kl.Dropout(dropout_rates[3])(mlp)

    for idx in range(2, len(hidden_units)):
        mlp = dense_bn_swish(mlp, hidden_units[idx])
        mlp = kl.Dropout(dropout_rates[idx + 2])(mlp)

    out = kl.Dense(num_labels, activation='sigmoid', name='MLP')(mlp)

    head_names = ('AE', 'MLP')
    model = tf.keras.models.Model(inputs=inputs, outputs=[out_ae, out])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss={
            head: tf.keras.losses.BinaryCrossentropy(label_smoothing=ls)
            for head in head_names
        },
        metrics={
            head: tf.keras.metrics.AUC(name='AUC')
            for head in head_names
        },
    )

    return model

In [13]:
def _ae_mlp_params(num_labels):
    """Return a fresh keyword-argument dict for create_ae_mlp.

    Both datasets use the same architecture and optimizer settings; only the
    number of output labels differs. A new dict (with new lists) is built on
    every call so the two parameter sets never share mutable state.
    """
    return {
        'num_columns': 40,
        'num_labels': num_labels,
        'hidden_units': [128, 128, 1024, 512, 512, 256],
        'dropout_rates': [0.035, 0.038, 0.42, 0.10, 0.49, 0.32, 0.27, 0.43],
        'ls': 0,
        'lr': 1e-3,
    }


params_18 = _ae_mlp_params(C18_LABELS)
params_17 = _ae_mlp_params(C17_LABELS)

In [15]:
# CICIDS 2018
# Train the two-headed model on the 2018 training split and validate on the
# held-out split. Both outputs ('AE' and 'MLP') are trained against the same
# one-hot targets, hence the duplicated y in the target lists.

batch_size = 64
fold = 5
ckp_path = f'JSModel_{fold}.hdf5'
model = create_ae_mlp(**params_18)
# Keep only the weights of the epoch with the highest validation AUC of the 'MLP' output.
ckp = ModelCheckpoint(ckp_path, monitor = 'val_MLP_AUC', verbose = 0, 
                      save_best_only = True, save_weights_only = True, mode = 'max')
es = EarlyStopping(monitor = 'val_MLP_AUC', min_delta = 1e-4, patience = 20, mode = 'max', 
                   baseline = None, restore_best_weights = True, verbose = 0)
history = model.fit(x_train_18, [y_train_18,y_train_18], 
                    validation_data = (x_test_18, [ y_test_18,y_test_18]), 
                    epochs = 8, batch_size = batch_size, callbacks = [ckp, es], verbose = True)
# patience (20) is larger than epochs (8), so early stopping cannot trigger in
# this run; on Keras versions that restore the best weights only when training
# is stopped early, `model` would be left with the last-epoch weights. Reload
# the best checkpoint explicitly so `model` holds the best epoch either way.
model.load_weights(ckp_path)
hist = pd.DataFrame(history.history)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


KeyError: 'val_action_AUC'

In [16]:
# CICIDS 2017
# Same training recipe on the 2017 split. `fold` and `batch_size` are reused
# from the CICIDS 2018 training cell, which therefore has to run first.
# Note: `model`, `history` and `hist` are rebound here, replacing the 2018 objects.
ckp_path = f'JSModel17_{fold}.hdf5'
model = create_ae_mlp(**params_17)
# Keep only the weights of the epoch with the highest validation AUC of the 'MLP' output.
ckp = ModelCheckpoint(ckp_path, monitor = 'val_MLP_AUC', verbose = 0, 
                      save_best_only = True, save_weights_only = True, mode = 'max')
es = EarlyStopping(monitor = 'val_MLP_AUC', min_delta = 1e-4, patience = 20, mode = 'max', 
                   baseline = None, restore_best_weights = True, verbose = 0)
history = model.fit(x_train_17, [y_train_17,y_train_17], 
                    validation_data = (x_test_17, [ y_test_17,y_test_17]), 
                    epochs = 8, batch_size = batch_size, callbacks = [ckp, es], verbose = True)
# patience (20) is larger than epochs (8), so early stopping cannot trigger in
# this run; on Keras versions that restore the best weights only when training
# is stopped early, `model` would be left with the last-epoch weights. Reload
# the best checkpoint explicitly so `model` holds the best epoch either way.
model.load_weights(ckp_path)
hist = pd.DataFrame(history.history)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
