In [1]:
import argparse
import math
import sys
import time
import copy

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, regularizers
from keras.layers.noise import GaussianNoise
from keras.layers import Conv1D, MaxPooling1D
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils.np_utils import to_categorical
K.set_image_dim_ordering('th')
print(K.image_data_format())

## required for efficient GPU use
import tensorflow as tf
from keras.backend import tensorflow_backend
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
tensorflow_backend.set_session(session)
## required for efficient GPU use

import os
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


channels_first


In [2]:
model_path='D:/AI/Jinoh Kim/one_class_svm/intrusion/Final_data/cnn_deep_mon.h5'

# prepare callbacks
callbacks = [
    EarlyStopping(
        monitor='val_acc', 
        patience=10,
        mode='max',
        verbose=1),
    ModelCheckpoint(model_path,
        monitor='val_acc', 
        save_best_only=True, 
        mode='max',
        verbose=0)
]

In [3]:
# get data

class dataset:
    monday = pd.read_pickle("D:/AI/Jinoh Kim/one_class_svm/intrusion/Final_data/preprocessed_monday_reduced_final.pkl")
    wednesday = pd.read_pickle("D:/AI/Jinoh Kim/one_class_svm/intrusion/Final_data/preprocessed_wednesday_final_data.pkl")
    
    
from sklearn import model_selection as ms
from sklearn import preprocessing as pp

class preprocess:
    
    output_columns_2labels = ['label_0','label_1']
    
    x_input = dataset.monday.drop(output_columns_2labels, axis = 1)
    y_output = dataset.monday.loc[:,output_columns_2labels]

    x_test_input = dataset.wednesday.drop(output_columns_2labels, axis = 1)
    y_test = dataset.wednesday.loc[:,output_columns_2labels]
    
    ss = pp.StandardScaler()
    
    x_train = ss.fit_transform(x_input)
    x_test = ss.fit_transform(x_test_input)
    
    y_train = y_output.values
    y_test = y_test.values
        
    print('x_train shape: {}'.format(x_train.shape))
    print('y_train shape: {}'.format(y_train.shape))
    print('x_test shape: {}'.format(x_test.shape))
    print('y_test shape: {}'.format(y_test.shape))

x_train shape: (52991, 78)
y_train shape: (52991, 2)
x_test shape: (69270, 78)
y_test shape: (69270, 2)


In [4]:
#reshape data
X_train = np.reshape(preprocess.x_train, (preprocess.x_train.shape[0], preprocess.x_train.shape[1], 1))
X_test = np.reshape(preprocess.x_test, (preprocess.x_test.shape[0], preprocess.x_test.shape[1], 1))

print('train shape after reshape: {}'.format(X_train.shape))
print('test shape after reshape: {}'.format(X_test.shape))

train shape after reshape: (52991, 78, 1)
test shape after reshape: (69270, 78, 1)


In [5]:
#size of parameters
batch_size = 1000
num_classes = 2
epochs = 10
filter_size=3
#noise = 1
droprate=0.50

In [6]:
#Start Neural Network
model = Sequential()

#convolution 1st layer
model.add(Conv1D(64, kernel_size=(filter_size), padding="same",
                 activation='relu',
                 input_shape=(78, 1)))
model.add(BatchNormalization())
model.add(Dropout(droprate))
          
#convolution 2nd layer
model.add(Conv1D(128, kernel_size=(filter_size), activation='relu', border_mode="same"))
model.add(BatchNormalization())
model.add(MaxPooling1D(strides=1))
model.add(Dropout(droprate))

#convolution 3rd layer
model.add(Conv1D(256, kernel_size=(filter_size), activation='relu', border_mode="same"))
model.add(BatchNormalization())
model.add(MaxPooling1D(strides=1))
model.add(Dropout(droprate))

#FCN 1st layer
model.add(Flatten())
model.add(Dense(64,use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(droprate))

#FCN 2nd layer
model.add(Dense(32,use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(droprate))

#FCN 3rd layer
model.add(Dense(16,use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(droprate))

#FCN final layer
model.add(Dense(2))
model.add(Activation('softmax'))

model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=['accuracy'])

model.summary()

  if sys.path[0] == '':


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 78, 64)            256       
_________________________________________________________________
batch_normalization_1 (Batch (None, 78, 64)            256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 78, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 78, 128)           24704     
_________________________________________________________________
batch_normalization_2 (Batch (None, 78, 128)           512       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 77, 128)           0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 77, 128)           0         
__________

In [7]:
#Save Model=ON
history = model.fit(X_train, preprocess.y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, preprocess.y_test),shuffle=True,callbacks=callbacks)

score = model.evaluate(X_test, preprocess.y_test, verbose=0)

#print loss and accuracy
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 52991 samples, validate on 69270 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.36988863867737265
Test accuracy: 0.9163851595207161


In [8]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)
print(y_pred)
y_pred = np.argmax(y_pred, axis=1)
print(y_pred)

y_pred = model.predict_classes(X_test)
print(y_pred)

p=model.predict_proba(X_test)

target_names = ['0', '1']
print(classification_report(np.argmax(preprocess.y_test, axis=1), y_pred, target_names=target_names, digits=4))
print(confusion_matrix(np.argmax(preprocess.y_test, axis=1), y_pred))

[[0.93999684 0.06000319]
 [0.96740615 0.03259385]
 [0.97215074 0.02784929]
 ...
 [0.9710032  0.02899683]
 [0.97426087 0.02573917]
 [0.9692654  0.03073466]]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
             precision    recall  f1-score   support

          0     0.9164    1.0000    0.9564     63478
          1     0.0000    0.0000    0.0000      5792

avg / total     0.8398    0.9164    0.8764     69270

[[63478     0]
 [ 5792     0]]


  'precision', 'predicted', average, warn_for)
