In [1]:
# -*- coding: utf-8 -*-
import argparse
import math
import sys
import time
import copy

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, regularizers
from keras.layers.noise import GaussianNoise
from keras.layers import Conv1D, MaxPooling1D
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils.np_utils import to_categorical
K.set_image_dim_ordering('th')
print(K.image_data_format())

## required for efficient GPU use
import tensorflow as tf
from keras.backend import tensorflow_backend
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
tensorflow_backend.set_session(session)
## required for efficient GPU use

import os
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


channels_first


In [2]:
model_path='./result/cnn_shallow_mawi2.h5'

# prepare callbacks
callbacks = [
    EarlyStopping(
        monitor='val_acc', 
        patience=10,
        mode='max',
        verbose=1),
    ModelCheckpoint(model_path,
        monitor='val_acc', 
        save_best_only=True, 
        mode='max',
        verbose=0)
]

In [3]:
# get data

class dataset:
    mawi_train_2labels = pd.read_pickle("dataset/preprocessed_mawi_train_2labels(001).pkl")
    mawi_test_2labels = pd.read_pickle("dataset/preprocessed_mawi_test_2labels(003).pkl")
    
    
from sklearn import model_selection as ms
from sklearn import preprocessing as pp

class preprocess:
    
    output_columns_2labels = ['status_anomaly','status_normal']
    
    x_input = dataset.mawi_train_2labels.drop(output_columns_2labels, axis = 1)
    y_output = dataset.mawi_train_2labels.loc[:,output_columns_2labels]

    x_test_input = dataset.mawi_test_2labels.drop(output_columns_2labels, axis = 1)
    y_test = dataset.mawi_test_2labels.loc[:,output_columns_2labels]
    
    ss = pp.StandardScaler()
    
    x_train = ss.fit_transform(x_input)
    x_test = ss.fit_transform(x_test_input)
    
    y_train = y_output.values
    y_test = y_test.values
        
    print('x_train shape: {}'.format(x_train.shape))
    print('y_train shape: {}'.format(y_train.shape))
    print('x_test shape: {}'.format(x_test.shape))
    print('y_test shape: {}'.format(y_test.shape))

x_train shape: (40780, 4)
y_train shape: (40780, 2)
x_test shape: (42398, 4)
y_test shape: (42398, 2)


In [4]:
#reshape data
X_train = np.reshape(preprocess.x_train, (preprocess.x_train.shape[0], preprocess.x_train.shape[1], 1))
X_test = np.reshape(preprocess.x_test, (preprocess.x_test.shape[0], preprocess.x_test.shape[1], 1))

print('train shape after reshape: {}'.format(X_train.shape))
print('test shape after reshape: {}'.format(X_test.shape))

train shape after reshape: (40780, 4, 1)
test shape after reshape: (42398, 4, 1)


In [5]:
#size of parameters
batch_size = 64
num_classes = 2
epochs = 20
filter_size=3
#noise = 1
droprate=0.50

In [6]:
#Start Neural Network
model = Sequential()

#convolution 1st layer
model.add(Conv1D(64, kernel_size=(filter_size), padding="same",
                 activation='relu',
                 input_shape=(4, 1)))
model.add(BatchNormalization())
model.add(Dropout(droprate))
model.add(MaxPooling1D(strides=1))

#FCN layer
model.add(Flatten())
model.add(Dense(64,use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(droprate))
model.add(Dense(2))
model.add(Activation('softmax'))

model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=['accuracy'])

model.summary()

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 4, 64)             256       
_________________________________________________________________
batch_normalization_1 (Batch (None, 4, 64)             256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 4, 64)             0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 64)             0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 192)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                12288     
___________________________________________________________

In [7]:
#Save Model=ON
history = model.fit(X_train, preprocess.y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, preprocess.y_test),shuffle=True,callbacks=callbacks)

score = model.evaluate(X_test, preprocess.y_test, verbose=0)

#print loss and accuracy
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 40780 samples, validate on 42398 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 0.6483596493609756
Test accuracy: 0.6827916411123353


In [8]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)
print(y_pred)
y_pred = np.argmax(y_pred, axis=1)
print(y_pred)

y_pred = model.predict_classes(X_test)
print(y_pred)

p=model.predict_proba(X_test)

target_names = ['normal', 'anomaly']
print(classification_report(np.argmax(preprocess.y_test, axis=1), y_pred, target_names=target_names, digits=4))
print(confusion_matrix(np.argmax(preprocess.y_test, axis=1), y_pred))

[[0.11317939 0.8868206 ]
 [0.11317939 0.8868206 ]
 [0.11317939 0.8868206 ]
 ...
 [0.9967469  0.00325306]
 [0.97680295 0.02319701]
 [0.97680295 0.02319701]]
[1 1 1 ... 0 0 0]
[1 1 1 ... 0 0 0]
             precision    recall  f1-score   support

     normal     0.6137    0.0915    0.1593     13921
    anomaly     0.6863    0.9718    0.8045     28477

avg / total     0.6625    0.6828    0.5927     42398

[[ 1274 12647]
 [  802 27675]]
