In [1]:
import argparse
import math
import sys
import time
import copy

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, regularizers
from keras.layers.noise import GaussianNoise
from keras.layers import Conv1D, MaxPooling1D
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils.np_utils import to_categorical
K.set_image_dim_ordering('th')
print(K.image_data_format())

## required for efficient GPU use
import tensorflow as tf
from keras.backend import tensorflow_backend
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
tensorflow_backend.set_session(session)
## required for efficient GPU use

import os
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra

Using TensorFlow backend.


channels_first


In [0]:
model_path='cnn_deep_mon.h5'


callbacks = [
    EarlyStopping(
        monitor='val_acc', 
        patience=10,
        mode='max',
        verbose=1),
    ModelCheckpoint(model_path,
        monitor='val_acc', 
        save_best_only=True, 
        mode='max',
        verbose=0)
]

In [12]:
class dataset:
    monday = pd.read_pickle("preprocessed_monday_data.pkl")
    tuesday = pd.read_pickle("preprocessed_tuesday_data.pkl")
    
    
from sklearn import model_selection as ms
from sklearn import preprocessing as pp

class preprocess:
    
    output_columns_2labels = ['label_0','label_1']
    
    x_input = dataset.monday.drop(output_columns_2labels, axis = 1)
    y_output = dataset.monday.loc[:,output_columns_2labels]

    x_test_input = dataset.tuesday.drop(output_columns_2labels, axis = 1)
    y_test = dataset.tuesday.loc[:,output_columns_2labels]
    
    ss = pp.StandardScaler()
    
    x_train = ss.fit_transform(x_input)
    x_test = ss.fit_transform(x_test_input)
    
    y_train = y_output.values
    y_test = y_test.values
        
    print('x_train shape: {}'.format(x_train.shape))
    print('y_train shape: {}'.format(y_train.shape))
    print('x_test shape: {}'.format(x_test.shape))
    print('y_test shape: {}'.format(y_test.shape))

x_train shape: (529918, 78)
y_train shape: (529918, 2)
x_test shape: (445909, 78)
y_test shape: (445909, 2)


In [13]:
X_train = np.reshape(preprocess.x_train, (preprocess.x_train.shape[0], preprocess.x_train.shape[1], 1))
X_test = np.reshape(preprocess.x_test, (preprocess.x_test.shape[0], preprocess.x_test.shape[1], 1))

print('train shape after reshape: {}'.format(X_train.shape))
print('test shape after reshape: {}'.format(X_test.shape))

train shape after reshape: (529918, 78, 1)
test shape after reshape: (445909, 78, 1)


In [0]:
#size of parameters
batch_size = 1000
num_classes = 2
epochs = 10
filter_size=3
#noise = 1
droprate=0.50

In [22]:
#Start Neural Network
model = Sequential()

#convolution 1st layer
model.add(Conv1D(64, kernel_size=(filter_size), padding="same",
                 activation='relu',
                 input_shape=(78, 1)))
model.add(BatchNormalization())
model.add(Dropout(droprate))
          
#convolution 2nd layer
model.add(Conv1D(128, kernel_size=(filter_size), activation='relu', border_mode="same"))
model.add(BatchNormalization())
model.add(MaxPooling1D(strides=1))
model.add(Dropout(droprate))

#convolution 3rd layer
model.add(Conv1D(256, kernel_size=(filter_size), activation='relu', border_mode="same"))
model.add(BatchNormalization())
model.add(MaxPooling1D(strides=1))
model.add(Dropout(droprate))

#FCN 1st layer
model.add(Flatten())
model.add(Dense(64,use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(droprate))

#FCN 2nd layer
model.add(Dense(32,use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(droprate))

#FCN 3rd layer
model.add(Dense(16,use_bias=False))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(droprate))

#FCN final layer
model.add(Dense(2))
model.add(Activation('softmax'))

model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=['accuracy'])

model.summary()

  # This is added back by InteractiveShellApp.init_path()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_4 (Conv1D)            (None, 78, 64)            256       
_________________________________________________________________
batch_normalization_7 (Batch (None, 78, 64)            256       
_________________________________________________________________
dropout_7 (Dropout)          (None, 78, 64)            0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 78, 128)           24704     
_________________________________________________________________
batch_normalization_8 (Batch (None, 78, 128)           512       
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 77, 128)           0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 77, 128)           0         
__________

In [23]:
#Save Model=ON
history = model.fit(X_train, preprocess.y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, preprocess.y_test),shuffle=True)

score = model.evaluate(X_test, preprocess.y_test, verbose=0)

#print loss and accuracy
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 529918 samples, validate on 445909 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.4161117301276838
Test accuracy: 0.9689734901067258


In [25]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)
print(y_pred)
y_pred = np.argmax(y_pred, axis=1)
print(y_pred)

y_pred = model.predict_classes(X_test)
print(y_pred)

p=model.predict_proba(X_test)

target_names = ['0', '1']
print(classification_report(np.argmax(preprocess.y_test, axis=1), y_pred, target_names=target_names, digits=4))
print(confusion_matrix(np.argmax(preprocess.y_test, axis=1), y_pred))

[[9.9999988e-01 1.5089576e-07]
 [9.9999988e-01 9.6553933e-08]
 [9.9999988e-01 8.7484146e-08]
 ...
 [9.9999762e-01 2.3681193e-06]
 [9.9999499e-01 5.0405497e-06]
 [9.9999762e-01 2.3367672e-06]]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]


  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

          0     0.9690    1.0000    0.9842    432074
          1     0.0000    0.0000    0.0000     13835

avg / total     0.9389    0.9690    0.9537    445909

[[432074      0]
 [ 13835      0]]
