In [2]:
# This Python file uses the following encoding: utf-8
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.preprocessing import sequence
from sklearn.model_selection import train_test_split

from lib.pretty_testing import predict_test

import numpy as np
import pickle

In [3]:
# Fix NumPy's global random seed for reproducibility.
np.random.seed(8)


# Load the prepared (data, labels) pair.
# Pickle is a binary format, so the file must be opened in 'rb' mode:
# text mode raises on Python 3 and can corrupt the stream on Windows.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only use it on files that come from a trusted source.
with open('embedded_docs.p', 'rb') as f:
    data, labels = pickle.load(f)

In [4]:
# Bring every sequence to exactly 200 steps for the RNN:
# padding="pre" fills short sequences at the front with 0.0,
# truncating="post" cuts long sequences at the end.
padded_data = sequence.pad_sequences(
    data,
    maxlen=200,
    dtype='float32',
    padding="pre",
    truncating="post",
    value=0.0,
)

In [5]:
# Keep 90% of the samples for training and hold out the rest for testing,
# stratifying on the labels.
# NOTE(review): no random_state is passed, so the shuffle presumably relies
# on the NumPy global seed set in the earlier cell - confirm before
# re-running cells out of order.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    padded_data,
    labels,
    train_size=0.9,
    stratify=labels,
)



In [None]:
# Binary sequence classifier: one LSTM layer feeding a single sigmoid unit.
model = Sequential()
# input_shape=(200, 100): 200 time steps (the maxlen used when padding);
# the 100 is presumably the per-step embedding size - TODO confirm.
model.add(LSTM(100, input_shape = (200, 100)))
# Dropout is currently disabled:
#model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the layer table itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the cell output.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 80,501
Trainable params: 80,501
Non-trainable params: 0
_________________________________________________________________
None


In [4]:
# Persist the model's JSON description to disk; the weights are written
# separately by the save_weights cell.
with open('models/keras_model.json', 'w') as json_file:
    architecture_json = model.to_json()
    json_file.write(architecture_json)

In [None]:
# Train for 3 epochs in mini-batches of 64 samples.
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=3,
)

# Score the held-out split silently (verbose=0).
# Index 1 of the result is reported as accuracy; presumably the list is
# [loss, accuracy] given metrics=['accuracy'] at compile time.
scores = model.evaluate(X_test, y_test, verbose=0)
accuracy_percent = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_percent)

Epoch 1/3

In [6]:
# Write the model's current weights to an HDF5 file.
new_weights_file = "models/keras_new_weights.h5"
model.save_weights(new_weights_file)

In [6]:
# Load weights from disk into the current model.
# NOTE(review): this reads keras_weights.h5, not the keras_new_weights.h5
# file written by the save cell - presumably weights from an earlier run;
# confirm this is intended.
stored_weights_file = "models/keras_weights.h5"
model.load_weights(stored_weights_file)

In [7]:
# Report on the held-out split via the project helper.
# The names are presumably the display labels for classes 0 and 1, in that
# order - confirm against lib.pretty_testing.
class_names = ['non_cost', 'cost']
predict_test(model, X_test, y_test, class_names)

()
Test classification report
Accuracy: 0.979325
             precision    recall  f1-score   support

          0       0.99      0.97      0.98      1451
          1       0.97      0.99      0.98      1451

avg / total       0.98      0.98      0.98      2902

Test confusion Matrix
             non_cost     cost
    non_cost   1412.0     39.0
        cost     21.0   1430.0


0.97932460372157137

In [7]:
# Rebuild the model from its stored JSON description and show its layers.
# NOTE(review): this replaces the `model` object used above; presumably the
# weights must be loaded again with load_weights before predicting - confirm.
with open('models/keras_model.json') as json_file:
    architecture_json = json_file.read()
model = model_from_json(architecture_json)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 80,501
Trainable params: 80,501
Non-trainable params: 0
_________________________________________________________________
