In [0]:

import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.layers import LSTM
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

Using TensorFlow backend.


In [0]:
# load the dataset but only keep the top n words, zero the rest
top_words = 8000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)


# pad dataset to a maximum review length in words
max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

X = numpy.concatenate((X_train, X_test), axis=0)
y = numpy.concatenate((y_train, y_test), axis=0)
X_train=X[0:35000]
X_test=X[35000:]
y_train=y[0:35000]
y_test=y[35000: ] 

In [0]:

model = Sequential()
model.add(Embedding(top_words, 100, input_length=max_words))
model.add(LSTM(200))
model.add(Dropout(0.8))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())


# Fit the model
model.fit(X_train, y_train, validation_split=0.2, epochs=3, batch_size=128, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


In [0]:
model = Sequential()
model.add(Embedding(top_words, 100, input_length=max_words))
model.add(LSTM(200))
model.add(Dropout(0.8))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 100)          800000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 200)               240800    
_________________________________________________________________
dropout_2 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 201       
Total params: 1,041,001
Trainable params: 1,041,001
Non-trainable params: 0
_________________________________________________________________
None


In [0]:
# Fit the model
model.fit(X_train, y_train, validation_split=0.2, epochs=3, batch_size=128, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


Train on 28000 samples, validate on 7000 samples
Epoch 1/3
 - 240s - loss: 0.5490 - acc: 0.7167 - val_loss: 0.4667 - val_acc: 0.7807
Epoch 2/3
 - 244s - loss: 0.3264 - acc: 0.8680 - val_loss: 0.3379 - val_acc: 0.8566
Epoch 3/3
 - 236s - loss: 0.2642 - acc: 0.8997 - val_loss: 0.3209 - val_acc: 0.8723
Accuracy: 87.73%


In [0]:
y_pred=model.predict_proba(X_test)

In [0]:
test_size=15000
for i in range(test_size):
    if y_pred[i]>0.4:
        y_pred[i]=1
    else:
        y_pred[i]=0

In [0]:
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,y_pred)
print(cm)

[[6147 1326]
 [ 619 6908]]


In [0]:
tp=cm[0][0]
fn=cm[0][1]
fp=cm[1][0]
tn=cm[1][1]

errorrate=((fp+fn)/(tp+fn+fp+tn))
accuracy=((tp+tn)/(tp+fn+fp+tn))
sensitivity=(tp/(tp+fn))
specificity=(tn/(tn+fp))
precision=(tp/(tp+fp))
fpr=1-specificity
f1_score=(2*precision*sensitivity)/(precision +sensitivity)

In [0]:
print("ERRORRATE")
print(errorrate)
print("accuracy")
print(accuracy)
print("sensitivity")
print(sensitivity)
print("specificity")
print(specificity)
print("precision")
print(precision)
print("fpr")
print(fpr)
print("f1_score")
print(f1_score)

ERRORRATE
0.12966666666666668
accuracy
0.8703333333333333
sensitivity
0.8225612203934163
specificity
0.9177627208715292
precision
0.9085131540053207
fpr
0.08223727912847079
f1_score
0.8634033288854556
