In [0]:

import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.layers import LSTM
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

Using TensorFlow backend.


In [0]:
# load the dataset but only keep the top n words, zero the rest
top_words = 8000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)


# pad dataset to a maximum review length in words
max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

X = numpy.concatenate((X_train, X_test), axis=0)
y = numpy.concatenate((y_train, y_test), axis=0)
X_train=X[0:35000]
X_test=X[35000:]
y_train=y[0:35000]
y_test=y[35000: ] 

In [0]:
def create_conv_model():
    model_conv = Sequential()
    model_conv.add(Embedding(top_words, 100, input_length=max_words))
    model_conv.add(LSTM(100,return_sequences=True))
    model_conv.add(Conv1D(64, 3, padding='same',activation='relu'))
    model_conv.add(MaxPooling1D(pool_size=2))
    model_conv.add(Flatten())
    model_conv.add(Dropout(0.8))
    model_conv.add(Dense(250, activation='relu'))
    model_conv.add(Dropout(0.8))
    model_conv.add(Dense(1, activation='sigmoid'))
    model_conv.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model_conv


In [0]:
model1=create_conv_model()
model1.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 100)          800000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 500, 100)          80400     
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 500, 64)           19264     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 250, 64)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 16000)             0         
_________________________________________________________________
dropout_1 (Dropout)  

In [0]:
model1 = create_conv_model()

# Fit the model
print('model compiled')
model1.fit(X_train, y_train, validation_split=0.2, epochs=3, batch_size=128, verbose=2)
# Final evaluation of the model
scores = model1.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

model compiled
Instructions for updating:
Use tf.cast instead.
Train on 28000 samples, validate on 7000 samples
Epoch 1/3
 - 489s - loss: 0.4656 - acc: 0.7486 - val_loss: 0.2896 - val_acc: 0.8794
Epoch 2/3
 - 487s - loss: 0.2265 - acc: 0.9134 - val_loss: 0.3021 - val_acc: 0.8777
Epoch 3/3
 - 487s - loss: 0.1712 - acc: 0.9352 - val_loss: 0.2976 - val_acc: 0.8824
Accuracy: 88.49%


In [0]:
y_pred=model1.predict_proba(X_test)

In [0]:
test_size=15000
for i in range(test_size):
    if y_pred[i]>0.4:
        y_pred[i]=1
    else:
        y_pred[i]=0

In [0]:
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,y_pred)
print(cm)

[[6750  723]
 [ 963 6564]]


In [0]:
tp=cm[0][0]
fn=cm[0][1]
fp=cm[1][0]
tn=cm[1][1]

errorrate=((fp+fn)/(tp+fn+fp+tn))
accuracy=((tp+tn)/(tp+fn+fp+tn))
sensitivity=(tp/(tp+fn))
specificity=(tn/(tn+fp))
precision=(tp/(tp+fp))
fpr=1-specificity
f1_score=(2*precision*sensitivity)/(precision +sensitivity)







In [0]:
print("ERRORRATE")
print(errorrate)
print("accuracy")
print(accuracy)
print("sensitivity")
print(sensitivity)
print("specificity")
print(specificity)
print("precision")
print(precision)
print("fpr")
print(fpr)
print("f1_score")
print(f1_score)

ERRORRATE
0.1124
accuracy
0.8876
sensitivity
0.9032517061421116
specificity
0.8720605819051415
precision
0.8751458576429405
fpr
0.12793941809485854
f1_score
0.8889766890557093
