In [1]:
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from matplotlib import pyplot
from keras.layers.embeddings import Embedding
from keras.layers import Flatten, Dense, LSTM, SimpleRNN
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.preprocessing import sequence
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


In [2]:
# Load the IMDB review splits with the loader's default arguments (no num_words given here).
# These four names are rebound later by the reload that passes num_words=top_words.
(x_train, y_train), (x_test, y_test) = imdb.load_data()

In [3]:
# Join the train and test splits end to end (concatenate works along axis 0 by default).
# In the visible notebook the combined arrays are only used for the shape check below.
x = numpy.concatenate([x_train, x_test])
y = numpy.concatenate([y_train, y_test])

In [4]:
# Display the shapes of the combined feature and label arrays as a quick sanity check.
x.shape, y.shape

((50000,), (50000,))

In [5]:
# top_words: passed as num_words when reloading the data and as the Embedding input size.
# max_words: passed as maxlen to pad_sequences and as input_length to every Embedding layer.
top_words, max_words = 5000, 500

In [6]:
# Reload the splits with num_words=top_words; this replaces the splits loaded earlier
# without a num_words argument.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words = top_words)

In [7]:
# Display the shapes of the reloaded train and test feature arrays (before padding).
x_train.shape, x_test.shape

((25000,), (25000,))

In [8]:
# Bring every review in both splits to exactly max_words tokens so the models
# can consume fixed-width input.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=max_words)
    for split in (x_train, x_test)
)

In [61]:
def create_wrapper(learn_rate = 0.001):
    """Build and compile the embedding + dense (MLP) sentiment classifier.

    Passed as ``build_fn`` to ``KerasClassifier`` so that ``learn_rate`` can
    be tuned through the scikit-learn grid search.

    Parameters
    ----------
    learn_rate : float, default 0.001
        Learning rate handed to the Adam optimizer.

    Returns
    -------
    keras.models.Sequential
        Compiled model: Embedding -> Flatten -> Dense(128, relu) ->
        Dense(128, relu) -> Dense(1, sigmoid), using binary cross-entropy
        loss and reporting accuracy.

    Notes
    -----
    Reads the notebook-level ``top_words`` (Embedding input size) and
    ``max_words`` (Embedding input length).
    """
    embedding_vector_length = 32
    model = Sequential()
    model.add(Embedding(top_words, embedding_vector_length, input_length=max_words))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # `learning_rate` is the current name of Adam's step-size argument;
    # `lr` is only kept by Keras as a legacy alias.
    optimizer = Adam(learning_rate=learn_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()
    return model

In [62]:
# Wrap the model factory in a scikit-learn compatible estimator so it can be
# handed to GridSearchCV. The name `model` is reused for a plain Sequential
# model in the MLP section further down.
model = KerasClassifier(
    build_fn=create_wrapper,
    epochs=10,
    batch_size=128,
    verbose=True,
)

In [63]:
# Candidate Adam learning rates for the grid search (order kept as originally listed).
learn_rate = [
    5e-4,
    1e-4,
    1.5e-4,
    2e-4,
]

In [64]:
# Search space: the key must match the keyword argument of create_wrapper.
param_grid = {"learn_rate": learn_rate}

In [65]:
# 3-fold cross-validated search over param_grid, using all available cores (n_jobs=-1).
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)

In [66]:
# Run the learning-rate grid search on the padded training split and keep the result.
grid_result = grid.fit(x_train, y_train)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
flatten_2 (Flatten)          (None, 16000)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               2048128   
_________________________________________________________________
dense_5 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 129       
Total params: 2,224,769
Trainable params: 2,224,769
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [67]:
# Keep the estimator that the grid search selected as best.
best_mlp = grid_result.best_estimator_

In [71]:
# Predict on the padded test split with the selected estimator.
y_predict = best_mlp.predict(x_test)



In [85]:
# Test-set accuracy of the grid-searched MLP.
accuracy_score(y_true=y_test, y_pred=y_predict)

0.8636

### MLP

In [9]:
# MLP: Embedding -> Flatten -> two Dense(128, relu) layers -> sigmoid output,
# compiled with the 'adam' optimizer at its default settings.
model = Sequential()
embedding_vector_length = 32
model.add(Embedding(top_words, embedding_vector_length, input_length=max_words))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line to the cell output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
flatten_1 (Flatten)          (None, 16000)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               2048128   
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129       
Total params: 2,224,769
Trainable params: 2,224,769
Non-trainable params: 0
_________________________________________________________________
None


In [10]:
# Train the MLP for 10 epochs. The test split is passed as validation_data,
# so the per-epoch val_* metrics in the log are computed on the test split.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=128,
    verbose=2,
)
# Score the trained model on the test split; scores[1] is the accuracy metric
# requested at compile time.
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (100 * scores[1]))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 25000 samples, validate on 25000 samples
Epoch 1/10
 - 29s - loss: 0.4786 - accuracy: 0.7322 - val_loss: 0.2993 - val_accuracy: 0.8747
Epoch 2/10
 - 27s - loss: 0.1712 - accuracy: 0.9367 - val_loss: 0.3392 - val_accuracy: 0.8654
Epoch 3/10
 - 26s - loss: 0.0466 - accuracy: 0.9851 - val_loss: 0.5001 - val_accuracy: 0.8558
Epoch 4/10
 - 27s - loss: 0.0102 - accuracy: 0.9973 - val_loss: 0.6654 - val_accuracy: 0.8553
Epoch 5/10
 - 26s - loss: 0.0019 - accuracy: 0.9997 - val_loss: 0.7746 - val_accuracy: 0.8567
Epoch 6/10
 - 26s - loss: 2.2674e-04 - accuracy: 1.0000 - val_loss: 0.8546 - val_accuracy: 0.8574
Epoch 7/10
 - 26s - loss: 8.6339e-05 - accuracy: 1.0000 - val_loss: 0.8946 - val_accuracy: 0.8576
Epoch 8/10
 - 26s - loss: 5.5072e-05 - accuracy: 1.0000 - val_loss: 0.9244 - val_accuracy: 0.8582
Epoch 9/10
 - 26s - loss: 3.8854e-05 - accuracy: 1.0000 - val_loss: 0.9501 - val_accuracy: 0.8578
Epoch 10/10
 - 26s - loss: 2.8961e-05 - accuracy: 1.0000 - val_loss: 0.9720 - val_accura

### LSTM 

In [11]:
# LSTM classifier: Embedding -> LSTM(100) -> sigmoid output.
embedding_vector_length = 32
model1 = Sequential()
model1.add(Embedding(top_words, embedding_vector_length, input_length=max_words))
model1.add(LSTM(100))
model1.add(Dense(1, activation='sigmoid'))
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line to the cell output.
model1.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None


In [13]:
# Train the LSTM for 3 epochs; no validation data is passed, so only training
# metrics are logged.
model1.fit(x_train, y_train, epochs=3, batch_size=128, verbose=2)
# The final evaluation of the model is done in the following cell.

Epoch 1/3
 - 902s - loss: 0.3014 - accuracy: 0.8787
Epoch 2/3
 - 884s - loss: 0.2307 - accuracy: 0.9110
Epoch 3/3
 - 878s - loss: 0.2080 - accuracy: 0.9204


<keras.callbacks.callbacks.History at 0x1a45059e90>

In [14]:
# Evaluate the trained LSTM on the padded test split (loss and compiled metrics).
scores = model1.evaluate(x_test, y_test, verbose=0)

In [16]:
# scores[1] is the accuracy metric requested at compile time; report it as a percentage.
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 85.40%


### CNN

In [79]:
# 1-D CNN classifier: Embedding -> two Conv1D/MaxPooling1D stages -> Flatten
# -> sigmoid output. The layers are handed to Sequential as an ordered list.
embedding_vector_length = 32
model2 = Sequential([
    Embedding(top_words, embedding_vector_length, input_length=max_words),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    # Dense(128, activation='relu'),  # optional hidden layer, left disabled
    Dense(1, activation='sigmoid'),
])
model2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [80]:
# Print the layer-by-layer summary of the CNN.
model2.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 498, 32)           3104      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 249, 32)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 247, 32)           3104      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 123, 32)           0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 3936)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 1)                

In [81]:
# Train the CNN for 3 epochs; no validation data is passed, so only training
# metrics are logged.
model2.fit(x_train, y_train, epochs=3, batch_size=128, verbose=2)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/3
 - 19s - loss: 0.6175 - accuracy: 0.6102
Epoch 2/3
 - 18s - loss: 0.2754 - accuracy: 0.8888
Epoch 3/3
 - 18s - loss: 0.2060 - accuracy: 0.9201


<keras.callbacks.callbacks.History at 0x6605a3790>

In [82]:
# Evaluate the trained CNN on the padded test split; scores[1] is the accuracy
# metric requested at compile time, reported here as a percentage.
scores = model2.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 87.72%


In [83]:
# Mean squared error between the test labels and model2's raw predictions
# (the outputs of its final sigmoid layer, not thresholded class labels).
mean_squared_error(y_test, model2.predict(x_test))

0.08915372629124096

### RNN

In [None]:
# Simple RNN classifier: Embedding -> SimpleRNN(100) -> sigmoid output.
embedding_vector_length = 32
model3 = Sequential()
model3.add(Embedding(top_words, embedding_vector_length, input_length=max_words))
model3.add(SimpleRNN(100))
model3.add(Dense(1, activation='sigmoid'))
model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# would only add a stray "None" line to the cell output.
model3.summary()

In [None]:
# Train the simple RNN for 3 epochs; no validation data is passed, so only
# training metrics are logged.
model3.fit(x_train, y_train, epochs=3, batch_size=128, verbose=2)