# IMBD movie reviews prediction

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.datasets import imdb

In [3]:
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=20000)

Loading data...


In [4]:
list(x_train[0][0:10])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]

In [6]:
y_train[0:5]

array([1, 0, 0, 1, 0], dtype=int64)

In [7]:
x_train = sequence.pad_sequences(x_train, maxlen=80)
x_test = sequence.pad_sequences(x_test, maxlen=80)

In [8]:
model = Sequential()
model.add(Embedding(20000, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

In [9]:
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

## Model

In [10]:
model.fit(x_train, y_train,
          batch_size=32,
          epochs=15,
          verbose=2,
          validation_data=(x_test, y_test))

Train on 25000 samples, validate on 25000 samples
Epoch 1/15
 - 161s - loss: 0.4628 - acc: 0.7866 - val_loss: 0.4626 - val_acc: 0.7809
Epoch 2/15
 - 136s - loss: 0.2994 - acc: 0.8778 - val_loss: 0.3942 - val_acc: 0.8349
Epoch 3/15
 - 147s - loss: 0.2179 - acc: 0.9161 - val_loss: 0.4177 - val_acc: 0.8300
Epoch 4/15
 - 146s - loss: 0.1597 - acc: 0.9407 - val_loss: 0.4617 - val_acc: 0.8294
Epoch 5/15
 - 151s - loss: 0.1141 - acc: 0.9578 - val_loss: 0.5750 - val_acc: 0.8202
Epoch 6/15
 - 152s - loss: 0.0816 - acc: 0.9720 - val_loss: 0.6090 - val_acc: 0.8221
Epoch 7/15
 - 154s - loss: 0.0616 - acc: 0.9780 - val_loss: 0.7303 - val_acc: 0.8035
Epoch 8/15
 - 151s - loss: 0.0497 - acc: 0.9832 - val_loss: 0.7997 - val_acc: 0.8159
Epoch 9/15
 - 152s - loss: 0.0312 - acc: 0.9900 - val_loss: 0.8818 - val_acc: 0.8181
Epoch 10/15
 - 152s - loss: 0.0253 - acc: 0.9923 - val_loss: 0.9051 - val_acc: 0.8166
Epoch 11/15
 - 155s - loss: 0.0246 - acc: 0.9922 - val_loss: 0.9051 - val_acc: 0.8093
Epoch 12/15
 

<tensorflow.python.keras.callbacks.History at 0x1c86f72e4a8>

**Overfitting**

## Checking accuracy

In [11]:
score, acc = model.evaluate(x_test, y_test,
                            batch_size=32,
                            verbose=2)
print('Test score:', score)
print('Test accuracy:', acc)

Test score: 1.1374038902404904
Test accuracy: 0.81216
