In [1]:
from __future__ import absolute_import
from __future__ import print_function
import numpy as np

from keras.preprocessing import sequence
from keras.optimizers import SGD, RMSprop, Adagrad
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM, GRU
from keras.datasets import imdb

from keras.optimizers import SGD

from theano import function

Using Theano backend.


Using gpu device 0: GeForce 940M (CNMeM is enabled with initial size: 80.0% of memory, cuDNN not available)
  "downsample module has been moved to the theano.tensor.signal.pool module.")


This code was borrowed and modified from https://github.com/fchollet/keras/blob/master/examples/imdb_lstm.py
    
Train an LSTM on the IMDB sentiment classification task. The dataset is actually too small for an LSTM to be of any advantage compared to simpler, much faster methods such as TF-IDF+LogReg.

- Notes:
    - RNNs are tricky. Choice of batch size is important, choice of loss and optimizer is critical, etc. Some configurations won't converge.
    - LSTM loss decrease patterns during training can be quite different from what you see with CNNs/MLPs/etc.
    - Suggested GPU command: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_lstm.py


In [3]:
# --- Configuration ---------------------------------------------------------
max_features = 20000  # passed to imdb.load_data as nb_words
maxlen = 100          # cut texts after this number of words (among top max_features most common words)
batch_size = 32       # mini-batch size used by fit() / evaluate() further down

# --- Load the IMDB reviews -------------------------------------------------
print("Loading data...")
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    nb_words=max_features,
    test_split=0.2,
)
print("%d train sequences" % len(X_train))
print("%d test sequences" % len(X_test))

Loading data...
20000 train sequences
5000 test sequences


In [5]:
print("Pad sequences (samples x time)")
# Pad both splits to a common length of `maxlen`. A generator expression is
# unpacked here (rather than a list comprehension) so that no loop variable
# leaks into the notebook namespace under Python 2.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_test)
)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

Pad sequences (samples x time)
X_train shape: (20000L, 100L)
X_test shape: (5000L, 100L)


## IMDB Data
Sample reviews from the full IMDb movie reviews dataset.

Negative review examples:
* Unfortunately it stays absurd the WHOLE time with no general narrative eventually making it just too off putting.
* Even those from the era should be turned off.
* The cryptic dialogue would make Shakespeare seem easy to a third grader.

Positive review examples:
* I didn't know this came from Canada, but it is very good. Very good!
* I liked this movie a lot. It really intrigued me how Deanna and Alicia became friends over such a tragedy
* When I saw the elaborate DVD box for this and the dreadful Red Queen figurine, 
  I felt certain I was in for a big disappointment, but surprise, surprise, I loved it. 

In [7]:
# Peek at the first ten training labels (the output below shows 0/1 values).
y_train[:10]

[1, 0, 0, 1, 0, 0, 1, 0, 1, 0]

In [8]:
# Show the first two padded training sequences (rows of integer word indices).
X_train[:2]

array([[  269,   929,    18,     2,     7,     2,  4284,     8,   105,
            5,     2,   182,   314,    38,    98,   103,     7,    36,
         2184,   246,   360,     7,    19,   396,    17,    26,   269,
          929,    18,  1769,   493,     6,   116,     7,   105,     5,
          575,   182,    27,     5,  1002,  1085,   130,    62,    17,
           24,    89,    17,    13,   381,  1421,     8,  5167,     7,
            5,  2723,    38,   325,     7,    17,    23,    93,     9,
          156,   252,    19,   235,    20,    28,     5,   104,    76,
            7,    17,   169,    35, 14764,    17,    23,  1460,     7,
           36,  2184,   934,    56,  2134,     6,    17,   891,   214,
           11,     5,  1552,     6,    92,     6,    33,   256,    82,
            7],
       [   24,    89,    33,  4317,    17,   551,  1851,  3994,    43,
           37,   240,    40,   635,     9,   189,   331,  4183,    45,
            5,     2,     6,   102,    37,    24,     5,   13

In [10]:
print('Build model...')

# Stack: embedding -> recurrent layer -> dropout -> single sigmoid output unit.
model = Sequential()
for _layer in (
    Embedding(max_features, 128, input_length=maxlen),
    LSTM(128),  # try using a GRU instead, for fun
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
):
    model.add(_layer)

# try using different optimizers and different optimizer configs
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    class_mode="binary",
)

Build model...


In [11]:
# List the layer objects of the model, in the order they were added.
model.layers

[<keras.layers.embeddings.Embedding at 0x654a9f28>,
 <keras.layers.recurrent.LSTM at 0x654a9f98>,
 <keras.layers.core.Dropout at 0x654b5208>,
 <keras.layers.core.Dense at 0x654b5898>,
 <keras.layers.core.Activation at 0x65559358>]

In [14]:
# Build a standalone Theano function that maps a batch of word-index
# sequences to the output of the model's first layer (the Embedding layer,
# per `model.layers`), so the learned word vectors can be inspected directly.
inp = model.get_input()  # symbolic input variable of the whole model
embedding = model.layers[0].get_output()  # symbolic output of layer 0
# allow_input_downcast=True: presumably needed so the integer index array can
# be cast to the dtype of `inp` when F is called -- TODO confirm.
F = function([inp], embedding, allow_input_downcast=True)

In [15]:
# Display a batch holding only the first padded review, followed by its shape.
print(X_train[:1], X_train[:1].shape, sep='\n')

[[  269   929    18     2     7     2  4284     8   105     5     2   182
    314    38    98   103     7    36  2184   246   360     7    19   396
     17    26   269   929    18  1769   493     6   116     7   105     5
    575   182    27     5  1002  1085   130    62    17    24    89    17
     13   381  1421     8  5167     7     5  2723    38   325     7    17
     23    93     9   156   252    19   235    20    28     5   104    76
      7    17   169    35 14764    17    23  1460     7    36  2184   934
     56  2134     6    17   891   214    11     5  1552     6    92     6
     33   256    82     7]]
(1L, 100L)


In [16]:
# Run the compiled embedding function once on the first review and reuse the
# result for both prints. The original evaluated F twice on the same input,
# repeating the same computation just to read `.shape`.
embedded_batch = F(X_train[:1])
print(embedded_batch)
print(embedded_batch.shape)

[[[ 0.04523628 -0.04581409  0.00139403 ...,  0.02757302 -0.00659316
    0.04097552]
  [ 0.01897613 -0.02050843  0.03176945 ...,  0.01020672 -0.04183867
    0.01724149]
  [ 0.03700239  0.04918102  0.02664101 ..., -0.00773543  0.04357829
   -0.02801045]
  ..., 
  [-0.02343854  0.0315331   0.01564403 ...,  0.01475945  0.04115001
   -0.01540261]
  [-0.04782986  0.03330185 -0.04914759 ...,  0.03126881  0.01868706
    0.01558241]
  [-0.02831537 -0.01510486  0.01608345 ...,  0.00183296 -0.02324817
   -0.03318051]]]
(1L, 100L, 128L)


In [17]:
print("Train...")

# Fit for 4 epochs; the test split is passed as validation data.
model.fit(
    X_train, y_train,
    batch_size=batch_size,
    nb_epoch=4,
    validation_data=(X_test, y_test),
    show_accuracy=True,
)

# evaluate() is unpacked into two values here (show_accuracy=True).
score, acc = model.evaluate(
    X_test, y_test,
    batch_size=batch_size,
    show_accuracy=True,
)

print('Test score: %s' % score)
print('Test accuracy: %s' % acc)

Train...
Train on 20000 samples, validate on 5000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Test score: 0.680139132333
Test accuracy: 0.8258


## Using SGD
...SGD does not improve the results at all.

In [18]:
# Same layer stack as the Adam model above in this notebook's flow; only the
# optimizer passed to compile() differs.
model_sgd = Sequential()
for _layer in (
    Embedding(max_features, 128, input_length=maxlen),
    LSTM(128),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
):
    model_sgd.add(_layer)

# try using different optimizers and different optimizer configs
sgd = SGD(
    lr=0.1,
    decay=1e-6,
    momentum=0.9,
    nesterov=True,
)
model_sgd.compile(
    loss='binary_crossentropy',
    optimizer=sgd,
    class_mode="binary",
)

DEBUG: nvcc STDOUT mod.cu
   Creating library C:/Users/user/AppData/Local/Theano/compiledir_Windows-10-10.0.14393-Intel64_Family_6_Model_60_Stepping_3_GenuineIntel-2.7.12-64/tmpgrs_5r/a34361e57f0f9e245bb592780ca46e94.lib and object C:/Users/user/AppData/Local/Theano/compiledir_Windows-10-10.0.14393-Intel64_Family_6_Model_60_Stepping_3_GenuineIntel-2.7.12-64/tmpgrs_5r/a34361e57f0f9e245bb592780ca46e94.exp



In [20]:
print("Train...")

# Fit the SGD-compiled model for 10 epochs; the test split is passed as
# validation data.
model_sgd.fit(
    X_train, y_train,
    batch_size=batch_size,
    nb_epoch=10,
    validation_data=(X_test, y_test),
    show_accuracy=True,
)

# evaluate() is unpacked into two values here (show_accuracy=True).
score, acc = model_sgd.evaluate(
    X_test, y_test,
    batch_size=batch_size,
    show_accuracy=True,
)

print('Test score: %s' % score)
print('Test accuracy: %s' % acc)

Train...
Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test score: 1.01398169923
Test accuracy: 0.8202
