# CNN + LSTM for analysing imdb dataset for simple sentimental analysis

In [0]:
# import imdb dataset
from keras.datasets import imdb
max_words = 20000

(x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb.npz",
                                                      num_words=None,
                                                      skip_top=0,
                                                      maxlen=None,
                                                      seed=113,
                                                      start_char=1,
                                                      oov_char=2,
                                                      index_from=3)

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [0]:
# pre-processing dataset
from keras.preprocessing import sequence

maxlen = 1000
x_train = sequence.pad_sequences(x_train, maxlen = maxlen) # pads sequences to same length
x_test = sequence.pad_sequences(x_test, maxlen = maxlen) # pads sequences to same length

In [0]:
# setting up model (combination of CNN + LSTM)
from keras.models import Sequential
from keras.layers import Dropout, Dense, LSTM, Conv1D, MaxPooling1D, Embedding, Activation

model = Sequential()

# transforms positive integers (indexes) into dense vectors of fixed size
# Embedding layer must always be used as the first layer
model.add(Embedding(max_words, 128, input_length = maxlen))
model.add(Dropout(0.3))
# add your 1D convolution layer (e.g. temporal convolution)
model.add(Conv1D(filters = 64,
                 kernel_size = 5,
                 padding = 'valid',
                 activation = 'relu',
                 strides = 1))
# add maximum pooling operation for temporal data
model.add(MaxPooling1D(pool_size = 4))
# add LSTM layer
model.add(LSTM(units = 70))
# add dense layer - 1 target
model.add(Dense(units = 1))
# activation function
model.add(Activation('sigmoid'))
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1000, 128)         2560000   
_________________________________________________________________
dropout_1 (Dropout)          (None, 1000, 128)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 996, 64)           41024     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 249, 64)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 70)                37800     
_________________________________________________________________
dense_1 (Dense)      

In [0]:
# model compilation
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

In [0]:
# model fitting
model.fit(x_train,
          y_train,
          batch_size = 32,
          epochs = 2,
          validation_split = 0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fad16cc7c50>

In [0]:
# converting a full complete sentence to index form (part of sentimental analysis)
word_to_id = imdb.get_word_index()

sad_sentences = 'I am very depressed and upset'
happy_sentences = 'I am very happy and excited'
compiled_sentences = list()
compiled_sentences.append(sad_sentences)
compiled_sentences.append(happy_sentences)

sentence = str(input())
temp_list = list()
print('')
for word in sentence.split(' '):
  print(word)
  temp_list.append(word_to_id[word])
temp_list_padded = sequence.pad_sequences ([temp_list], maxlen = maxlen)
model.predict(temp_list_padded)

i am happy

i
am
happy


array([[0.3840828]], dtype=float32)