[View in Colaboratory](https://colab.research.google.com/github/AdityaVijayvergia/models/blob/master/Sentiment_analysis_using_RNN.ipynb)

In [2]:
from keras.datasets import imdb

Using TensorFlow backend.


In [3]:
# Notebook-wide settings: vocabulary cap, padded review length, batch size.
max_features = 10000
maxlen = 500
batch_size = 32

# Load IMDB with the vocabulary capped at `max_features` word ids.
(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)

# Report how many reviews ended up in each split.
for count, label in ((len(input_train), 'train sequences'),
                     (len(input_test), 'test sequences')):
  print(count, label)

25000 train sequences
25000 test sequences


In [0]:
# Word -> integer index mapping for the IMDB vocabulary; iterated later as
# (word, index) pairs when the embedding matrix is filled.
word_index = imdb.get_word_index()

In [0]:
from keras.preprocessing import sequence

In [6]:
# Bring every review to exactly `maxlen` token ids so each split becomes a
# rectangular 2-D array.
input_train, input_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (input_train, input_test)
)

# Show the resulting array shapes.
for label, shape in (('input_train shape:', input_train.shape),
                     ('input_test shape:', input_test.shape)):
  print(label, shape)

input_train shape: (25000, 500)
input_test shape: (25000, 500)


In [0]:
from keras.layers import Dense, Embedding, LSTM
from keras.models import Sequential

In [0]:
# Stack: 100-d embedding lookup -> 32-unit LSTM with input and recurrent
# dropout -> single sigmoid output for the binary sentiment label.
model = Sequential([
    Embedding(max_features, 100),
    LSTM(32, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

In [9]:
# Inspect the layer list; index 0 is the Embedding layer whose weights are
# replaced with the GloVe matrix further down.
model.layers

[<keras.layers.embeddings.Embedding at 0x7f110d5044a8>,
 <keras.layers.recurrent.LSTM at 0x7f110c3a5e10>,
 <keras.layers.core.Dense at 0x7f110d5046d8>]

In [10]:
!wget http://nlp.stanford.edu/data/glove.6B.zip

--2018-06-10 12:31:47--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2018-06-10 12:31:48--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip.1’



2018-06-10 12:32:33 (18.4 MB/s) - ‘glove.6B.zip.1’ saved [862182613/862182613]



In [12]:
!unzip glove.6B.zip

Archive:  glove.6B.zip
replace glove.6B.50d.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [13]:
# List the working directory to check which GloVe files are present.
ls

[0m[01;34mdatalab[0m/           glove.6B.200d.txt  glove.6B.50d.txt  glove.6B.zip.1
glove.6B.100d.txt  glove.6B.300d.txt  glove.6B.zip


In [14]:
import os
import numpy as np
glove_dir = '/content'
glove_path = os.path.join(glove_dir, 'glove.6B.100d.txt')

# Parse the GloVe text file into {token: float32 vector}. Each line holds a
# token followed by its whitespace-separated vector components.
embeddings_index = {}
# The context manager closes the file even if parsing raises, and the explicit
# encoding avoids depending on the platform's default text encoding.
with open(glove_path, encoding='utf-8') as f:
  for line in f:
    values = line.split()
    if not values:
      # Skip blank lines (e.g. a trailing newline) instead of failing on values[0].
      continue
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))

Found 400000 word vectors.


In [0]:
embedding_dim = 100

# imdb.load_data() shifts every word id by `index_from` (default 3) so that
# 0 = padding, 1 = start-of-sequence and 2 = out-of-vocabulary, while
# imdb.get_word_index() returns the unshifted frequency ranks. The same shift
# must be applied here so that row k of the matrix belongs to token id k as it
# appears in input_train / input_test.
index_from = 3

# Rows with no GloVe vector (including the reserved ids 0-2) stay all-zero.
embedding_matrix = np.zeros((max_features, embedding_dim))
for word, i in word_index.items():
  row = i + index_from
  # Bound by the vocabulary size (`max_features`), not by the padded sequence
  # length (`maxlen`): every token id the model can see needs its vector.
  if row < max_features:
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
      embedding_matrix[row] = embedding_vector

In [0]:
# Load the pre-computed embedding matrix into the first (Embedding) layer and
# freeze it so training does not update those weights.
embedding_layer = model.layers[0]
embedding_layer.set_weights([embedding_matrix])
embedding_layer.trainable = False

In [17]:
# Print the layer/parameter table; used here to confirm the embedding
# parameters are reported as non-trainable after freezing.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 100)         1000000   
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                17024     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 1,017,057
Trainable params: 17,057
Non-trainable params: 1,000,000
_________________________________________________________________


In [0]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked under the key 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [0]:
# Train for 15 epochs, holding out 20% of the training data for validation.
# The batch size comes from the notebook-level `batch_size` setting rather
# than a repeated literal, so changing the setting actually takes effect.
history = model.fit(
    input_train,
    y_train,
    epochs=15,
    batch_size=batch_size,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/15
Epoch 2/15
  992/20000 [>.............................] - ETA: 12:39 - loss: 0.6807 - acc: 0.5464

Epoch 3/15

Epoch 4/15

Epoch 5/15

Epoch 6/15

Epoch 7/15

Epoch 8/15

Epoch 9/15

Epoch 10/15

Epoch 11/15

Epoch 12/15



In [1]:
# Persist the model's weights to disk.
weights_path = 'pre_trained_glove_model.h5'
model.save_weights(weights_path)

[0m[01;34mdatalab[0m/           glove.6B.200d.txt  glove.6B.50d.txt
glove.6B.100d.txt  glove.6B.300d.txt  glove.6B.zip


In [0]:
import matplotlib.pyplot as plt
# Per-epoch metrics recorded by model.fit().
curves = history.history
acc, val_acc = curves['acc'], curves['val_acc']
loss, val_loss = curves['loss'], curves['val_loss']
epochs = range(1, len(acc) + 1)

# One figure per metric: training values as blue dots, validation values as a
# blue line.
panels = (
    (acc, 'Training acc', val_acc, 'Validation acc', 'Training and validation accuracy'),
    (loss, 'Training loss', val_loss, 'Validation loss', 'Training and validation loss'),
)
for position, (train_curve, train_label, val_curve, val_label, title) in enumerate(panels):
  if position:
    # The first panel draws on pyplot's implicit figure; later ones open a new one.
    plt.figure()
  plt.plot(epochs, train_curve, 'bo', label=train_label)
  plt.plot(epochs, val_curve, 'b', label=val_label)
  plt.title(title)
  plt.legend()
plt.show()