**Movie Sentiment Analysis using RNN **

In [1]:
# Import libraries
from keras.datasets import imdb
from keras.utils import pad_sequences
from keras import Sequential
from keras.layers import Dense,SimpleRNN,Embedding,Flatten

In [None]:
# IMDB dataset having 50K movie reviews for natural language processing or Text analytics.
# Here we have set of 25,000 highly polar movie reviews for training and 25,000 for testing. 
# So, predict the number of positive and negative reviews using either classification or deep learning algorithms.
# only consider the top 10,000 most common words,

In [2]:
# Load the dataset
(X_train,y_train),(X_test,y_test) = imdb.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
# To get shape
print('No of samples in training set',X_train.shape)
print('No of samples in test set',X_test.shape)


No of samples in training set (25000,)
No of samples in test set (25000,)


In [None]:
X_train[0]

In [5]:
# To get maximum length of review
print(len(max((X_train+ X_test), key=len)))

2697


In [6]:
# Reshape the y value
import numpy as np
y_train = np.asarray(y_train).reshape((-1,1))
y_test = np.asarray(y_test).reshape((-1,1))

In [7]:
# Apply the padding and decide max length
X_train = pad_sequences(X_train,padding='post',maxlen=100)
X_test = pad_sequences(X_test,padding='post',maxlen=100)

In [8]:
X_train.shape

(25000, 100)

In [9]:
# using RNN Without Embedding
model = Sequential()
model.add(SimpleRNN(32, input_shape=(100,1)))
model.add(Dense(1, activation='sigmoid'))

In [None]:
# # using RNN with Embedding Technique

# model1 = Sequential()
# model1.add(Embedding(10000, 2,input_length=100))  # Unique words= 10000
# model1.add(SimpleRNN(32)) #return_sequences=False
# model1.add(Dense(1, activation='sigmoid'))



In [10]:
# For RNN
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train,epochs=5,validation_data=(X_test,y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff006782e00>

In [None]:
# For RNN with embedding
# model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# model1.fit(X_train, y_train,epochs=5,validation_data=(X_test,y_test))

In [None]:
# To get test accuracy
scores = model1.evaluate(X_test,y_test)
print('test accuracy',scores[1])

test accuracy 0.8338000178337097


In [None]:
# Prediction
y_pred = model1.predict(X_test)
y_pred



array([[0.4809706 ],
       [0.9664097 ],
       [0.82918674],
       ...,
       [0.5477949 ],
       [0.02093412],
       [0.7858767 ]], dtype=float32)

In [None]:
# Covert probability to numbers
t1 = []
for i in range(len(y_pred)):
    if y_pred[i] >= 0.5:
        t1.append(1)
    else:
        t1.append(0)

In [None]:
print(t1)

[0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 

In [None]:
# Actual Values
y_test

array([[0],
       [1],
       [1],
       ...,
       [0],
       [0],
       [0]])