# We use RNNs when there is a sequential dependency between inputs and outputs
## e.g. text prediction, weather forecasting, stock price prediction

## Traditional (feed-forward) NNs won't work well here because they process each input independently and don't keep track of the order of the data in a sequence


# RNNs have two main issues during training
## >Exploding gradient
## >Vanishing gradient (most common)


# LSTMs and GRUs

##  >LSTM  "Long Short Term Memory"
The LSTM has a memory structure called the 'cell state' that stores data, and control structures called 'gates' that regulate access to the 'cell state' memory.

Xt    : input at time t
Ht-1  : hidden state carried in from time t-1
Ht    : hidden state output after time t

              
##  >GRU   "Gated Recurrent Unit"
The GRU simplifies the LSTM: the forget and input gates are combined into a single update gate (Forget + Input = Update), and the cell state and the hidden state are merged into one.




# How keras supports the RNN layers
## Standard RNN(RNN Type)     :      SimpleRNN(Keras Layer)
## LSTM(RNN Type)                    :      LSTM(Keras Layer)
## GRU(RNN Type)                      :      GRU(Keras Layer)
## CNN+LSTM (RNN Type)         :      ConvLSTM2D(Keras Layer)





# How sentiment analysis is done by an RNN using Keras

In [7]:
# This program classifies IMDB reviews by sentiment: positive or negative.
# For that we use the IMDB dataset that ships with Keras.
# It contains the reviews' words together with their respective frequency information.

from tensorflow.python.keras.preprocessing  import sequence

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense,Embedding,LSTM
from tensorflow.python.keras.callbacks import EarlyStopping



# Now importing the IMDB dataset 

In [9]:
from tensorflow.python.keras.datasets import imdb

# Only the NUM_WORDS most frequent words are considered.
NUM_WORDS = 6000
# How many of the very top words (likely "the", "and", "a") to skip.
SKIP_TOP = 0

# Maximum number of words taken from each review.
MAX_REVIEW_LEN = 400

# Load the preprocessed, sentiment-labelled IMDB review data.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=NUM_WORDS,
    skip_top=SKIP_TOP,
)

# Show one encoded review as a sample.
print("encoded word sequence :", x_train[3])

encoded word sequence : [1, 4, 2, 2, 33, 2804, 4, 2040, 432, 111, 153, 103, 4, 1494, 13, 70, 131, 67, 11, 61, 2, 744, 35, 3715, 761, 61, 5766, 452, 2, 4, 985, 7, 2, 59, 166, 4, 105, 216, 1239, 41, 1797, 9, 15, 7, 35, 744, 2413, 31, 8, 4, 687, 23, 4, 2, 2, 6, 3693, 42, 38, 39, 121, 59, 456, 10, 10, 7, 265, 12, 575, 111, 153, 159, 59, 16, 1447, 21, 25, 586, 482, 39, 4, 96, 59, 716, 12, 4, 172, 65, 9, 579, 11, 2, 4, 1615, 5, 2, 7, 5168, 17, 13, 2, 12, 19, 6, 464, 31, 314, 11, 2, 6, 719, 605, 11, 8, 202, 27, 310, 4, 3772, 3501, 8, 2722, 58, 10, 10, 537, 2116, 180, 40, 14, 413, 173, 7, 263, 112, 37, 152, 377, 4, 537, 263, 846, 579, 178, 54, 75, 71, 476, 36, 413, 263, 2504, 182, 5, 17, 75, 2306, 922, 36, 279, 131, 2895, 17, 2867, 42, 17, 35, 921, 2, 192, 5, 1219, 3890, 19, 2, 217, 4122, 1710, 537, 2, 1236, 5, 736, 10, 10, 61, 403, 9, 2, 40, 61, 4494, 5, 27, 4494, 159, 90, 263, 2311, 4319, 309, 8, 178, 5, 82, 4319, 4, 65, 15, 2, 145, 143, 5122, 12, 2, 537, 746, 537, 537, 15, 2, 4, 2, 594, 7, 

In [10]:
# Pad or truncate every review so that all sequences share the same
# length (MAX_REVIEW_LEN) before they are fed to the model.
x_train = sequence.pad_sequences(sequences=x_train, maxlen=MAX_REVIEW_LEN)
x_test = sequence.pad_sequences(sequences=x_test, maxlen=MAX_REVIEW_LEN)

print('x_train.shape :', x_train.shape, 'x_test.shape :', x_test.shape)

x_train.shape : (25000, 400) x_test.shape : (25000, 400)


# Defining the Model

In [12]:
# Stack: embedding over a NUM_WORDS-sized vocabulary (64-dim vectors),
# one LSTM layer with 128 units, and a single sigmoid output unit.
model = Sequential([
    Embedding(NUM_WORDS, 64),
    LSTM(128),
    Dense(1, activation="sigmoid"),
])

# Compile with binary cross-entropy loss, the Adam optimizer and accuracy tracking.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train and Test the model

In [13]:
# Training hyperparameters
BATCH_SIZE = 24
EPOCHS = 5

# Early stopping watches validation accuracy, where larger is better.
# NOTE(review): the logged key for accuracy differs between Keras/TensorFlow
# versions ('val_acc' vs 'val_accuracy') — confirm 'val_acc' matches the
# installed version, otherwise the callback has nothing to monitor.
cbk_early_stopping = EarlyStopping(monitor='val_acc', mode='max')

# NOTE(review): the test split is also used as the validation data here.
model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
    callbacks=[cbk_early_stopping],
)




Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 