# Recurrent Neural Network (RNN) Demo

## How to do integer encoding using keras

In [1]:
import numpy as np

In [2]:
docs = ['recurrent neural network',
		'neural network',
		'artificial neural',
		'connections between nodes',
		'can create a cycle',
		'allowing output',
		'some nodes to affect subsequent',
		'exhibit temporal',
		'dynamic behavior',
		'type of Neural Network',
    'affect subsequent']

In [3]:
from keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(oov_token='<nothing>')

In [4]:
tokenizer.fit_on_texts(docs)

In [5]:
tokenizer.word_index

{'<nothing>': 1,
 'neural': 2,
 'network': 3,
 'nodes': 4,
 'affect': 5,
 'subsequent': 6,
 'recurrent': 7,
 'artificial': 8,
 'connections': 9,
 'between': 10,
 'can': 11,
 'create': 12,
 'a': 13,
 'cycle': 14,
 'allowing': 15,
 'output': 16,
 'some': 17,
 'to': 18,
 'exhibit': 19,
 'temporal': 20,
 'dynamic': 21,
 'behavior': 22,
 'type': 23,
 'of': 24}

In [6]:
tokenizer.word_counts

OrderedDict([('recurrent', 1),
             ('neural', 4),
             ('network', 3),
             ('artificial', 1),
             ('connections', 1),
             ('between', 1),
             ('nodes', 2),
             ('can', 1),
             ('create', 1),
             ('a', 1),
             ('cycle', 1),
             ('allowing', 1),
             ('output', 1),
             ('some', 1),
             ('to', 1),
             ('affect', 2),
             ('subsequent', 2),
             ('exhibit', 1),
             ('temporal', 1),
             ('dynamic', 1),
             ('behavior', 1),
             ('type', 1),
             ('of', 1)])

In [7]:
tokenizer.document_count

11

In [8]:
sequences = tokenizer.texts_to_sequences(docs)
sequences

[[7, 2, 3],
 [2, 3],
 [8, 2],
 [9, 10, 4],
 [11, 12, 13, 14],
 [15, 16],
 [17, 4, 18, 5, 6],
 [19, 20],
 [21, 22],
 [23, 24, 2, 3],
 [5, 6]]

In [9]:
from keras.utils import pad_sequences

In [11]:
sequences = pad_sequences(sequences, padding='post')

In [12]:
sequences

array([[ 7,  2,  3,  0,  0],
       [ 2,  3,  0,  0,  0],
       [ 8,  2,  0,  0,  0],
       [ 9, 10,  4,  0,  0],
       [11, 12, 13, 14,  0],
       [15, 16,  0,  0,  0],
       [17,  4, 18,  5,  6],
       [19, 20,  0,  0,  0],
       [21, 22,  0,  0,  0],
       [23, 24,  2,  3,  0],
       [ 5,  6,  0,  0,  0]], dtype=int32)

## Sentiment Analysis

In [14]:
from keras.datasets import imdb
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

In [15]:
(X_train, y_train), (X_test, y_test) = imdb.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [16]:
y_test

array([0, 1, 1, ..., 0, 0, 0])

In [17]:
X_train[0]

[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 22665,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 21631,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 19193,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 10311,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 31050,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 12118,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5

In [18]:
print(len(X_train[2]))
print(len(X_train[3]))

141
550


In [20]:
X_train = pad_sequences(X_train, padding='post', maxlen=50)
X_test = pad_sequences(X_test, padding='post', maxlen=50)

In [21]:
X_train[0]

array([2071,   56,   26,  141,    6,  194, 7486,   18,    4,  226,   22,
         21,  134,  476,   26,  480,    5,  144,   30, 5535,   18,   51,
         36,   28,  224,   92,   25,  104,    4,  226,   65,   16,   38,
       1334,   88,   12,   16,  283,    5,   16, 4472,  113,  103,   32,
         15,   16, 5345,   19,  178,   32], dtype=int32)

In [22]:
print(len(X_train[2]))
print(len(X_train[3]))

50
50


In [23]:
model = Sequential()

model.add(SimpleRNN(32, input_shape=(50,1), return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 32)                1088      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 1,121
Trainable params: 1,121
Non-trainable params: 0
_________________________________________________________________


In [24]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f3b049373a0>

# How to do encodings using keras embeddings

In [25]:
docs = ['recurrent neural network',
		'neural network',
		'artificial neural',
		'connections between nodes',
		'can create a cycle',
		'allowing output',
		'some nodes to affect subsequent',
		'exhibit temporal',
		'dynamic behavior',
		'type of Neural Network',
    'affect subsequent']

In [26]:
from keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()

In [27]:
tokenizer.fit_on_texts(docs)

In [28]:
len(tokenizer.word_index)

23

In [29]:
sequences = tokenizer.texts_to_sequences(docs)
sequences

[[6, 1, 2],
 [1, 2],
 [7, 1],
 [8, 9, 3],
 [10, 11, 12, 13],
 [14, 15],
 [16, 3, 17, 4, 5],
 [18, 19],
 [20, 21],
 [22, 23, 1, 2],
 [4, 5]]

In [30]:
from keras.utils import pad_sequences
sequences = pad_sequences(sequences,padding='post')
sequences

array([[ 6,  1,  2,  0,  0],
       [ 1,  2,  0,  0,  0],
       [ 7,  1,  0,  0,  0],
       [ 8,  9,  3,  0,  0],
       [10, 11, 12, 13,  0],
       [14, 15,  0,  0,  0],
       [16,  3, 17,  4,  5],
       [18, 19,  0,  0,  0],
       [20, 21,  0,  0,  0],
       [22, 23,  1,  2,  0],
       [ 4,  5,  0,  0,  0]], dtype=int32)

In [31]:
model = Sequential()
model.add(Embedding(23,output_dim=2,input_length=5)) #Total vocab len, ouput dim(per word would be represend by 2 vector), input len per row

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 5, 2)              46        
                                                                 
Total params: 46
Trainable params: 46
Non-trainable params: 0
_________________________________________________________________


In [32]:
model.compile('adam','accuracy')

In [33]:
pred = model.predict(sequences)
print(pred)

[[[-4.2283632e-02 -2.3445010e-02]
  [ 3.4622166e-02 -5.0757304e-03]
  [ 4.3998901e-02 -2.1213269e-02]
  [-3.5088323e-02  7.5129271e-03]
  [-3.5088323e-02  7.5129271e-03]]

 [[ 3.4622166e-02 -5.0757304e-03]
  [ 4.3998901e-02 -2.1213269e-02]
  [-3.5088323e-02  7.5129271e-03]
  [-3.5088323e-02  7.5129271e-03]
  [-3.5088323e-02  7.5129271e-03]]

 [[ 3.6460485e-02 -3.2052889e-02]
  [ 3.4622166e-02 -5.0757304e-03]
  [-3.5088323e-02  7.5129271e-03]
  [-3.5088323e-02  7.5129271e-03]
  [-3.5088323e-02  7.5129271e-03]]

 [[-3.1580776e-04 -1.6731143e-02]
  [ 6.4457580e-04 -4.8798729e-02]
  [-2.2690892e-02  4.7189210e-02]
  [-3.5088323e-02  7.5129271e-03]
  [-3.5088323e-02  7.5129271e-03]]

 [[ 3.2017637e-02 -4.9334168e-03]
  [ 1.5682939e-02 -5.1497594e-03]
  [-1.1492148e-03  2.7590428e-02]
  [-3.5404190e-03 -1.3710380e-02]
  [-3.5088323e-02  7.5129271e-03]]

 [[ 2.1315467e-02  4.1507963e-02]
  [-3.1173706e-02 -8.2135201e-05]
  [-3.5088323e-02  7.5129271e-03]
  [-3.5088323e-02  7.5129271e-03]
  [-

In [34]:
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras import Sequential
from keras.layers import Dense,SimpleRNN,Embedding,Flatten

In [35]:
(X_train,y_train),(X_test,y_test) = imdb.load_data()

In [36]:
X_train = pad_sequences(X_train,padding='post',maxlen=50)
X_test = pad_sequences(X_test,padding='post',maxlen=50)

In [37]:
X_train.shape

(25000, 50)

In [38]:
model = Sequential()
model.add(Embedding(10000,output_dim=2,input_length=50))
model.add(SimpleRNN(32,return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 50, 2)             20000     
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 32)                1120      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 21,153
Trainable params: 21,153
Non-trainable params: 0
_________________________________________________________________


In [39]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(X_train, y_train,epochs=1,validation_data=(X_test,y_test))



# Predictions

In [40]:
X_test[0][0:50].reshape(1,-1).shape

(1, 50)

In [41]:
test_data = X_test[0][0:50].reshape(1,-1)

In [42]:
model.predict(test_data)



array([[0.11420503]], dtype=float32)