In this code demo, we will see how to build a recurrent neural network.

In [1]:
# Mount Google Drive into the Colab runtime so files under /content/gdrive can be read.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import pandas as pd
import os
# Drive folder holding the inputs read below (headlines.csv and the GloVe vectors file).
BASE_DIR="/content/gdrive/MyDrive/RNN-LSTM"

In [4]:
# Load the headlines dataset from the project folder on Drive.
headlines_path=os.path.join(BASE_DIR,'headlines.csv')
train=pd.read_csv(headlines_path)

In [5]:
# Preview the first rows to check the available columns (ID, TITLE, CATEGORY).
train.head()

Unnamed: 0,ID,TITLE,CATEGORY
0,226435,Google+ rolls out 'Stories' for tricked out ph...,t
1,356684,Dov Charney's Redeeming Quality,b
2,246926,White God adds Un Certain Regard to the Palm Dog,e
3,318360,"Google shows off Androids for wearables, cars,...",t
4,277235,China May new bank loans at 870.8 bln yuan,b


In [6]:
## We will create a classifier using an embedding layer and a recurrent layer
# Features: the headline text; target: the single-letter category code.
X=train['TITLE']
y=train['CATEGORY']

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [8]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the split is repeatable.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=200)

In [9]:
# Encoder used to map the category letters to integer class ids (and back again at evaluation time).
enc=LabelEncoder()

In [10]:
# Fit the encoder on the training labels and replace them with integer class ids.
# The labels are re-derived from the untouched `y` (aligned on X_train's index) instead of
# from `y_train` itself, so re-running this cell never refits the encoder on values that
# have already been encoded.
y_train=enc.fit_transform(y.loc[X_train.index])

In [11]:
# Classes learned by the encoder; a label's position in this array is its integer id.
enc.classes_

array(['b', 'e', 'm', 't'], dtype=object)

In [12]:
# Inspect the encoded training labels (integer class ids).
y_train

array([2, 3, 3, ..., 3, 1, 2])

In [13]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [14]:
# Length every headline sequence is padded to (passed as maxlen to pad_sequences and as input_length to the model).
seq_len=16
# Vocabulary cap: passed to the tokenizer as num_words and used as the number of embedding rows.
max_words=10000

In [15]:
## Build the vocabulary from the training headlines only, capped at max_words
train_titles=X_train.tolist()
tokenizer=Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_titles)
## Map every training headline to its list of integer word ids
sequence=tokenizer.texts_to_sequences(train_titles)
## Bring all sequences to the common length seq_len
train_features=pad_sequences(sequence,maxlen=seq_len)

In [16]:
# Inspect the padded training matrix (one row of word ids per headline).
train_features

array([[   0,    0,    0, ...,  142, 1562, 8052],
       [   0,    0,    0, ...,    4, 1671,  525],
       [   0,    0,    0, ..., 5370,    6,   47],
       ...,
       [   0,    0,    0, ..., 4732, 1042,  359],
       [   0,    0,    0, ...,   46,   41,   80],
       [   0,    0,    0, ..., 2953, 6426, 2189]], dtype=int32)

In [17]:
# (number of training headlines, seq_len)
train_features.shape

(168967, 16)

In [18]:
## Encode the test headlines with the tokenizer that was fitted on the training data
test_titles=X_test.tolist()
sequence=tokenizer.texts_to_sequences(test_titles)
## Pad to the same length as the training features, then display the result
test_features=pad_sequences(sequence,maxlen=seq_len)
test_features

array([[   0,    0,    0, ...,  113,    2,   31],
       [   0,    0,    0, ...,    4, 4018, 3115],
       [   0,    0,    0, ...,  375, 5948, 4400],
       ...,
       [   0,    0,    0, ...,   11,  157, 1648],
       [   0,    0,    0, ...,   97,   76,    7],
       [   0,    0,    0, ...,  310, 3979, 5986]], dtype=int32)

In [19]:
# (number of test headlines, seq_len)
test_features.shape

(42242, 16)

In [23]:
## Convert y_test and y_train to one hot encoded vector
from tensorflow.keras.utils import to_categorical

In [24]:
# One-hot encode the integer training labels (one column per class) for the categorical_crossentropy loss.
# NOTE(review): y_train is overwritten in place, so this cell is not safe to run twice in a row;
# re-run the label-encoding cell first if it has to be repeated.
y_train=to_categorical(y_train)

In [25]:
# Inspect the one-hot encoded training labels.
y_train

array([[0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       ...,
       [0., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.]], dtype=float32)

In [26]:
import numpy as np
import time

In [27]:
### Read glove word vectors
# Builds embedding_index: word -> float32 vector, and reports how long the load took.
t0=time.time()
embedding_index={}
# The context manager closes the file even if parsing a line raises.
with open(os.path.join(BASE_DIR,'glove.6B.50d.txt'),encoding='utf-8') as con:
    for line in con:
        # Each line is a word followed by the components of its vector, whitespace-separated.
        values=line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word=values[0]
        vector=np.asarray(values[1:],dtype='float32')
        embedding_index[word]=vector
t1=time.time()
print("Took {} seconds to load glove word vectors".format(t1-t0))

Took 9.959806442260742 seconds to load glove word vectors


In [28]:
## Build the embedding matrix: row i holds the GloVe vector of the word whose tokenizer id is i.
## Rows for ids without a GloVe vector (and the unused row 0) stay all-zero.
embedding_weight_matrix=np.zeros((max_words,50))
for word,i in tokenizer.word_index.items():
    # Ids at or beyond the vocabulary cap have no row in the matrix.
    if i >= max_words:
        continue
    vector=embedding_index.get(word)
    # Words missing from GloVe keep their zero row.
    if vector is None:
        continue
    embedding_weight_matrix[i]=vector

Now I will start assembling my model. Besides the dense layer and the embedding layer, this model will also use a simple recurrent layer.

In [37]:
## Now we will assemble the model
from keras.models import Sequential
from keras.layers import Dense, Embedding,SimpleRNN

In [30]:
# Embedding (initialised with the GloVe matrix) -> one simple recurrent layer -> softmax over the 4 categories.
model=Sequential([
    Embedding(input_dim=max_words,output_dim=50,
              weights=[embedding_weight_matrix],
              input_length=seq_len),
    SimpleRNN(50),
    Dense(4,activation='softmax'),
])

The entry point to the model is `Sequential`. The first layer is the embedding layer. The number of rows in this embedding layer is the same as the maximum vocabulary size. The output dimension of the embedding layer is 50 because I'm using word vectors that have a length of 50. I initialise it with the weight matrix that I created above, and I also have to specify the input length, which is my sequence length (16 in this case). Then I add a simple recurrent layer. Remember that the recurrent layer takes the output of the embedding layer as its input, and the embedding layer produces word vectors of dimension 50. The 50 passed to `SimpleRNN` is the number of recurrent units (the size of its hidden state); here it is chosen to match the embedding dimension, but it does not have to. The last layer is a dense layer. We have four categories, so we use four neurons with softmax activation.

In [31]:
# Show the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 16, 50)            500000    
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 50)                5050      
_________________________________________________________________
dense (Dense)                (None, 4)                 204       
Total params: 505,254
Trainable params: 505,254
Non-trainable params: 0
_________________________________________________________________


Now you can see that all the parameters in the model are trainable. We may want to freeze the parameters of the embedding layer so that the pre-trained word vectors are not changed during training.

In [38]:
# Freeze the embedding layer (layer 0) so its pre-trained weights are not updated during training.
model.layers[0].trainable=False

In [39]:
# Re-check the summary: the embedding parameters should now be listed as non-trainable.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 16, 50)            500000    
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 50)                5050      
_________________________________________________________________
dense (Dense)                (None, 4)                 204       
Total params: 505,254
Trainable params: 5,254
Non-trainable params: 500,000
_________________________________________________________________


If you look at the summary again, you can see that there are five lakh (500,000) non-trainable parameters, which belong to the embedding layer.

In [40]:
# Configure training: RMSprop optimizer, cross-entropy over the one-hot labels, accuracy as the reported metric.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])

In [35]:
# Train for 3 epochs in batches of 32, holding out 20% of the training rows for validation.
# NOTE(review): this cell's execution counter (35) is lower than the freeze/compile cells above (38-40),
# so the stored output may predate them; re-run the notebook top to bottom to confirm the reported figures.
model.fit(train_features,y_train,epochs=3,batch_size=32,validation_split=0.20)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7faa6b9c7850>

Val Accuracy is around 81%

Let's look at test accuracy

In [36]:
# Class probabilities for the test headlines: one row per headline, one column per class.
preds=model.predict(test_features)

# Index of the most probable class in each row, computed in a single vectorised call
# instead of a Python loop over the rows.
max_labels=np.argmax(preds,axis=1)

# Map the class ids back to the original category letters, then report the
# fraction of test headlines whose predicted category matches the true one.
pred_labels=enc.inverse_transform(max_labels)
(y_test==pred_labels).sum()/pred_labels.shape[0]

0.8159414800435586

The accuracy on the test data is about 82% (0.816), in line with the validation accuracy.

Now, you can have a recurrent neural network with more than one simple recurrent layer. For that to work, every recurrent layer preceding the final recurrent layer must be created with `return_sequences=True`. Let's assemble a recurrent neural network with three recurrent layers.

In [41]:
## Stacked recurrent layers: every recurrent layer except the last returns the full sequence of
# states (return_sequences=True) so that the next recurrent layer receives one input per time step.
model=Sequential([
    Embedding(input_dim=max_words,output_dim=50,
              weights=[embedding_weight_matrix],
              input_length=seq_len),
    SimpleRNN(50,return_sequences=True),
    SimpleRNN(50,return_sequences=True),
    SimpleRNN(50),
    Dense(4,activation='softmax'),
])

return_sequences: Whether to return only the last output in the output sequence (False, the default) or the full sequence (True).

Let's make the embedding layer non-trainable.

In [42]:
# Freeze the embedding layer (layer 0) of the stacked model so its pre-trained weights are not updated.
model.layers[0].trainable=False

Let’s look at the summary and let's compile a model and run it for three epochs

In [43]:
# Show the stacked model: the first two recurrent layers output a full sequence, the last one a single vector.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 16, 50)            500000    
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 16, 50)            5050      
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 16, 50)            5050      
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, 50)                5050      
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 204       
Total params: 515,354
Trainable params: 15,354
Non-trainable params: 500,000
_________________________________________________________________


In [44]:
# Same training configuration as the single-layer model: RMSprop, categorical cross-entropy, accuracy metric.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])

In [45]:
# Train the stacked model for 3 epochs in batches of 32, holding out 20% of the training rows for validation.
model.fit(train_features,y_train,epochs=3,batch_size=32,validation_split=0.20)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7faa6b772c50>

Now once we are done training with this network, you can see that the validation accuracy is now around 82%.

In [46]:
# Class probabilities from the stacked model: one row per test headline, one column per class.
preds=model.predict(test_features)

# Index of the most probable class in each row, computed in a single vectorised call
# instead of a Python loop over the rows.
max_labels=np.argmax(preds,axis=1)

# Map the class ids back to the original category letters, then report the
# fraction of test headlines whose predicted category matches the true one.
pred_labels=enc.inverse_transform(max_labels)
(y_test==pred_labels).sum()/pred_labels.shape[0]

0.8308318734908385

Our accuracy on the test data is also around 83%.