# Implementations to do: <br>
Vanilla recurrent neural network <br>
Long short-term memory network <br>
Gated Recurrent Unit <br>

In [1]:
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Activation, LSTM, GRU
from tensorflow.keras import optimizers
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier


In [2]:
from sklearn.model_selection import train_test_split

In [3]:
# Settings handed to the Reuters dataset loader
num_words = 30000    # value for the loader's `num_words` argument
maxlen = 50          # value for the loader's `maxlen` argument
test_split = 0.3     # value for the loader's `test_split` argument

# Load the dataset as (train, test) pairs of sequences and integer labels
(X_train, y_train), (X_test, y_test) = reuters.load_data(
    num_words = num_words,
    maxlen = maxlen,
    test_split = test_split,
)

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [5]:
len(X_train), len(X_test)

(1395, 599)

In [8]:
len(X_train[0])

37

In [6]:
# Carve a validation set (20%) out of the training data; the fixed random_state
# keeps the split repeatable.
# NOTE: X_train / y_train are rebound here, so re-running this cell splits the
# already-reduced training set again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size = 0.20,
    random_state = 1,
)

In [9]:
# Pad the sequences with zeros so every split becomes a rectangular
# (num_samples, sequence_length) array.
# padding = 'post' => 0's are appended to the end of each sequence.
# All three splits are padded to ONE shared length: the longest sequence found
# in any split (49 for this data). Padding each split on its own would only give
# matching widths by coincidence, and the models below expect a fixed length.
sequence_len = max(len(seq) for split in (X_train, X_val, X_test) for seq in split)
X_train = pad_sequences(X_train, maxlen = sequence_len, padding = 'post')
X_test = pad_sequences(X_test, maxlen = sequence_len, padding = 'post')
X_val = pad_sequences(X_val, maxlen = sequence_len, padding = 'post')

In [11]:
X_train.shape, X_val.shape, X_test.shape

((1116, 49), (279, 49), (599, 49))

convert to (num_samples, sequence_len, num_features)

In [12]:
# Give each split a trailing feature axis of size 1:
# (num_samples, sequence_len) -> (num_samples, sequence_len, 1)
X_train, X_test, X_val = (
    np.array(split).reshape(*split.shape[:2], 1)
    for split in (X_train, X_test, X_val)
)

In [14]:
X_train.shape, X_val.shape, X_test.shape

((1116, 49, 1), (279, 49, 1), (599, 49, 1))

In [15]:
# Stack the integer labels of all splits into one vector, in the order train, val, test
y_data = np.concatenate([y_train, y_val, y_test], axis = 0)

In [16]:
# Encode the stacked labels in a single call so that every split, once sliced
# back out of y_data, has the same number of class columns.
y_data = to_categorical(y_data)  #one-hot encoding of y

In [17]:
# Cut the one-hot matrix back into train / val / test pieces at the cumulative
# split sizes (same order in which the labels were stacked); anything past the
# third boundary is discarded.
y_train_cat, y_val_cat, y_test_cat = np.split(
    y_data, np.cumsum([len(y_train), len(y_val), len(y_test)])
)[:3]

In [20]:
y_train_cat.shape, y_val_cat.shape, y_test_cat.shape

((1116, 46), (279, 46), (599, 46))

In [18]:
len(y_train), len(y_train_cat), len(y_val), len(y_val_cat), len(y_test), len(y_test_cat)

(1116, 1116, 279, 279, 599, 599)

# Vanilla RNN

In [21]:
def vanilla_rnn(input_shape = (49, 1), num_classes = 46, units = 50, learning_rate = 0.001):
    """Build and compile a single-layer SimpleRNN classifier (many to one).

    Args:
        input_shape: (sequence_len, num_features) of one input sample.
        num_classes: width of the final Dense/softmax layer.
        units: number of units in the SimpleRNN layer.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model (categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    # return_sequences = False: only the final output of the sequence is passed on (many to one)
    model.add(SimpleRNN(units, input_shape = input_shape, return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [22]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1116, 49, 1), (1116,), (599, 49, 1), (599,))

In [25]:
# Build the single-layer SimpleRNN model and print its layer summary
model = vanilla_rnn()
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 50)                2600      
_________________________________________________________________
dense_2 (Dense)              (None, 46)                2346      
_________________________________________________________________
activation_2 (Activation)    (None, 46)                0         
Total params: 4,946
Trainable params: 4,946
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Wrap the vanilla RNN builder in the scikit-learn style classifier,
# then train it on the one-hot encoded training labels.
model = KerasClassifier(
    build_fn = vanilla_rnn,
    epochs = 200,
    batch_size = 50,
    verbose = 1,
)
model.fit(X_train, y_train_cat)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
 4/23 [====>.........................] - ETA: 0s - loss: 1.0418 - accuracy: 0.7300

KeyboardInterrupt: 

Stacked vanilla RNN

In [27]:
def stacked_vanilla_rnn(input_shape = (49, 1), num_classes = 46, units = 50, learning_rate = 0.001):
    """Build and compile a two-layer (stacked) SimpleRNN classifier.

    Args:
        input_shape: (sequence_len, num_features) of one input sample.
        num_classes: width of the final Dense/softmax layer.
        units: number of units in each SimpleRNN layer.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model (categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    # return_sequences has to be True on the first layer so the second
    # recurrent layer receives the full sequence and the layers can be stacked
    model.add(SimpleRNN(units, input_shape = input_shape, return_sequences = True))
    model.add(SimpleRNN(units, return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [28]:
# Build the stacked SimpleRNN model and print its layer summary
model = stacked_vanilla_rnn()
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_4 (SimpleRNN)     (None, 49, 50)            2600      
_________________________________________________________________
simple_rnn_5 (SimpleRNN)     (None, 50)                5050      
_________________________________________________________________
dense_4 (Dense)              (None, 46)                2346      
_________________________________________________________________
activation_4 (Activation)    (None, 46)                0         
Total params: 9,996
Trainable params: 9,996
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Train the STACKED vanilla RNN: this cell sits in the stacked-RNN section, so the
# builder must be stacked_vanilla_rnn (it previously passed the single-layer
# vanilla_rnn, so the stacked model was never trained).
model = KerasClassifier(build_fn = stacked_vanilla_rnn, epochs = 200, batch_size = 50, verbose = 1)
model.fit(X_train, y_train_cat)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200

KeyboardInterrupt: 

# LSTM

In [29]:
def lstm(input_shape = (49, 1), num_classes = 46, units = 50, learning_rate = 0.001):
    """Build and compile a single-layer LSTM classifier (many to one).

    Args:
        input_shape: (sequence_len, num_features) of one input sample.
        num_classes: width of the final Dense/softmax layer.
        units: number of units in the LSTM layer.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model (categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    # return_sequences = False: only the final output of the sequence is passed on
    model.add(LSTM(units, input_shape = input_shape, return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [31]:
# Build the single-layer LSTM model and print its layer summary
model = lstm()
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 50)                10400     
_________________________________________________________________
dense_6 (Dense)              (None, 46)                2346      
_________________________________________________________________
activation_6 (Activation)    (None, 46)                0         
Total params: 12,746
Trainable params: 12,746
Non-trainable params: 0
_________________________________________________________________


Stacked LSTM

In [32]:
def stacked_lstm(input_shape = (49, 1), num_classes = 46, units = 50, learning_rate = 0.001):
    """Build and compile a two-layer (stacked) LSTM classifier.

    Args:
        input_shape: (sequence_len, num_features) of one input sample.
        num_classes: width of the final Dense/softmax layer.
        units: number of units in each LSTM layer.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model (categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    # The first layer returns the whole sequence so the second LSTM can consume it
    model.add(LSTM(units, input_shape = input_shape, return_sequences = True))
    model.add(LSTM(units, return_sequences = False))
    # TODO: add a third LSTM layer ???

    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [33]:
# Build the stacked LSTM model and print its layer summary
model = stacked_lstm()
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 49, 50)            10400     
_________________________________________________________________
lstm_3 (LSTM)                (None, 50)                20200     
_________________________________________________________________
dense_7 (Dense)              (None, 46)                2346      
_________________________________________________________________
activation_7 (Activation)    (None, 46)                0         
Total params: 32,946
Trainable params: 32,946
Non-trainable params: 0
_________________________________________________________________


# GRU

In [36]:
def gru(input_shape = (49, 1), num_classes = 46, units = 50, learning_rate = 0.001):
    """Build and compile a single-layer GRU classifier (many to one).

    Args:
        input_shape: (sequence_len, num_features) of one input sample.
        num_classes: width of the final Dense/softmax layer.
        units: number of units in the GRU layer.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model (categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    # return_sequences = False: only the final output of the sequence is passed on
    model.add(GRU(units, input_shape = input_shape, return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [37]:
# Build the single-layer GRU model and print its layer summary
model = gru()
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 50)                7950      
_________________________________________________________________
dense_9 (Dense)              (None, 46)                2346      
_________________________________________________________________
activation_9 (Activation)    (None, 46)                0         
Total params: 10,296
Trainable params: 10,296
Non-trainable params: 0
_________________________________________________________________


Stacked GRU

In [38]:
def stacked_gru(input_shape = (49, 1), num_classes = 46, units = 50, learning_rate = 0.001):
    """Build and compile a two-layer (stacked) GRU classifier.

    Args:
        input_shape: (sequence_len, num_features) of one input sample.
        num_classes: width of the final Dense/softmax layer.
        units: number of units in each GRU layer.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model (categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    # The first layer returns the whole sequence so the second GRU can consume it
    model.add(GRU(units, input_shape = input_shape, return_sequences = True))
    model.add(GRU(units, return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [39]:
# Build the stacked GRU model and print its layer summary
model = stacked_gru()
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 49, 50)            7950      
_________________________________________________________________
gru_2 (GRU)                  (None, 50)                15300     
_________________________________________________________________
dense_10 (Dense)             (None, 46)                2346      
_________________________________________________________________
activation_10 (Activation)   (None, 46)                0         
Total params: 25,596
Trainable params: 25,596
Non-trainable params: 0
_________________________________________________________________


# one to many - one input with many outputs

SimpleRNN(50, input_shape = (1,1), return_sequences = True)

# many to many - many inputs with many outputs

SimpleRNN(50, input_shape = (49,1), return_sequences = True)

# many to one - many inputs with one output

SimpleRNN(50, input_shape = (49,1), return_sequences = False)

# one to one - one input with one output

SimpleRNN(50, input_shape = (1,1), return_sequences = False)

# Varied length sequences

SimpleRNN(50, input_shape = (None,1), return_sequences = True) <br>
(The sequence length may differ from one batch to the next, but all sequences within a single batch must have the same length.)