<a href="https://colab.research.google.com/github/DiegoAnas/SNN-NLP/blob/master/Sentiment%20LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [2]:
import tensorflow
print(tensorflow.__version__)
import tensorflow.keras as keras

from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Input, Embedding, Conv1D, MaxPooling1D, \
  concatenate, Dense, Activation, LSTM, Dropout, Bidirectional, Flatten, \
  SpatialDropout1D

import numpy as np
from sklearn.model_selection import StratifiedKFold

2.1.0-rc1


In [0]:
# Hyperparameters shared by the experiments below.

# -- Embedding --
max_features = 20_000   # vocabulary size (num_words for imdb.load_data, input dim of Embedding)
maxlen = 400            # sequence length passed to pad_sequences
embedding_size = 128    # output dimension of the Embedding layer

# -- LSTM --
lstm_output_size = 100  # units per LSTM layer in the single-layer experiments

# -- Training --
batch_size = 30
epochs = 3

In [4]:
print('Loading data...')
# Only the `max_features` most frequent words are kept in the vocabulary.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

# Merge the predefined train/test split: the cross-validation splitter below
# draws its own folds from the full set of reviews.
x_data = np.concatenate((x_train, x_test))
y_data = np.concatenate((y_train, y_test))

# `random_state` only has an effect when `shuffle=True`; without it the seed is
# ignored and recent scikit-learn versions raise a ValueError.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)


In [0]:
# Network Architecture
def create_LSTM_model(lstm_cells:[int], activation:str='tanh', recurrent_activation='sigmoid',
                      dropout:float=0.0, recurrent_dropout:float=0.0, embedding_dropout:bool=False,
                      embedding_dropout_rate:float=0.0):
  """Build and compile a stacked (unidirectional) LSTM binary classifier.

  Architecture: Input -> Embedding -> [SpatialDropout1D] -> LSTM x len(lstm_cells)
  -> Dense(1, sigmoid).

  Args:
    lstm_cells: number of units of each stacked LSTM layer, in order; must
      contain at least one entry.
    activation: `activation` argument of every LSTM layer.
    recurrent_activation: `recurrent_activation` argument of every LSTM layer.
    dropout: `dropout` argument of every LSTM layer.
    recurrent_dropout: `recurrent_dropout` argument of every LSTM layer.
    embedding_dropout: if True, a SpatialDropout1D layer is inserted right
      after the embedding.
    embedding_dropout_rate: rate of that SpatialDropout1D layer.

  Returns:
    A Keras `Model` compiled with binary cross-entropy loss, the Adam
    optimizer and the accuracy metric.

  Raises:
    ValueError: if `lstm_cells` is empty.
  """
  # Without any LSTM layer the Dense head would sit directly on the 3-D
  # embedding output and emit one prediction per timestep.
  if not lstm_cells:
    raise ValueError('lstm_cells must contain at least one layer size')

  # Use the shared `maxlen` so the input always matches the padded sequences.
  var_input = Input(shape=(maxlen,))
  previous_layer = Embedding(max_features,
                             embedding_size,
                             input_length=maxlen)(var_input)
  if embedding_dropout:
    previous_layer = SpatialDropout1D(rate=embedding_dropout_rate)(previous_layer)

  last_idx = len(lstm_cells) - 1
  for idx, lstm_units in enumerate(lstm_cells):
    # Every layer except the last must return the full sequence so that the
    # next LSTM receives a 3-D input.
    previous_layer = LSTM(units=lstm_units, activation=activation,
                          recurrent_activation=recurrent_activation,
                          dropout=dropout, recurrent_dropout=recurrent_dropout,
                          return_sequences=(idx != last_idx))(previous_layer)

  dense_layer = Dense(1, activation='sigmoid')(previous_layer)
  model = Model(inputs=var_input, outputs=dense_layer)
  model.compile(loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])
  return model

In [7]:
# Test 1-layer LSTM RNN
# Cross-validation: a fresh model is built and trained for every fold of `skf`.
for train, test in skf.split(x_data, y_data):
  fold_train = (x_data[train], y_data[train])
  fold_val = (x_data[test], y_data[test])
  target = create_LSTM_model(lstm_cells=[lstm_output_size], activation='tanh')
  target.fit(*fold_train,
             batch_size=batch_size,
             epochs=epochs,
             validation_data=fold_val)

Train on 40000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 40000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 40000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 40000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 40000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [8]:
# NOTE: this reassigns the global `epochs`; every cell executed after this one
# therefore also trains for 2 epochs.
epochs = 2
# Test 1-layer LSTM RNN w SELU
# Cross-validation: a fresh model is built and trained for every fold of `skf`.
for train, test in skf.split(x_data, y_data):
  fold_train = (x_data[train], y_data[train])
  fold_val = (x_data[test], y_data[test])
  target = create_LSTM_model(lstm_cells=[lstm_output_size], activation='selu')
  target.fit(*fold_train,
             batch_size=batch_size,
             epochs=epochs,
             validation_data=fold_val)

Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
 1470/40000 [>.............................] - ETA: 10:01 - loss: nan - accuracy: 0.5049

KeyboardInterrupt: ignored

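Changing the LSTM activation function from tanh to SELU drastically increases training time. Accuracy stays poor (about 0.50, i.e. chance level) and the loss becomes `nan`, so training cannot make progress. We suspect that SELU, which unlike tanh is unbounded for positive inputs, lets the recurrent activations grow until they become numerically unstable.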

In [17]:
# Show the layers, output shapes and parameter counts of the 2-layer stacked LSTM.
create_LSTM_model(lstm_cells=[100, 100], activation='tanh').summary()

Model: "model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        [(None, 400)]             0         
_________________________________________________________________
embedding_13 (Embedding)     (None, 400, 128)          2560000   
_________________________________________________________________
lstm_18 (LSTM)               (None, 400, 100)          91600     
_________________________________________________________________
lstm_19 (LSTM)               (None, 100)               80400     
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 101       
Total params: 2,732,101
Trainable params: 2,732,101
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Test 2-layer LSTM RNN
# Cross-validation: a fresh model is built and trained for every fold of `skf`.
for train, test in skf.split(x_data, y_data):
  fold_train = (x_data[train], y_data[train])
  fold_val = (x_data[test], y_data[test])
  target = create_LSTM_model(lstm_cells=[100, 100], activation='tanh')
  target.fit(*fold_train,
             batch_size=batch_size,
             epochs=epochs,
             validation_data=fold_val)

Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


In [0]:
# Test 2-layer LSTM RNN w SELU
# Cross-validation: a fresh model is built and trained for every fold of `skf`.
for train, test in skf.split(x_data, y_data):
  fold_train = (x_data[train], y_data[train])
  fold_val = (x_data[test], y_data[test])
  target = create_LSTM_model(lstm_cells=[100, 100], activation='selu')
  target.fit(*fold_train,
             batch_size=batch_size,
             epochs=epochs,
             validation_data=fold_val)

In [0]:
# Network Architecture
def create_BLSTM_model(lstm_cells:[int], activation:str='tanh', recurrent_activation='sigmoid',
                      dropout:float=0.0, recurrent_dropout:float=0.0, embedding_dropout:bool=False,
                      embedding_dropout_rate:float=0.0):
  """Build and compile a stacked bidirectional LSTM binary classifier.

  Architecture: Input -> Embedding -> [SpatialDropout1D]
  -> Bidirectional(LSTM) x len(lstm_cells) -> Dense(1, sigmoid).

  Args:
    lstm_cells: number of units of the LSTM wrapped by each Bidirectional
      layer, in order; must contain at least one entry.
    activation: `activation` argument of every LSTM layer.
    recurrent_activation: `recurrent_activation` argument of every LSTM layer.
    dropout: `dropout` argument of every LSTM layer.
    recurrent_dropout: `recurrent_dropout` argument of every LSTM layer.
    embedding_dropout: if True, a SpatialDropout1D layer is inserted right
      after the embedding.
    embedding_dropout_rate: rate of that SpatialDropout1D layer.

  Returns:
    A Keras `Model` compiled with binary cross-entropy loss, the Adam
    optimizer and the accuracy metric.

  Raises:
    ValueError: if `lstm_cells` is empty.
  """
  # Without any recurrent layer the Dense head would sit directly on the 3-D
  # embedding output and emit one prediction per timestep.
  if not lstm_cells:
    raise ValueError('lstm_cells must contain at least one layer size')

  # Use the shared `maxlen` so the input always matches the padded sequences.
  var_input = Input(shape=(maxlen,))
  previous_layer = Embedding(max_features,
                             embedding_size,
                             input_length=maxlen)(var_input)
  if embedding_dropout:
    previous_layer = SpatialDropout1D(rate=embedding_dropout_rate)(previous_layer)

  last_idx = len(lstm_cells) - 1
  for idx, lstm_units in enumerate(lstm_cells):
    # Every layer except the last must return the full sequence so that the
    # next recurrent layer receives a 3-D input.
    recurrent = LSTM(units=lstm_units, activation=activation,
                     recurrent_activation=recurrent_activation,
                     dropout=dropout, recurrent_dropout=recurrent_dropout,
                     return_sequences=(idx != last_idx))
    previous_layer = Bidirectional(recurrent)(previous_layer)

  dense_layer = Dense(1, activation='sigmoid')(previous_layer)
  model = Model(inputs=var_input, outputs=dense_layer)
  model.compile(loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])
  return model

In [21]:
# Test 1-layer BILSTM RNN
# Cross-validation: a fresh model is built and trained for every fold of `skf`.
for train, test in skf.split(x_data, y_data):
  fold_train = (x_data[train], y_data[train])
  fold_val = (x_data[test], y_data[test])
  target = create_BLSTM_model(lstm_cells=[lstm_output_size], activation='tanh')
  target.fit(*fold_train,
             batch_size=batch_size,
             epochs=epochs,
             validation_data=fold_val)

Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


In [22]:
# Test 1-layer BILSTM RNN w SELU
# Cross-validation: a fresh model is built and trained for every fold of `skf`.
for train, test in skf.split(x_data, y_data):
  fold_train = (x_data[train], y_data[train])
  fold_val = (x_data[test], y_data[test])
  target = create_BLSTM_model(lstm_cells=[lstm_output_size], activation='selu')
  target.fit(*fold_train,
             batch_size=batch_size,
             epochs=epochs,
             validation_data=fold_val)

Train on 40000 samples, validate on 10000 samples
Epoch 1/2
 1200/40000 [..............................] - ETA: 20:46 - loss: 0.6951 - accuracy: 0.5000

KeyboardInterrupt: ignored