<a href="https://colab.research.google.com/github/HuyenNguyenHelen/LING-5412/blob/main/Assignment5_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow_datasets as tfds
import tensorflow as tf
import torch
import sklearn
from sklearn.metrics import accuracy_score, classification_report
tfds.disable_progress_bar()

In [2]:
# Get the GPU device name.
device_name = tf.test.gpu_device_name()

# The device name should look like the following:
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

Found GPU at: /device:GPU:0
There are 1 GPU(s) available.
We will use the GPU: Tesla K80


# Loading the dataset

In [3]:
dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

train_dataset.element_spec

[1mDownloading and preparing dataset imdb_reviews/plain_text/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...[0m
Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteQAON10/imdb_reviews-train.tfrecord
Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteQAON10/imdb_reviews-test.tfrecord
Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteQAON10/imdb_reviews-unsupervised.tfrecord




[1mDataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.[0m


(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [4]:
from keras.datasets import imdb
from keras.preprocessing import sequence

max_features = 10000
maxlen = 500
batch_size = 32

# data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen= maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [5]:
info

tfds.core.DatasetInfo(
    name='imdb_reviews',
    version=1.0.0,
    description='Large Movie Review Dataset.
This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.',
    homepage='http://ai.stanford.edu/~amaas/data/sentiment/',
    features=FeaturesDict({
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
        'text': Text(shape=(), dtype=tf.string),
    }),
    total_num_examples=100000,
    splits={
        'test': 25000,
        'train': 25000,
        'unsupervised': 50000,
    },
    supervised_keys=('text', 'label'),
    citation="""@InProceedings{maas-EtAl:2011:ACL-HLT2011,
      author    = {Maas, Andrew L.  and  Daly, Raymond E.  and  Pham, Peter T.  and  Huang, Dan  and  Ng, Andrew Y.  and  Potts, Christopher},
      title     = {Learning Word

In [6]:
# Shuffling the dataset
buffer_size = 10000
batch_size = 32
train_dataset = train_dataset.shuffle(buffer_size).batch(batch_size) #.prefetch (tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size) #.prefetch(tf.data.AUTOTUNE)

# Building the model

## Building the LSTM model

In [7]:
# Defining an evaluation metric function
def printing_eval_scores (y_true, y_pred, report=''):
  accuracy = sklearn.metrics.accuracy_score(y_true, y_pred)
  precision = sklearn.metrics.precision_score(y_true, y_pred, average='binary')
  recall = sklearn.metrics.recall_score(y_true, y_pred, average='binary')
  f1 = sklearn.metrics.f1_score(y_true, y_pred , average='binary')
  print('accuracy score: {:.3f}'.format(accuracy))
  print('precision score: {:.3f}'.format(precision))
  print('recall score: {:.3f}'.format(recall))
  print('F1 score: {:.3f}'.format(f1))
  if report is True:
    print(classification_report(y_true, y_pred))
  else:
    pass
  return accuracy, precision, recall, f1

### With different embedding sizes

In [8]:
## Representing the  text
vocab_size = 10000
encoder = tf.keras.layers.TextVectorization(max_tokens=vocab_size)
encoder.adapt(train_dataset.map(lambda x,y: x))

# Store vocabulary
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

# Defining a function for fitting vectorizer function/layer to vectorize text (review)
def fitting_vectorizer (text, label):
  text = tf.expand_dims(text, -1)
  return encoder (text), label

# storing text batch and label batch
text_batch, label_batch = next(iter(train_dataset))

# ## print an instance with vectorized review and label for observing
# print ('REVIEW:', text_batch[0])
# print('LABEL:', raw_train.class_names[label_batch[0]] )

train = train_dataset.map(fitting_vectorizer)
test = test_dataset.map(fitting_vectorizer)

In [9]:
for k,v in train.take(1):
  print(k)

tf.Tensor(
[[1749   37   44 ...    0    0    0]
 [  10  208   11 ...    0    0    0]
 [  10   26 7300 ...    0    0    0]
 ...
 [  11    7    4 ...    0    0    0]
 [  11   20   21 ...    0    0    0]
 [  51   10   86 ...    0    0    0]], shape=(32, 501), dtype=int64)


In [None]:
# Creating the model
embedding_sizes = [32,64,128]
for size in embedding_sizes:
  print ("\n========= embedding vectors'size= %s ============" %size)
  model = tf.keras.Sequential([encoder,
                              tf.keras.layers.Embedding(
                                  input_dim = len(encoder.get_vocabulary()),
                                  output_dim = size,
                                  mask_zero = True),
                              tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
                              tf.keras.layers.Dense(64, activation = 'relu'),
                              tf.keras.layers.Dense(1, activation = 'sigmoid')]) 
  print(model.summary())
  # Compile the model for training
  model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
                optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
                metrics = ['accuracy'])
  # Training the model
  history = model.fit (train_dataset,
                      epochs = 10, 
                      validation_data = test_dataset,
                      validation_steps = 30)
  # testing the model
  ### pred_label = tf.argmax(model.predict(test),1)
  pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
  true_label = np.concatenate([y for x, y in test], axis=0)

  test_loss, test_acc = model.evaluate (test_dataset)
  # print('Test loss: ', test_loss)
  # print('Test acurracy: ', test_acc)
  print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
  printing_eval_scores (true_label, pred_label, report=True)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss:  0.4095645844936371
Test acurracy:  0.8651599884033203
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss:  0.4872962236404419
Test acurracy:  0.8571599721908569
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss:  0.5302598476409912
Test acurracy:  0.8559600114822388


### With different vocabulary size

In [None]:
vocab_sizes = [5000, 7000, 10000]
for size in vocab_sizes:
  print ("\n========= vocabulary size = %s ============" %size)
  encoder = tf.keras.layers.TextVectorization(max_tokens=size)
  encoder.adapt(train_dataset.map(lambda x,y: x))
  # Store vocabulary
  vocab = np.array(encoder.get_vocabulary())

  model = tf.keras.Sequential([encoder,
                              tf.keras.layers.Embedding(
                                  input_dim = len(encoder.get_vocabulary()),
                                  output_dim = 32,
                                  mask_zero = True),
                              tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
                              tf.keras.layers.Dense(32, activation = 'relu'),
                              tf.keras.layers.Dense(1, activation = 'sigmoid')]) 
  print(model.summary())
  # Compile the model for training
  model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
                optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
                metrics = ['accuracy'])
  # Training the model
  history = model.fit (train_dataset,
                      epochs = 10, 
                      validation_data = test_dataset,
                      validation_steps = 30)
  # testing the model
  ### pred_label = tf.argmax(model.predict(test),1)
  pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
  true_label = np.concatenate([y for x, y in test_dataset], axis=0)

  test_loss, test_acc = model.evaluate (test_dataset)
  # print('Test loss: ', test_loss)
  print('Test acurracy: ', test_acc)
  print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
  printing_eval_scores (true_label, pred_label, report=True)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, None, 32)          160000    
                                                                 
 bidirectional (Bidirectiona  (None, 128)              49664     
 l)                                                              
                                                                 
 dense (Dense)               (None, 32)                4128      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 213,825
Trainable params: 213,825
Non-tra

NameError: ignored

### With different optimizers

In [None]:
# configure the model uisng optimizer and loss function
optimizers = ['adagrad', 'rmsprop', 'adam']

print(model.summary())
for opt in optimizers:
  print( '\n========== optimizer = %s' %opt)
  # Compile the model for training
  model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
                optimizer = opt,
                metrics = ['accuracy'])
  # Training the model
  history = model.fit (train_dataset,
                      epochs = 10, 
                      validation_data = test_dataset,
                      validation_steps = 30)
  # testing the model
  ### pred_label = tf.argmax(model.predict(test),1)
  pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
  true_label = np.concatenate([y for x, y in test_dataset], axis=0)

  test_loss, test_acc = model.evaluate (test_dataset)
  # print('Test loss: ', test_loss)
  print('Test acurracy: ', test_acc)
  print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
  printing_eval_scores (true_label, pred_label, report=True)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, None, 32)          160000    
                                                                 
 bidirectional (Bidirectiona  (None, 128)              49664     
 l)                                                              
                                                                 
 dense (Dense)               (None, 32)                4128      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 213,825
Trainable params: 213,825
Non-trai

### Replacing with LSTM with GRU 

In [None]:
model = tf.keras.Sequential([encoder,
                              tf.keras.layers.Embedding(
                                  input_dim = len(encoder.get_vocabulary()),
                                  output_dim = 32,
                                  mask_zero = True),
                              tf.keras.layers.GRU(64,
                                                  activation = 'tanh',
                                                  recurrent_activation = 'sigmoid',
                                                  recurrent_dropout = 0.0,
                                                  use_bias = True),
                              tf.keras.layers.Dense(32, activation = 'relu'),
                              tf.keras.layers.Dense(1, activation = 'sigmoid')]) 
print(model.summary())
# Compile the model for training
print( 'Training GRU model...')
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
              optimizer = 'adagrad',
              metrics = ['accuracy'])
# Training the model
history = model.fit (train_dataset,
                    epochs = 10, 
                    validation_data = test_dataset,
                    validation_steps = 30)
# testing the model
### pred_label = tf.argmax(model.predict(test),1)
pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
true_label = np.concatenate([y for x, y in test_dataset], axis=0)

test_loss, test_acc = model.evaluate (test_dataset)
print('Test acurracy: ', test_acc)
print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
printing_eval_scores (true_label, pred_label, report=True)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, None, 32)          320000    
                                                                 
 gru (GRU)                   (None, 64)                18816     
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 340,929
Trainable params: 340,929
Non-trainable params: 0
__________________________________________________

NameError: ignored

# Stacking 2 bidirectional LSTM

In [None]:
# Creating the model
model = tf.keras.Sequential([encoder,
                             tf.keras.layers.Embedding(
                                 input_dim = len(encoder.get_vocabulary()),
                                 output_dim = 64,
                                 mask_zero = True),
                            tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences = True),
                                                          backward_layer = tf.keras.layers.LSTM(64, return_sequences = True, go_backwards = True),
                                                          merge_mode = 'concat'),
                            tf.keras.layers.Dense(64, activation = 'relu'),
                            tf.keras.layers.Dense(1, activation = 'sigmoid')])


# Compile the model for training
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
              optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics = ['accuracy'])

# Training the model
history = model.fit (train_dataset,
                    epochs = 10, 
                    validation_data = test_dataset,
                    validation_steps = 30)
# testing the model
### pred_label = tf.argmax(model.predict(test),1)
pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
true_label = np.concatenate([y for x, y in test_dataset], axis=0)

test_loss, test_acc = model.evaluate (test_dataset)
print('Test acurracy: ', test_acc)
print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
printing_eval_scores (true_label, pred_label, report=True)

Epoch 1/10


ValueError: ignored

### With an average of all hidden states to fully connected layer

In [12]:
# Creating the model
model = tf.keras.Sequential([encoder,
                              tf.keras.layers.Embedding(
                                  input_dim = len(encoder.get_vocabulary()),
                                  output_dim = 64,
                                  mask_zero = True),
                              tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences = True)),
                             tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)),
                             tf.keras.layers.GlobalAveragePooling1D(),
                              tf.keras.layers.Dense(1, activation = 'sigmoid')]) 

# Compile the model for training
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
              optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics = ['accuracy'])

# Training the model
history = model.fit (train_dataset,
                    epochs = 5, 
                    validation_data = test_dataset,
                    validation_steps = 30)
# testing the model
### pred_label = tf.argmax(model.predict(test),1)
pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
true_label = np.concatenate([y for x, y in test_dataset], axis=0)

test_loss, test_acc = model.evaluate (test_dataset)
print('Test acurracy: ', test_acc)
print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
printing_eval_scores (true_label, pred_label, report=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

KeyboardInterrupt: ignored

In [23]:
#### test the new way to build the model
# Creating the model
input_dim = len(encoder.get_vocabulary())
_input = tf.keras.Input(shape = (1,), dtype=tf.string)

## Block 1
x = encoder (_input) 
x = tf.keras.layers.Embedding(input_dim = input_dim,output_dim = 64, mask_zero = True) (x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences = True)) (x)
x = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)) (x)
x = tf.keras.layers.GlobalAveragePooling1D() (x)

## output layer
output = tf.keras.layers.Dense(1, activation = 'sigmoid') (x)

## combine in one
model = tf.keras.Model(_input,output)


## Compile the model for training
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
              optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics = ['accuracy'])

# Training the model
history = model.fit (train_dataset,
                    epochs = 5, 
                    validation_data = test_dataset,
                    validation_steps = 30)
# testing the model
### pred_label = tf.argmax(model.predict(test),1)
pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
true_label = np.concatenate([y for x, y in test_dataset], axis=0)

test_loss, test_acc = model.evaluate (test_dataset)
print('Test acurracy: ', test_acc)
print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
printing_eval_scores (true_label, pred_label, report=True)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test acurracy:  0.8629599809646606

Testing performance:
 Loss: 0.352 - Accuracy: 0.863
accuracy score: 0.863
precision score: 0.863
recall score: 0.863
F1 score: 0.863
              precision    recall  f1-score   support

           0       0.86      0.86      0.86     12500
           1       0.86      0.86      0.86     12500

    accuracy                           0.86     25000
   macro avg       0.86      0.86      0.86     25000
weighted avg       0.86      0.86      0.86     25000



(0.86296, 0.8627278541733291, 0.86328, 0.8630038387715933)

In [None]:
# Creating the model
model = tf.keras.Sequential([encoder,
                              tf.keras.layers.Embedding(
                                  input_dim = len(encoder.get_vocabulary()),
                                  output_dim = 64,
                                  mask_zero = True),
                              tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences = True)),
                            # tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)),
                             tf.keras.layers.GlobalAveragePooling1D(),
                              tf.keras.layers.Dense(1, activation = 'sigmoid')]) 

print(model.summary())
# Compile the model for training
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
              optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics = ['accuracy'])

# Training the model
history = model.fit (train_dataset,
                    epochs = 5, 
                    validation_data = test_dataset,
                    validation_steps = 30)
# testing the model
### pred_label = tf.argmax(model.predict(test),1)
pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
true_label = np.concatenate([y for x, y in test_dataset], axis=0)

test_loss, test_acc = model.evaluate (test_dataset)
print('Test acurracy: ', test_acc)
print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
printing_eval_scores (true_label, pred_label, report=True)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, None, 64)          640000    
                                                                 
 bidirectional (Bidirectiona  (None, None, 128)        66048     
 l)                                                              
                                                                 
 global_average_pooling1d (G  (None, 128)              0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                        

(0.85764, 0.8620717583218596, 0.85152, 0.8567633919587878)

## With attention layers

In [None]:
from keras import backend as K
units = 64
max_length = 120


_input = tf.keras.Input(shape = (1,), dtype=tf.string)
vectorizer = encoder (_input) 
embeddings = tf.keras.layers.Embedding(input_dim = input_dim,output_dim = 64, input_length=max_length, mask_zero = True) (vectorizer)
#embeddings = tf.keras.layers.Embedding(input_dim=len(encoder.get_vocabulary()),output_dim=64,input_length=max_length )(vectorizer)
#print (embeddings.get_shape())
# lstm layer (getting hidden states)
lstm_h = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units,  return_sequences = True)) (embeddings)
# attention layer
attention = tf.keras.layers.Dense(1, activation = 'tanh') (lstm_h)
attention = tf.keras.layers.Flatten()(attention)
attention = tf.keras.layers.Activation('softmax') (attention)
attention = tf.keras.layers.RepeatVector (units*2) (attention)
attention = tf.keras.layers.Permute ([2,1]) (attention)

attention_weight = tf.keras.layers.Multiply()([lstm_h, attention])
attention_weight = tf.keras.layers.Lambda(lambda x: K.sum(x, axis = 1)) (attention_weight)

## output layer
output = tf.keras.layers.Dense(1, activation = 'sigmoid') (attention_weight)

## combine in one
model = tf.keras.Model(_input,output)

print(model.summary())
# Compile the model for training
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
              optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics = ['accuracy'])

# Training the model
history = model.fit (train_dataset,
                    epochs = 5, 
                    validation_data = test_dataset)
# testing the model
### pred_label = tf.argmax(model.predict(test),1)
pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
true_label = np.concatenate([y for x, y in test_dataset], axis=0)

test_loss, test_acc = model.evaluate (test_dataset)
print('Test acurracy: ', test_acc)
print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
printing_eval_scores (true_label, pred_label, report=True)

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 text_vectorization (TextVector  (None, None)        0           ['input_9[0][0]']                
 ization)                                                                                         
                                                                                                  
 embedding_11 (Embedding)       (None, None, 64)     640000      ['text_vectorization[8][0]']     
                                                                                                  
 bidirectional_10 (Bidirectiona  (None, None, 128)   66048       ['embedding_11[0][0]']     

In [None]:
units=250
EMBEDDING_DIM=310
MAX_LENGTH_PER_SENTENCE=65
encoder_input = keras.Input(shape=(MAX_LENGTH_PER_SENTENCE))
x =layers.Embedding(input_dim=len(embedding_matrix), output_dim=EMBEDDING_DIM, input_length=MAX_LENGTH_PER_SENTENCE,
                              weights=[embedding_matrix],
                              trainable=False)(encoder_input)
                              
activations =layers.Bidirectional(tf.keras.layers.LSTM(units))(x)
activations = layers.Dropout(0.5)(activations)

attention=layers.Dense(1, activation='tanh')(activations)
attention=layers.Flatten()(attention)
attention=layers.Activation('softmax')(attention)
attention=layers.RepeatVector(units*2)(attention)
attention=layers.Permute((2, 1))(attention)

sent_representation = layers.Multiply()([activations, attention])
sent_representation = layers.Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(units*2,))(sent_representation)

sent_representation = layers.Dropout(0.5)(sent_representation)

probabilities = layers.Dense(4, activation='softmax')(sent_representation)


encoder = keras.Model(inputs=[encoder_input], outputs=[probabilities],name='encoder')
encoder.summary()

NameError: ignored

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras import backend as K

class attention(Layer):
    
    def __init__(self, return_sequences=True):
        self.return_sequences = return_sequences
        super(attention,self).__init__()
        
    def build(self, input_shape):
        
        self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
                               initializer="normal")
        self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
                               initializer="zeros")
        
        super(attention,self).build(input_shape)
        
    def call(self, x):
        
        e = K.tanh(K.dot(x,self.W)+self.b)
        a = K.softmax(e, axis=1)
        output = x*a
        
        if self.return_sequences:
            return output
        
        return K.sum(output, axis=1)

max_len = 100
max_words = len(encoder.get_vocabulary())
emb_dim = 64
# n_sample = 5
# X = np.random.randint(0,max_words, (n_sample,max_len))
# Y = np.random.randint(0,2, n_sample)
model = tf.keras.Sequential([encoder,
                              tf.keras.layers.Embedding(
                                  input_dim = len(encoder.get_vocabulary()),
                                  output_dim = 64,
                                  mask_zero = True),
                              tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences = True)),
                              attention(return_sequences=True),
                              tf.keras.layers.Dense(1, activation = 'sigmoid')]) 
# model = Sequential()
# model.add(encoder)
# model.add(Embedding(max_words, emb_dim, input_length=max_len))
# model.add(Bidirectional(LSTM(32, return_sequences=True)))
# model.add(attention(return_sequences=True)) # receive 3D and output 3D
# model.add(LSTM(32))
# model.add(Dense(1, activation='sigmoid'))
model.summary()

model.compile('adam', 'binary_crossentropy')
train_dataset2 = list(train_dataset)
model.fit(tf.data.Dataset.from_tensor_slices(train_dataset2),
          epochs = 5, 
          validation_data = test_dataset)

#Compile the model for training
model.compile(loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
              optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics = ['accuracy'])

# Training the model
history = model.fit (train,
                    epochs = 5, 
                    validation_data = test)
# testing the model
### pred_label = tf.argmax(model.predict(test),1)
pred_label = (model.predict(test_dataset) > 0.5).astype("int32")
true_label = np.concatenate([y for x, y in test_dataset], axis=0)

test_loss, test_acc = model.evaluate (test_dataset)
print('Test acurracy: ', test_acc)
print('\nTesting performance:\n Loss: {:.3f} - Accuracy: {:.3f}'. format(test_loss, test_acc))
printing_eval_scores (true_label, pred_label, report=True)



ValueError: ignored

In [None]:
from keras.datasets import imdb
from keras.preprocessing import sequence
#from keras_self_attention import SeqSelfAttention, SeqWeightedAttention

max_features = 10000
maxlen = 500
batch_size = 32

# data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

x_train = sequence.pad_sequences(x_train, maxlen= maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

# model 
from keras import models
from keras import layers
from keras.layers import Dense, Embedding, LSTM, Bidirectional
model = models.Sequential()
# model.add( Embedding(max_features, 32,  mask_zero=True))
model.add( Embedding(max_features, 32))
model.add(Bidirectional( LSTM(32, return_sequences=True)))
# add an attention layer
# model.add(SeqSelfAttention(attention_activation='sigmoid'))
#model.add(SeqWeightedAttention())

model.add( Dense(1, activation='sigmoid') )

# compile and fit
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()

history = model.fit(train, epochs=5, batch_size=128, )#validation_split=0.2)

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_15 (Embedding)    (None, None, 32)          320000    
                                                                 
 bidirectional_15 (Bidirecti  (None, None, 64)         16640     
 onal)                                                           
                                                                 
 dense_21 (Dense)            (None, None, 1)           65        
                                                                 
Total params: 336,705
Trainable params: 336,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5


ValueError: ignored