<a href="https://colab.research.google.com/github/Nanditharangu/NLP_model_comparison/blob/main/Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Dependencies


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers 

# Download IMDB movie review dataset

In [None]:
# Hyper-parameters shared by every model in this notebook.
num_words = 20000  # vocabulary cap: keep only the 20k most frequent words
maxlen = 200  # every review is padded / truncated to exactly this many tokens
embedding_dim = 128  # width of the learned embedding vectors

# load_data returns two (sequences, labels) pairs; the second pair is used
# here as the validation split.
imdb_train, imdb_val = keras.datasets.imdb.load_data(num_words=num_words)
x_train, y_train = imdb_train
x_val, y_val = imdb_val
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")

# Bring both splits to a fixed length of `maxlen`.
# NOTE(review): pad_sequences is called with its defaults, which (per the
# Keras docs) pad and truncate at the *front*, so long reviews keep their
# last `maxlen` tokens rather than their first -- confirm this is intended.
x_train, x_val = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_val)
)

# Conv1D model

In [None]:
# Convolutional classifier: embedding -> two strided 1D convolutions ->
# global max pooling -> dense head ending in a single sigmoid unit.
inputs = keras.Input(shape=(None,), dtype="int64")  # variable-length integer sequences

# Layers are listed (and therefore instantiated) in the order they are applied.
conv_stack = [
    layers.Embedding(num_words, embedding_dim),
    layers.Dropout(0.5),
    layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3),
    layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3),
    layers.GlobalMaxPooling1D(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
]

# Thread the tensor through the stack.
x = inputs
for layer in conv_stack:
    x = layer(x)

outputs = layers.Dense(1, activation="sigmoid", name="outputs")(x)
model1 = keras.Model(inputs, outputs)

# Bidirectional LSTM model

In [None]:
# Recurrent classifier: embedding -> two stacked bidirectional LSTMs ->
# single sigmoid unit.
inputs = keras.Input(shape=(None,), dtype="int64")  # variable-length integer sequences

recurrent_stack = (
    layers.Embedding(num_words, embedding_dim),
    # The first LSTM returns the whole sequence so the second one has a
    # sequence to consume; the second returns only its final state.
    layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
    layers.Bidirectional(layers.LSTM(64)),
)

x = inputs
for layer in recurrent_stack:
    x = layer(x)

outputs = layers.Dense(1, activation="sigmoid", name="outputs")(x)
model2 = keras.Model(inputs, outputs)

# Defining the TransformerBlock layer & the embedding layer for the Transformer model

In [None]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual addition
    and layer normalization.

    Args:
        embedding_dim: Size of the last axis of the input. It is used both as
            the per-head ``key_dim`` of the attention layer and as the output
            width of the feed-forward network, so the residual additions
            line up with the input.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments
            (``name``, ``dtype``, ...), forwarded to the base class.
    """

    def __init__(self, embedding_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embedding_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor to transform; assumed to be shaped
                ``(batch, sequence, embedding_dim)`` -- TODO confirm with caller.
            training: Forwarded to the dropout layers. It defaults to ``None``
                so the layer can be called without it (as the functional API
                does while building a model); Keras then resolves the mode
                from the surrounding call context.

        Returns:
            The layer-normalized output of the second residual addition.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        # Position-wise feed-forward network with its own residual connection.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "embedding_dim": self.embedding_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config


class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows in the position-embedding table, i.e. the
            number of distinct positions that can be looked up.
        num_words: Number of rows in the token-embedding table.
        embedding_dim: Output width of both embedding tables.
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments
            (``name``, ``dtype``, ...), forwarded to the base class.
    """

    def __init__(self, maxlen, num_words, embedding_dim, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.maxlen = maxlen
        self.num_words = num_words
        self.embedding_dim = embedding_dim

        self.token_emb = layers.Embedding(input_dim=num_words, output_dim=embedding_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embedding_dim)

    def call(self, x):
        """Embed ``x`` and add the position embeddings.

        Args:
            x: Tensor of indices into the token-embedding table; the last
                axis is treated as the sequence axis.

        Returns:
            Token embeddings plus the embeddings of positions
            ``0 .. seq_len - 1`` (added via broadcasting).
        """
        # Actual length of the incoming sequences. Named seq_len rather than
        # maxlen so it is not confused with the size of the position table.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "num_words": self.num_words,
                "embedding_dim": self.embedding_dim,
            }
        )
        return config

# Transformer model

In [None]:
# Transformer classifier: token + position embedding -> one Transformer
# block -> average pooling over the sequence -> small dense head.
num_heads = 2  # attention heads inside the Transformer block
ff_dim = 32  # hidden units of the block's feed-forward network

inputs = layers.Input(shape=(maxlen,))  # fixed-length sequences of `maxlen` entries

embedding_layer = TokenAndPositionEmbedding(maxlen, num_words, embedding_dim)
transformer_block = TransformerBlock(embedding_dim, num_heads, ff_dim)
x = transformer_block(embedding_layer(inputs))

# Classification head, listed in application order.
head_layers = [
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.1),
    layers.Dense(20, activation="relu"),
    layers.Dropout(0.1),
]
for layer in head_layers:
    x = layer(x)

outputs = layers.Dense(1, activation="sigmoid", name="outputs")(x)
model3 = keras.Model(inputs, outputs)

# Train the models

In [None]:
# Training configuration, identical for all three models so that their
# results can be compared.
epochs = 3
batch_size = 32
optimizer = "adam"
metrics = ["accuracy"]
loss = "binary_crossentropy"

for model in (model1, model2, model3):
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() -- doing so emits a stray "None" after each table.
    model.summary()
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_val, y_val),
    )