<a href="https://colab.research.google.com/github/Jhansipothabattula/Machine_Learning/blob/main/Day108.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Transformers

**Transformers**

Transformers are deep learning architectures designed to handle sequential data without relying on recurrence, the mechanism used by RNNs. Instead, Transformers use a mechanism called self-attention to process all tokens in a sequence simultaneously, capturing dependencies between tokens regardless of how far apart they are in the sequence. Transformers have become the foundation for many NLP tasks and models, including BERT and GPT.

In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

# Dataset settings for the IMDB sentiment task
max_features = 10000  # Keep only the `max_features` most frequent words
max_len = 200         # Every review is padded/truncated to this many tokens

# Fetch the integer-encoded reviews together with their 0/1 labels
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Bring both splits to a fixed length so they can be batched
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=max_len)
    for split in (x_train, x_test)
)

# Define a Transformer block
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization.

    Args:
        embed_dim: Output width of the feed-forward network. It has to match
            the feature size of the inputs so that the second residual
            addition lines up.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden layer of the feed-forward network.
        rate: Dropout rate used after the attention and feed-forward
            sub-layers.
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        # Forward base-layer arguments so the layer can be named and can be
        # rebuilt from its config.
        super().__init__(**kwargs)

        # Keep the constructor arguments so `get_config` can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        # NOTE: in Keras `key_dim` is the size of *each* head, so every head
        # projects queries/keys to `embed_dim` features.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention
                layer (self-attention).
            training: Whether the call is in training mode; controls the
                dropout layers. ``None`` lets Keras decide from context.

        Returns:
            The layer-normalized output of the feed-forward sub-layer.
        """
        # Self-attention sub-layer with residual connection.
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        # Position-wise feed-forward sub-layer with residual connection.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

# Define the model with an embedding layer, transformer block, and output layer
embed_dim = 32  # Embedding size for each token
num_heads = 2   # Number of attention heads
ff_dim = 32     # Hidden layer size of the feed-forward network in the block

inputs = layers.Input(shape=(max_len,))
embedding_layer = layers.Embedding(input_dim=max_features, output_dim=embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
# Do NOT hard-code `training=True` here: that would bake training mode into
# the graph and keep the block's dropout active during validation,
# `evaluate()` and `predict()`. Leaving the flag unset lets Keras pass the
# correct mode (training in `fit`, inference otherwise).
x = transformer_block(x)
# Collapse the sequence axis into a single feature vector per review
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
# Single sigmoid unit for the binary sentiment label
outputs = layers.Dense(1, activation="sigmoid")(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Configure the optimizer, loss and reported metric, then train
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,  # Hold out 20% of the training data for validation
)

# Measure loss and accuracy on the test split
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test Accuracy: \n", test_acc)


Epoch 1/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 44s 66ms/step - accuracy: 0.6791 - loss: 0.5611 - val_accuracy: 0.8706 - val_loss: 0.2970
Epoch 2/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 42s 66ms/step - accuracy: 0.9127 - loss: 0.2228 - val_accuracy: 0.8830 - val_loss: 0.2881
Epoch 3/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 41s 65ms/step - accuracy: 0.9476 - loss: 0.1529 - val_accuracy: 0.8736 - val_loss: 0.3515
Epoch 4/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 40s 64ms/step - accuracy: 0.9656 - loss: 0.1023 - val_accuracy: 0.8654 - val_loss: 0.4241
Epoch 5/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 42s 66ms/step - accuracy: 0.9772 - loss: 0.0726 - val_accuracy: 0.8590 - val_loss: 0.4995
782/782 ━━━━━━━━━━━━━━━━━━━━ 17s 22ms/step - accuracy: 0.8441 - loss: 0.5474
Test Accuracy: 
 0.8406000137329102
