<a href="https://colab.research.google.com/github/PRANAYRAJU07/training/blob/main/QA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

import os
import urllib.request
import tarfile
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import (
    Input, Embedding, Dense, Dropout, LayerNormalization,
    GlobalAveragePooling1D, LSTM, Concatenate
)
from tensorflow.keras.models import Model


# Location of the bAbI tasks archive and where it is stored/unpacked locally.
url = "https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz"
archive = "babi_tasks.tar.gz"
data_dir = "babi_data"

# Task 1 (single supporting fact) train/test splits inside the extracted tree.
train_file = f"{data_dir}/tasks_1-20_v1-2/en/qa1_single-supporting-fact_train.txt"
test_file  = f"{data_dir}/tasks_1-20_v1-2/en/qa1_single-supporting-fact_test.txt"

# Check for the files that are actually needed rather than for data_dir:
# an interrupted extraction leaves data_dir behind, and a directory-only
# check would then skip the download forever and fail later on open().
if not (os.path.exists(train_file) and os.path.exists(test_file)):
    urllib.request.urlretrieve(url, archive)
    with tarfile.open(archive, "r:gz") as tar:
        # The archive comes from the network, so refuse members that would
        # escape data_dir (absolute paths, "..", links) when the running
        # Python provides extraction filters; older versions fall back to
        # the unfiltered behaviour.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(data_dir, filter="data")
        else:
            tar.extractall(data_dir)


def parse_babi(filepath):
    """Parse a bAbI task file into parallel story/question/answer lists.

    Every non-empty line has the form ``"<id> <text>"``. A line whose text
    contains a tab is a question line (``question<TAB>answer<TAB>...``);
    any other line is a statement that extends the current story. Line
    ids restart at 1 whenever a new story begins, so the accumulated
    context is cleared there; otherwise every sample would carry the
    statements of all previous stories in the file.

    Args:
        filepath: Path to a UTF-8 encoded bAbI task file.

    Returns:
        A tuple ``(stories, questions, answers)`` of equally long lists.
        ``stories[i]`` is the space-joined text of the statements that
        precede question ``i`` within its own story.
    """
    stories, questions, answers = [], [], []
    story = []
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            line_id, text = line.split(" ", 1)
            if line_id == "1":
                # Id 1 marks the start of a new, independent story.
                story = []
            if "\t" in text:
                # Only the first two tab-separated fields are needed; any
                # further fields (supporting-fact ids) are ignored.
                q, a, *_ = text.split("\t")
                stories.append(" ".join(story))
                questions.append(q)
                answers.append(a)
            else:
                story.append(text)
    return stories, questions, answers

# Load both splits as parallel lists of story / question / answer strings.
train_stories, train_questions, train_answers = parse_babi(train_file)
test_stories, test_questions, test_answers = parse_babi(test_file)


# One shared vocabulary, fitted on training text only; words that appear
# only in the test split are mapped to the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(train_stories + train_questions + train_answers)

# +1 because word indices start at 1; index 0 is what pad_sequences pads with.
vocab_size = len(tokenizer.word_index) + 1
max_story_len = 200
max_question_len = 20

# Training inputs: integer-encoded text, padded/truncated to fixed lengths.
X_story = pad_sequences(
    tokenizer.texts_to_sequences(train_stories),
    maxlen=max_story_len,
)
X_question = pad_sequences(
    tokenizer.texts_to_sequences(train_questions),
    maxlen=max_question_len,
)

# Training labels: the vocabulary index of the first token of each answer.
y = np.array([seq[0] for seq in tokenizer.texts_to_sequences(train_answers)])

# Test inputs and labels, encoded exactly like the training data.
X_story_test = pad_sequences(
    tokenizer.texts_to_sequences(test_stories),
    maxlen=max_story_len,
)
X_question_test = pad_sequences(
    tokenizer.texts_to_sequences(test_questions),
    maxlen=max_question_len,
)
y_test = np.array([seq[0] for seq in tokenizer.texts_to_sequences(test_answers)])


class TransformerEncoder(tf.keras.layers.Layer):
    """One Transformer encoder block.

    Applies multi-head self-attention followed by a two-layer feed-forward
    network. Each sub-layer is wrapped in dropout, a residual connection
    and layer normalization (normalization after the residual add).

    Args:
        embed_dim: Size of the last axis of the input; also the output
            size of the feed-forward network so the residual add works.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``,
            ...). Accepting them is required for the layer to be rebuilt
            from its saved config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can report the constructor arguments.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim

        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim)
        ])
        self.norm1 = LayerNormalization()
        self.norm2 = LayerNormalization()
        self.drop1 = Dropout(0.1)
        self.drop2 = Dropout(0.1)

    def call(self, x, training=None):
        """Run the block on ``x`` and return a tensor of the same shape.

        Args:
            x: Input tensor whose last axis has size ``embed_dim``.
            training: Forwarded to the sub-layers so dropout is only
                active during training.
        """
        # Self-attention: the sequence attends to itself (query == value).
        attn = self.att(x, x, training=training)
        x = self.norm1(x + self.drop1(attn, training=training))
        ffn = self.ffn(x, training=training)
        return self.norm2(x + self.drop2(ffn, training=training))

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
        })
        return config


# Model hyperparameters.
embed_dim = 64
num_heads = 4
ff_dim = 128

# Two integer-sequence inputs: the padded story and the padded question.
story_in = Input(shape=(max_story_len,))
ques_in = Input(shape=(max_question_len,))

# A single embedding table is applied to both inputs.
embed = Embedding(input_dim=vocab_size, output_dim=embed_dim)

story_emb = embed(story_in)
ques_emb = embed(ques_in)

# Encode the story with one self-attention block.
story_enc = TransformerEncoder(
    embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim
)(story_emb)

# Cross-attention: question tokens are the queries, the encoded story
# supplies both keys and values.
cross_attn_layer = tf.keras.layers.MultiHeadAttention(
    num_heads=num_heads, key_dim=embed_dim
)
cross_attn = cross_attn_layer(query=ques_emb, value=story_enc, key=story_enc)

# Average over the question positions, then classify over the vocabulary.
pooled = GlobalAveragePooling1D()(cross_attn)
out = Dense(units=vocab_size, activation="softmax")(pooled)

transformer_model = Model(inputs=[story_in, ques_in], outputs=out)

# Labels are integer word indices, hence the sparse cross-entropy loss.
transformer_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

print("\nTraining Transformer QA Model")
transformer_model.fit(
    x=[X_story, X_question],
    y=y,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

print("\nEvaluating Transformer QA Model")
# evaluate() returns [loss, accuracy]; only the accuracy is kept.
_, acc_tr = transformer_model.evaluate(
    x=[X_story_test, X_question_test],
    y=y_test,
)


# LSTM baseline. It gets its own embedding table: reusing story_emb and
# ques_emb would share weights with the transformer model, so the baseline
# would start from embeddings that model already trained, and training the
# baseline would in turn keep modifying the transformer model's weights.
# A separate table keeps the two models, and their comparison, independent.
lstm_embed = Embedding(vocab_size, embed_dim)

# Each input is summarised by the final state of its own LSTM.
story_lstm = LSTM(64)(lstm_embed(story_in))
ques_lstm = LSTM(64)(lstm_embed(ques_in))

# Join both summaries and classify over the vocabulary.
merged = Concatenate()([story_lstm, ques_lstm])
out_lstm = Dense(vocab_size, activation="softmax")(merged)

lstm_model = Model([story_in, ques_in], out_lstm)

# Same optimizer, loss and metric as the transformer model.
lstm_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

print("\nTraining LSTM QA Model")
lstm_model.fit(
    [X_story, X_question],
    y,
    epochs=10,
    batch_size=32,
    validation_split=0.1
)

print("\nEvaluating LSTM QA Model")
# evaluate() returns [loss, accuracy]; only the accuracy is kept.
_, acc_lstm = lstm_model.evaluate(
    [X_story_test, X_question_test],
    y_test
)


# Side-by-side test accuracy of both models, one line per entry.
print(
    "\nFINAL RESULTS",
    f"Transformer Accuracy: {acc_tr:.4f}",
    f"LSTM Accuracy       : {acc_lstm:.4f}",
    sep="\n",
)



Training Transformer QA Model
Epoch 1/10
29/29 ━━━━━━━━━━━━━━━━━━━━ 19s 481ms/step - accuracy: 0.1811 - loss: 2.2010 - val_accuracy: 0.2400 - val_loss: 1.8029
Epoch 2/10
29/29 ━━━━━━━━━━━━━━━━━━━━ 20s 469ms/step - accuracy: 0.1690 - loss: 1.8437 - val_accuracy: 0.0700 - val_loss: 1.8691
Epoch 3/10
29/29 ━━━━━━━━━━━━━━━━━━━━ 21s 474ms/step - accuracy: 0.1706 - loss: 1.8119 - val_accuracy: 0.0700 - val_loss: 1.9670
Epoch 4/10
29/29 ━━━━━━━━━━━━━━━━━━━━ 14s 484ms/step - accuracy: 0.1573 - loss: 1.8681 - val_accuracy: 0.2400 - val_loss: 1.7980
Epoch 5/10
29/29 ━━━━━━━━━━━━━━━━━━━━ 14s 488ms/step - accuracy: 0.1681 - loss: 1.8255 - val_accuracy: 0.2000 - val_loss: 1.7521
Epoch 6/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 461ms/step - accuracy: 0.1804 - loss: 1.8444 - val_accuracy: 0.0700 - val_loss