In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd

# Load dataset
df = pd.read_csv('nrmal.csv')
questions = df['question'].tolist()
answers = df['answer'].tolist()

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions + answers)

# Convert texts to sequences
question_sequences = tokenizer.texts_to_sequences(questions)
answer_sequences = tokenizer.texts_to_sequences(answers)

# Pad sequences
max_len = max(max(len(seq) for seq in question_sequences), max(len(seq) for seq in answer_sequences))
question_padded = pad_sequences(question_sequences, maxlen=max_len, padding='post')
answer_padded = pad_sequences(answer_sequences, maxlen=max_len, padding='post')

# Prepare input and output
X = question_padded
y = answer_padded

# Model parameters
vocab_size = len(tokenizer.word_index) + 1  # Adding 1 because of reserved 0 index
embedding_dim = 128
num_heads = 8
ff_dim = 512  # Feed-forward network dimension

# Define the Functional API model
# Inputs to the model
inputs = tf.keras.Input(shape=(max_len,))

# Embedding layer
embedding_layer = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim)(inputs)

# Multi-head attention requires query, value, and key
# Here, we use the same input for all three (self-attention)
attn_output = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim)(embedding_layer, embedding_layer)

# Feed-forward network after attention
ff_output = tf.keras.layers.Dense(embedding_dim, activation='relu')(attn_output)

# Layer normalization
norm_output = tf.keras.layers.LayerNormalization()(ff_output)

# Output layer for final prediction
outputs = tf.keras.layers.Dense(vocab_size, activation='softmax')(norm_output)

# Create the model
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X, np.expand_dims(y, -1), epochs=200, verbose=1)

# Function to predict the answer based on the question
def predict_answer(question):
    question_seq = tokenizer.texts_to_sequences([question])
    question_padded = pad_sequences(question_seq, maxlen=max_len, padding='post')
    
    prediction = model.predict(question_padded)
    
    # Get the indices of the predicted words
    predicted_word_indices = np.argmax(prediction[0], axis=-1)
    
    # Convert word indices back to words
    predicted_words = [tokenizer.index_word[i] for i in predicted_word_indices if i > 0]
    
    return ' '.join(predicted_words)

# Test the model with a sample question
test_question = "How are you?"
response = predict_answer(test_question)
print(f"Response: {response}")


Epoch 1/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0000e+00 - loss: 3.3921
Epoch 2/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.7300 - loss: 2.3308
Epoch 3/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.7300 - loss: 1.4587
Epoch 4/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.7300 - loss: 1.5893
Epoch 5/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.7300 - loss: 1.6504
Epoch 6/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.7300 - loss: 1.5313
Epoch 7/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.7300 - loss: 1.3502
Epoch 8/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.7300 - loss: 1.2714
Epoch 9/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [7]:
model.save("models/transformer.h5")



In [6]:
# Test the model with a sample question
test_question = "which is your favourit colour"
response = predict_answer(test_question)
print(f"Response: {response}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Response: blue
