In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [2]:
# Sample data
data = [
    {"context": "TensorFlow is an open-source platform for machine learning.", "question": "What is TensorFlow?", "answer": "an open-source platform for machine learning"},
    {"context": "Python is a programming language.", "question": "What is Python?", "answer": "a programming language"},
]

In [3]:

# Prepare dataset
contexts = [item["context"] for item in data]
questions = [item["question"] for item in data]
answers = [item["answer"] for item in data]


In [4]:
# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(contexts + questions + answers)

context_sequences = tokenizer.texts_to_sequences(contexts)
question_sequences = tokenizer.texts_to_sequences(questions)

In [5]:

# Pad sequences
max_length = max(max(len(seq) for seq in context_sequences), max(len(seq) for seq in question_sequences))
context_padded = pad_sequences(context_sequences, maxlen=max_length)
question_padded = pad_sequences(question_sequences, maxlen=max_length)

In [6]:
# Prepare answer labels as start and end indices
start_labels = []
end_labels = []

In [7]:
for i, answer in enumerate(answers):
    context = contexts[i]
    start_index = context.find(answer)
    end_index = start_index + len(answer) - 1

    # Convert to token indices
    start_token_index = tokenizer.texts_to_sequences([context[:start_index]])[0][-1]
    end_token_index = tokenizer.texts_to_sequences([context[:end_index + 1]])[0][-1]

    start_labels.append(start_token_index)
    end_labels.append(end_token_index)

start_labels = np.array(start_labels)
end_labels = np.array(end_labels)

In [8]:
# Model Definition
input_context = layers.Input(shape=(max_length,))
input_question = layers.Input(shape=(max_length,))

embedding_layer = layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)

context_embedding = embedding_layer(input_context)
question_embedding = embedding_layer(input_question)

In [9]:
# Merge context and question
merged = layers.concatenate([context_embedding, question_embedding])
x = layers.LSTM(128)(merged)
x = layers.Dense(64, activation='relu')(x)

In [10]:
# Output layers for start and end positions
start_output = layers.Dense(len(tokenizer.word_index) + 1, activation='softmax', name='start')(x)
end_output = layers.Dense(len(tokenizer.word_index) + 1, activation='softmax', name='end')(x)

model = models.Model(inputs=[input_context, input_question], outputs=[start_output, end_output])

In [11]:
# Compile the model with metrics for both outputs
model.compile(optimizer='adam', 
              loss={'start': 'sparse_categorical_crossentropy', 'end': 'sparse_categorical_crossentropy'},
              metrics={'start': 'accuracy', 'end': 'accuracy'})

In [12]:

# Train the model
model.fit([context_padded, question_padded], [start_labels, end_labels], epochs=10, batch_size=2)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - end_accuracy: 0.0000e+00 - loss: 5.4062 - start_accuracy: 0.0000e+00
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - end_accuracy: 1.0000 - loss: 5.3348 - start_accuracy: 0.5000
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - end_accuracy: 0.5000 - loss: 5.2646 - start_accuracy: 1.0000
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - end_accuracy: 0.5000 - loss: 5.1788 - start_accuracy: 1.0000
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - end_accuracy: 0.5000 - loss: 5.0705 - start_accuracy: 1.0000
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - end_accuracy: 0.5000 - loss: 4.9318 - start_accuracy: 1.0000
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - end_accuracy: 0.5000 - loss: 4.7

<keras.src.callbacks.history.History at 0x1fcdbd77ca0>

In [18]:
model.save('owngpt.h5')



In [15]:
# Function to answer questions
def answer_question(context, question):
    context_seq = tokenizer.texts_to_sequences([context])
    question_seq = tokenizer.texts_to_sequences([question])
    
    context_padded = pad_sequences(context_seq, maxlen=max_length)
    question_padded = pad_sequences(question_seq, maxlen=max_length)

    start_pred, end_pred = model.predict([context_padded, question_padded])
    
    start_index = np.argmax(start_pred, axis=1)[0]
    end_index = np.argmax(end_pred, axis=1)[0]
     # Get answer from context based on predicted indices
    answer_tokens = context_padded[0][start_index:end_index + 1]
    answer = tokenizer.sequences_to_texts([answer_tokens])[0]

    return answer.strip()

In [16]:

# Example usage
context = "TensorFlow is an open-source platform for machine learning."
question = "What is TensorFlow?"

answer = answer_question(context, question)
print("Answer:", answer)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step
Answer: is an open source platform for machine learning


In [17]:
# Example usage
context = "TensorFlow is a programming language."
question = "What is TensorFlow?"

answer = answer_question(context, question)
print("Answer:", answer)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Answer: tensorflow is a programming language
