In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Sample data
conversations = [
    ["Hi there!", "Hello! How can I help you?"],
    ["What services do you offer?", "We offer a variety of services including..."],
    ["Can you help me?", "Of course! What do you need help with?"]
]

# Flatten and tokenize
input_texts, response_texts = zip(*conversations)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(input_texts + response_texts)
input_sequences = tokenizer.texts_to_sequences(input_texts)
response_sequences = tokenizer.texts_to_sequences(response_texts)

# Pad sequences
max_len = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='post')
response_sequences = pad_sequences(response_sequences, maxlen=max_len, padding='post')

# Vocabulary size
vocab_size = len(tokenizer.word_index) + 1


In [6]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len),
    tf.keras.layers.LSTM(128, return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=False),  # Remove return_sequences to get single output
    tf.keras.layers.Dense(vocab_size, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()



In [7]:
# Split sequences for training
X_train = input_sequences
#y_train = np.array([seq[1:] + [0] for seq in response_sequences])
# Prepare target sequence to predict only the next word
y_train = np.array([seq[1] for seq in response_sequences])  # Only the next word for each sequence

# Train
history = model.fit(X_train, y_train, epochs=50, batch_size=32)



Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0000e+00 - loss: 3.0907
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 1.0000 - loss: 3.0752
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 1.0000 - loss: 3.0592
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.6667 - loss: 3.0420
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.6667 - loss: 3.0229
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.6667 - loss: 3.0013
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.3333 - loss: 2.9767
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3333 - loss: 2.9482
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [9]:
def generate_response(input_text):
    seq = tokenizer.texts_to_sequences([input_text])
    seq = pad_sequences(seq, maxlen=max_len, padding='post')
    pred_seq = model.predict(seq)
    pred_word_indices = np.argmax(pred_seq, axis=-1)
    response = ' '.join(tokenizer.index_word.get(idx, '') for idx in pred_word_indices)
    return response

# Test response
input_text = "Hi there!"
response = generate_response(input_text)
print("Bot:", response)

# Test response
input_text = "Can you help me?"
response = generate_response(input_text)
print("Bot:", response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Bot: can
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Bot: you
