In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import numpy as np
# Example dataset
conversations = [
    ("Hello", "Hi there!"),
    ("What's your name?", "I'm your friendly Amazon bot."),
    ("May I know your name?", "I'm here to help you out."),
    ("How are you?", "I'm doing well, thank you."),
    ("Tell me a joke", "Why did the scarecrow win an award? Because he was outstanding in his field!"),
    ("What's the weather like today?", "I'm sorry, I don't have access to real-time data."),
    ("Exit", "Goodbye!"),
    # Add more dialogue pairs...
]

# Extract prompts and responses from conversations
prompts, responses = zip(*conversations)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(prompts + responses)

total_words = len(tokenizer.word_index) + 1

# Convert text to sequences
prompts_sequences = tokenizer.texts_to_sequences(prompts)
responses_sequences = tokenizer.texts_to_sequences(responses)

# Find the maximum sequence length among prompts and responses
max_sequence_length = max(max(len(seq) for seq in prompts_sequences), max(len(seq) for seq in responses_sequences))

# Pad sequences for consistent input length using the calculated maximum length
prompts_padded = pad_sequences(prompts_sequences, padding='post', maxlen=max_sequence_length)
responses_padded = pad_sequences(responses_sequences, padding='post', maxlen=max_sequence_length)

# One-hot encode the responses
responses_one_hot = to_categorical(responses_padded, num_classes=total_words)

# Define the LSTM model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=50))
model.add(LSTM(units=64, return_sequences=True))
model.add(Dense(units=len(tokenizer.word_index) + 1, activation='softmax'))

# Compile the model with categorical_crossentropy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model on the provided data
model.fit(prompts_padded, responses_one_hot, epochs=50, batch_size=1)



Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.2050 - loss: 3.9692   
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6838 - loss: 3.8328
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5672 - loss: 3.5359
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.4165 - loss: 3.0474    
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7031 - loss: 1.8222
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4083 - loss: 2.8670    
Epoch 7/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5976 - loss: 1.9967
Epoch 8/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.6323 - loss: 1.7563
Epoch 9/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x1d81ff00d90>

In [None]:
def generate_response(prompt, model, tokenizer):
    prompt_sequence = tokenizer.texts_to_sequences([prompt])
    prompt_padded = pad_sequences(prompt_sequence, maxlen=max_sequence_length)
    predicted_sequence = model.predict(prompt_padded)[0]
    predicted_word_index = np.argmax(predicted_sequence, axis=-1)
    predicted_word = tokenizer.index_word.get(predicted_word_index, 'UNKNOWN')
    return predicted_word

user_name = input("May I know your name: ")
print(f"Hello, {user_name}! How can I assist you today?")

while True:
    user_input = input("You: ")
    if user_input.lower() == 'exit':
        break

    # Update conversations with new interactions
    conversations.append((user_input, generate_response(user_input, model, tokenizer)))

    # Retrain the model with updated data
    prompts, responses = zip(*conversations)
    prompts_sequences = tokenizer.texts_to_sequences(prompts)
    responses_sequences = tokenizer.texts_to_sequences(responses)
    prompts_padded = pad_sequences(prompts_sequences, maxlen=max_sequence_length)
    responses_padded = pad_sequences(responses_sequences, maxlen=max_sequence_length)

    model.fit(prompts_padded, responses_padded, epochs=1, batch_size=1)

    # Generate a response
    bot_response = generate_response(user_input, model, tokenizer)
    print(f"Bot: {bot_response}")
