In [29]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [30]:
# Load dataset
df = pd.read_csv("chatbot_data.csv")

# Drop rows with a missing question or answer *before* casting to str:
# astype(str) would otherwise turn NaN into the literal text "nan", which the
# tokenizer would then learn as an ordinary word and the model would be
# trained to emit/consume.
qa_pairs = df.dropna(subset=["question", "answer"])

# Convert text columns to lists (one entry per complete question/answer row)
questions = qa_pairs["question"].astype(str).tolist()
answers = qa_pairs["answer"].astype(str).tolist()

In [31]:

# Build one shared vocabulary covering both sides of the conversation
tokenizer = Tokenizer()
tokenizer.fit_on_texts([*questions, *answers])

# Word indices start at 1; the extra slot accounts for index 0 (padding)
vocab_size = 1 + len(tokenizer.word_index)

In [32]:
# Encode every question and answer as a list of integer word indices
question_sequences, answer_sequences = (
    tokenizer.texts_to_sequences(texts) for texts in (questions, answers)
)

In [49]:
# Padding sequences
# Size the window from the longest sequence on EITHER side. Using only the
# questions meant any longer answer was truncated, and because pad_sequences
# defaults to truncating="pre" it was the *start* of the answer that was lost.
max_length = max(len(seq) for seq in [*question_sequences, *answer_sequences])

# Post-pad with zeros so inputs and targets share the same (n_samples, max_length) shape
X_train = pad_sequences(question_sequences, maxlen=max_length, padding="post")
y_train = pad_sequences(answer_sequences, maxlen=max_length, padding="post")


In [50]:
# Define RNN Model (Simple RNN instead of LSTM)
# The input shape is declared with an explicit Input layer: the Embedding
# `input_length` argument is deprecated in Keras 3 and only triggers a warning.
model = Sequential([
    tf.keras.Input(shape=(max_length,)),
    # Maps each word index to a 128-dimensional dense vector
    Embedding(vocab_size, 128),
    # return_sequences=True keeps one output per timestep so that every
    # position of the padded answer gets its own prediction.
    # NOTE(review): the hidden size is tied to vocab_size here; a fixed size
    # is more usual — confirm this is intentional.
    SimpleRNN(vocab_size, return_sequences=True),
    # Per-timestep probability distribution over the vocabulary
    Dense(vocab_size, activation="softmax")
])

# Compile model
# Sparse loss: targets are integer word indices, not one-hot vectors
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])


In [51]:

# Make sure the targets are a NumPy array before handing them to Keras
y_train = np.array(y_train)

# Fit on the padded question -> answer pairs for 100 epochs with per-epoch logs
model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    verbose=1,
)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.0200 - loss: 4.6357
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.0400 - loss: 4.6058
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.1300 - loss: 4.5753
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.2000 - loss: 4.5438
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.2200 - loss: 4.5107
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.2900 - loss: 4.4754
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.3500 - loss: 4.4375
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.3600 - loss: 4.3965
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x2989c020610>

In [58]:
# Persist the trained model to disk (relative to the working directory)
model_path = 'chatbot_model.keras'
model.save(model_path)


In [None]:
import numpy as np

# Function to predict the answer based on the question
def predict_answer(question):
    """Return the model's answer to ``question`` as a space-joined string.

    The question is encoded with the notebook-level ``tokenizer``, post-padded
    to ``max_length`` and passed through ``model``. For every timestep the
    most probable word index is taken; index 0 (padding) is skipped.

    Args:
        question: The user's question as plain text.

    Returns:
        The predicted words joined by single spaces. This is an empty string
        when every timestep predicts the padding index.
    """
    # Convert the question into a sequence of integers
    question_sequence = tokenizer.texts_to_sequences([question])

    # Pad the sequence to match the input length expected by the model
    padded_question = pad_sequences(question_sequence, maxlen=max_length, padding='post')

    # Per-timestep probabilities over the vocabulary. verbose=0 suppresses the
    # Keras progress bar, which otherwise gets printed on every call and
    # clutters the interactive chat transcript.
    prediction = model.predict(padded_question, verbose=0)

    # Most probable word index at each timestep of the single batch entry
    predicted_word_index = np.argmax(prediction, axis=-1)[0]

    # Map indices back to words, skipping 0 which is reserved for padding
    predicted_words = [
        tokenizer.index_word[int(i)] for i in predicted_word_index if i > 0
    ]

    # Return the predicted words as a string
    return ' '.join(predicted_words)

# Quick smoke test: ask one sample question and show the reply
question = "What's your name?"
answer = predict_answer(question=question)
print(f"Answer: {answer}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
Answer: i am a chatbot


In [53]:

# Start chatbot loop
# Reads user input until the user types 'exit' (any case, surrounding
# whitespace ignored), stdin is closed, or the kernel is interrupted.
print("Chatbot: Hello! Type 'exit' to stop.")
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End the chat cleanly instead of leaving a traceback in the cell
        print("\nChatbot: Goodbye!")
        break
    if user_input.lower() == "exit":
        print("Chatbot: Goodbye!")
        break
    if not user_input:
        # Nothing to answer; prompt again rather than running the model on padding only
        continue
    response = predict_answer(user_input)
    print(f"Chatbot: {response}")

Chatbot: Hello! Type 'exit' to stop.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Chatbot: hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Chatbot: hi there
Chatbot: Goodbye!
