In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Train a small question -> answer sequence model on a CSV of conversation
# pairs, then persist the fitted tokenizer and the trained model to disk.

# Load the training data from a CSV file
data = pd.read_csv(r"C:\Users\85job\Downloads\Conversation.csv")
# Empty CSV cells are loaded as NaN (a float), which the tokenizer cannot
# process as text; keep only complete pairs and force both columns to str.
complete_pairs = data[['question', 'answer']].dropna()
input_texts = complete_pairs['question'].astype(str).tolist()
response_texts = complete_pairs['answer'].astype(str).tolist()
if not input_texts:
    # Fail here with a clear message instead of the opaque
    # "max() arg is an empty sequence" raised further down.
    raise ValueError("The conversation CSV contains no complete question/answer pairs")

# Tokenize the input and response texts with one shared vocabulary
tokenizer = Tokenizer()
tokenizer.fit_on_texts(input_texts + response_texts)
input_sequences = tokenizer.texts_to_sequences(input_texts)
response_sequences = tokenizer.texts_to_sequences(response_texts)

# Number of distinct token ids: every word in word_index plus id 0, which the
# padding below uses as its fill value.
vocab_size = len(tokenizer.word_index) + 1

# Pad the sequences. Questions and answers share one length because the model
# emits one prediction per input timestep (LSTM with return_sequences=True).
max_sequence_length = max(
    max(len(seq) for seq in input_sequences),
    max(len(seq) for seq in response_sequences),
)
input_sequences_padded = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='post')
response_sequences_padded = pad_sequences(response_sequences, maxlen=max_sequence_length, padding='post')

# Convert responses to one-hot encoding.
# float32 halves the footprint of this (samples, timesteps, vocab) tensor
# compared with NumPy's float64 default.
# NOTE(review): padded answer positions hold id 0, so they are encoded as
# class 0 and count toward the loss and the reported accuracy.
num_samples = len(response_sequences_padded)
response_sequences_one_hot = np.zeros(
    (num_samples, max_sequence_length, vocab_size), dtype=np.float32
)
# One vectorized assignment instead of a Python loop over every token:
# the (N, 1) and (1, T) index grids broadcast against the (N, T) id matrix.
sample_idx = np.arange(num_samples)[:, None]
step_idx = np.arange(max_sequence_length)[None, :]
response_sequences_one_hot[sample_idx, step_idx, response_sequences_padded] = 1

# Build the model: token ids -> 64-d embeddings -> per-timestep LSTM states
# -> per-timestep softmax over the vocabulary.
model = Sequential()
# `input_length` is deprecated in Keras 3; the sequence length is inferred
# from the data on the first call to fit().
model.add(Embedding(input_dim=vocab_size, output_dim=64))
model.add(LSTM(64, return_sequences=True))
model.add(Dense(vocab_size, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(input_sequences_padded, response_sequences_one_hot, epochs=5, batch_size=32)

# Save the tokenizer and model
import pickle
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
model.save('chatbot_model.keras')  # Save the model in the native Keras format




Epoch 1/5
117/117 ━━━━━━━━━━━━━━━━━━━━ 8s 29ms/step - accuracy: 0.6223 - loss: 5.6568
Epoch 2/5
117/117 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.6542 - loss: 2.6859
Epoch 3/5
117/117 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.6556 - loss: 2.5517
Epoch 4/5
117/117 ━━━━━━━━━━━━━━━━━━━━ 3s 27ms/step - accuracy: 0.6596 - loss: 2.4255
Epoch 5/5
117/117 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.6580 - loss: 2.3837


In [3]:
# Score the model on the same padded question/answer arrays that were passed
# to fit(), so the percentage printed below is training-set accuracy rather
# than a held-out estimate.
evaluation_scores = model.evaluate(
    x=input_sequences_padded,
    y=response_sequences_one_hot,
)
# evaluate() returns the loss followed by the compiled metrics, in order.
loss, accuracy = evaluation_scores
print(f'Accuracy: {accuracy * 100:.2f}%')


117/117 ━━━━━━━━━━━━━━━━━━━━ 4s 20ms/step - accuracy: 0.6632 - loss: 2.2890
Accuracy: 66.01%
