In [None]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import random
import pickle
import matplotlib.pyplot as plt

# Read the intent definitions from disk
with open('benh.json', 'r', encoding='utf-8') as intents_file:
    data = json.load(intents_file)

intents = data['intents']
ignore_words = ['?', '!']

# One (tokens, tag) pair per pattern; tokens are the lower-cased,
# whitespace-separated pieces of the pattern text.
documents = [
    (pattern.lower().split(), intent['tag'])
    for intent in intents
    for pattern in intent['patterns']
]

# Sorted unique vocabulary, minus the stand-alone punctuation tokens
words = sorted(
    {token for tokens, _ in documents for token in tokens} - set(ignore_words)
)

# Sorted unique tags; a tag only appears here if it has at least one pattern
classes = sorted({tag for _, tag in documents})

# Persist vocabulary and class list so they can be reloaded later
for pickle_path, payload in (('words.pkl', words), ('classes.pkl', classes)):
    with open(pickle_path, 'wb') as f:
        pickle.dump(payload, f)

# Build one [bag-of-words, one-hot label] row per document
output_empty = [0] * len(classes)
training = []

for pattern_words, tag in documents:
    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere
    bag = [int(word in pattern_words) for word in words]

    # Copy the all-zero template and switch on the slot of this tag
    output_row = output_empty.copy()
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

# Shuffle rows in place, then separate features from labels.
# NOTE: after this shuffle the row order of train_x / train_y no longer
# matches the order of `documents`; data built directly from `documents`
# must not be paired with these arrays.
random.shuffle(training)
training = np.array(training, dtype=object)
features_column = training[:, 0]
labels_column = training[:, 1]
train_x = np.array(list(features_column))
train_y = np.array(list(labels_column))

# Build and train the sequence model.
#
# The network consumes padded integer sequences produced by `tokenizer`,
# so both the embedding size and the labels must be derived from the same
# source (`documents`, in its original order) as those sequences.

# Longest whitespace-tokenised pattern; every sequence is padded to this length
max_len = len(max(documents, key=lambda x: len(x[0]))[0])
embedding_dim = 128

# Tokenize: fit on the training patterns and persist the fitted tokenizer.
# No `num_words` cap is applied: Keras keeps only the `num_words - 1` most
# frequent words when a cap is given, and the tokenizer's own vocabulary is
# not the same as `words` (its default filters strip punctuation), so
# capping at len(words) could silently drop a real word.
texts = [' '.join(doc[0]) for doc in documents]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

# Tokenizer indices start at 1 (0 is the padding value), hence the +1
vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(len(classes), activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Pad sequences; row i corresponds to documents[i]
train_x_padded = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_len)

# One-hot labels built in the SAME order as train_x_padded. The previously
# computed `train_y` comes from a shuffled list, so pairing it with
# sequences built from `documents` would attach labels to the wrong
# inputs. It is left untouched because it still matches `train_x`.
train_y_aligned = np.array(
    [[1 if cls == tag else 0 for cls in classes] for _, tag in documents]
)

# Train model and capture history
history = model.fit(train_x_padded, train_y_aligned, epochs=200, batch_size=5, verbose=1)

# NOTE(review): despite the .pkl suffix this file is written by Keras' own
# saver, not by pickle. Recent Keras releases appear to accept only
# `.keras` / `.h5` paths here -- confirm against the installed version
# before renaming, since loaders elsewhere may depend on this filename.
model.save('model.pkl')

# Report the metrics of the last epoch
metrics = history.history
final_loss = metrics['loss'][-1]
final_accuracy = metrics['accuracy'][-1]
print("\nKết quả huấn luyện:")
print(f"Loss cuối cùng: {final_loss:.4f}")
print(f"Accuracy cuối cùng: {final_accuracy:.4f}")

# Draw loss (left) and accuracy (right) curves side by side
plt.figure(figsize=(12, 4))

panels = (
    ('loss', 'Loss', 'Training Loss'),
    ('accuracy', 'Accuracy', 'Training Accuracy'),
)
for position, (metric_key, axis_label, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(metrics[metric_key], label=axis_label)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(axis_label)
    plt.legend()

# Write the figure to disk and tell the user where it went
plt.tight_layout()
plt.savefig('training_plot.png')
print("Biểu đồ huấn luyện đã được lưu vào 'training_plot.png'")

# Chatbot functions
def clean_up_sentence(sentence):
    """Return the lower-cased, whitespace-separated tokens of *sentence*."""
    lowered = sentence.lower()
    return lowered.split()

def bow(sentence, words, tokenizer, max_len):
    """Encode *sentence* as the padded index sequence fed to the model.

    Despite its name this function does not return a bag-of-words vector:
    the result is the tokenizer's integer sequence for the sentence, padded
    with ``pad_sequences``. The bag vector that used to be computed here was
    never used or returned, so that dead work has been removed.

    Args:
        sentence: Raw user text.
        words: Unused; kept so existing call sites keep working.
        tokenizer: Object providing ``texts_to_sequences``.
        max_len: Value forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        ``np.array`` of the ``pad_sequences`` output for the single input text.
    """
    sentence_words = clean_up_sentence(sentence)
    sequences = tokenizer.texts_to_sequences([' '.join(sentence_words)])
    padded = pad_sequences(sequences, maxlen=max_len)
    return np.array(padded)

def predict_class(sentence, model, tokenizer, words, max_len):
    """Classify *sentence* and return ``(tag, score)``.

    The sentence is encoded with ``bow``, passed to ``model.predict`` and the
    first row of the result is inspected. The tag is looked up in the
    module-level ``classes`` list at the index of the highest score; the
    score returned alongside it is that highest value.
    """
    encoded = bow(sentence, words, tokenizer, max_len)
    scores = model.predict(encoded, verbose=0)[0]
    best_index = np.argmax(scores)
    return classes[best_index], np.max(scores)

def get_response(predicted_class, max_prob, intents_json):
    """Build the chatbot reply for a predicted intent tag.

    Looks up the first intent in ``intents_json['intents']`` whose ``tag``
    equals *predicted_class*, picks one of its ``responses`` at random and
    appends a likelihood figure computed as ``max_prob * probability * 100``.
    Missing ``disease`` / ``probability`` keys fall back to
    ``'Không xác định'`` and ``0.5``. If no intent matches, a fixed fallback
    message is returned.
    """
    matched = next(
        (entry for entry in intents_json['intents'] if entry['tag'] == predicted_class),
        None,
    )
    if matched is None:
        return "Tôi không thể xác định bệnh từ triệu chứng bạn cung cấp. Hãy mô tả chi tiết hơn hoặc đi khám bác sĩ."

    disease = matched.get('disease', 'Không xác định')
    prob = matched.get('probability', 0.5)
    response = random.choice(matched['responses'])
    return f"{response} (Khả năng mắc {disease}: {max_prob*prob*100:.1f}%)"

# Chatbot loop: read symptoms from stdin until the user types 'quit'
print("\nChatbot đã sẵn sàng! Mô tả triệu chứng của bạn hoặc nhập 'quit' để thoát.")
while True:
    try:
        # Strip so that ' quit ' is recognised and blank lines are detectable
        user_input = input("Bạn: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted: end the session quietly
        break
    if user_input.lower() == 'quit':
        break
    if not user_input:
        # An empty message would be encoded as an all-padding sequence and
        # still receive a "diagnosis"; ask again instead.
        continue
    predicted_class, max_prob = predict_class(user_input, model, tokenizer, words, max_len)
    response = get_response(predicted_class, max_prob, data)
    print(f"Chatbot: {response}")

Epoch 1/200




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.0000e+00 - loss: 1.7980
Epoch 2/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3133 - loss: 1.7876
Epoch 3/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.3467 - loss: 1.7820
Epoch 4/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2800 - loss: 1.7765
Epoch 5/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4267 - loss: 1.7647 
Epoch 6/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4533 - loss: 1.7606 
Epoch 7/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4733 - loss: 1.7412 
Epoch 8/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4333 - loss: 1.7162
Epoch 9/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 




Kết quả huấn luyện:
Loss cuối cùng: 0.0014
Accuracy cuối cùng: 1.0000
Biểu đồ huấn luyện đã được lưu vào 'training_plot.png'

Chatbot đã sẵn sàng! Mô tả triệu chứng của bạn hoặc nhập 'quit' để thoát.
