Imports and data loading:

In [1]:
import json
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout, LayerNormalization, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Load intents file.
# Decode explicitly as UTF-8 so non-ASCII patterns/responses are read the same
# way on every platform instead of depending on the locale's default encoding.
with open('intents.json', encoding='utf-8') as file:
    data = json.load(file)

Preprocessing:


In [2]:

# Flatten the intents into parallel sentence/tag lists, remember the responses
# for every tag, and record each distinct tag in first-seen order.
training_sentences = []
training_labels = []
labels = []
responses = {}

for intent in data['intents']:
    patterns = intent['patterns']
    tag = intent['tag']
    training_sentences.extend(patterns)
    training_labels.extend([tag] * len(patterns))
    responses[tag] = intent['responses']
    if tag not in labels:
        labels.append(tag)

num_classes = len(labels)

# Fit the tokenizer on the patterns and turn every pattern into a fixed-length
# sequence of token ids, padded at the end.
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(training_sentences)

# Padding length is the largest whitespace-delimited word count of any pattern.
# NOTE(review): the Tokenizer may split text differently from str.split()
# (e.g. around punctuation), so a tokenized pattern could be longer than
# max_len and get truncated by pad_sequences -- confirm against the data.
max_len = max(len(sentence.split()) for sentence in training_sentences)
vocab_size = 1 + len(tokenizer.word_index)

sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    padding='post',
)

# Map each tag to its position in `labels`, then one-hot encode the targets.
label_index = {label: idx for idx, label in enumerate(labels)}
training_labels = np.array([label_index[tag_name] for tag_name in training_labels])
categorical_labels = to_categorical(training_labels, num_classes=num_classes)

# Hold out 20% of the examples; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    categorical_labels,
    test_size=0.2,
    random_state=42,
)


Build and train the model:

In [None]:
from keras.models import Sequential
from keras.layers import Input, Embedding, LSTM, Dense, Dropout, Bidirectional, LayerNormalization
from keras.regularizers import l2
from keras.callbacks import EarlyStopping

# Define the model.
# Token ids are embedded (mask_zero=True makes id 0 act as padding), passed
# through a bidirectional LSTM that returns the whole sequence, normalised,
# reduced to a single vector by a second LSTM, then fed through two
# Dense -> LayerNormalization -> Dropout stages before the softmax over intents.
model = Sequential([
    Input(shape=(max_len,)),
    Embedding(input_dim=vocab_size, output_dim=100, mask_zero=True),
    Bidirectional(LSTM(64, return_sequences=True)),
    LayerNormalization(),
    LSTM(128),
    Dense(128, activation="relu"),
    LayerNormalization(),
    Dropout(0.2),
    Dense(128, activation="relu"),
    LayerNormalization(),
    Dropout(0.2),
    Dense(num_classes, activation="softmax")
])

# Compile the model (targets are one-hot vectors, hence categorical cross-entropy).
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model.
# Validation metrics are taken from a slice of the *training* data rather than
# from X_test/y_test, so the test split stays untouched until the final
# evaluation below and the reported test score is a genuine held-out figure.
# Early stopping watches that validation loss and restores the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
model.fit(
    X_train,
    y_train,
    epochs=21,
    validation_split=0.1,
    callbacks=[early_stopping],
)

# Evaluate the model on the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")


TODO: add comments explaining what each part does.


Evaluation:

In [6]:
# Re-run the evaluation on X_test/y_test and report accuracy as a percentage.
# The score is computed on the test split, so it is labelled as test accuracy
# to match the "Test Loss / Test Accuracy" report printed after training.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy*100:.2f}%")




Validation accuracy: 38.30%


In [7]:

# Persist the trained model to 'chatbot_model.h5' in the working directory.
model_path = 'chatbot_model.h5'
model.save(model_path)


  saving_api.save_model(


In [8]:
import tensorflow as tf

# Reload the model from the file written by the save step, so the conversion
# works from the persisted artifact.
model = tf.keras.models.load_model('chatbot_model.h5')

# Build the converter and allow both native TFLite builtins and, as a
# fallback, regular TensorFlow ops (SELECT_TF_OPS).
converter = tf.lite.TFLiteConverter.from_keras_model(model)
supported_op_sets = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # TensorFlow Lite ops
    tf.lite.OpsSet.SELECT_TF_OPS,  # TensorFlow ops
]
converter.target_spec.supported_ops = supported_op_sets

# (Optional) Turn off the experimental lowering of tensor list ops.
converter._experimental_lower_tensor_list_ops = False

# Run the conversion and write the resulting flatbuffer bytes to disk.
tflite_model = converter.convert()
with open('chat_model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

print("TFLite model has been converted and saved.")

# Export the tokenizer and the tag -> class-index mapping next to the model.
# NOTE(review): tokenizer.to_json() appears to return an already-serialised
# JSON string, so json.dump stores it as a JSON string literal; loaders would
# need to json.load it first -- confirm the consumers expect this.
with open('tokenizer.json', 'w') as tokenizer_file:
    json.dump(tokenizer.to_json(), tokenizer_file)
with open('label_index.json', 'w') as label_index_file:
    json.dump(label_index, label_index_file)

TFLite model has been converted and saved.


In [9]:
# Write the tag -> responses mapping (built during preprocessing) to disk.
serialized_responses = json.dumps(responses)
with open('responses.json', 'w') as responses_file:
    responses_file.write(serialized_responses)

In [10]:
# Persist the padding length so inference code can pad inputs the same way.
# Reuse the `max_len` computed during preprocessing (the value the sequences
# were padded to and the model's Input layer was built with) instead of
# re-deriving it from a copy of the formula, which would silently drift if the
# preprocessing ever changed. int() keeps the value JSON-serialisable even if
# it is a NumPy integer.
with open('config.json', 'w') as config_file:
    json.dump({'max_len': int(max_len)}, config_file)

In [11]:
# Chatbot function
def chatbot_response(text):
    """Return a randomly chosen response for the intent predicted from ``text``.

    The text is tokenized with the notebook's ``tokenizer``, padded at the end
    to ``max_len``, and scored by ``model``. The highest-scoring class index is
    looked up in ``labels`` and one entry of ``responses`` for that tag is
    picked with ``np.random.choice``.

    Args:
        text: The user's message as a single string.

    Returns:
        One of the responses stored for the predicted tag.
    """
    encoded = pad_sequences(
        tokenizer.texts_to_sequences([text]),
        maxlen=max_len,
        padding='post',
    )
    # predict() returns one row per input; only a single message was passed.
    scores = model.predict(encoded)[0]
    predicted_tag = labels[np.argmax(scores)]
    candidate_replies = responses[predicted_tag]
    return np.random.choice(candidate_replies)

# Quick smoke test of the chatbot with a greeting
print(chatbot_response("Hi"))

Hi there. What brings you here today?
