In [46]:
import json 
import numpy as np 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping


In [47]:
with open('chatbot_data.json') as file:
    data = json.load(file)
    
training_sentences = []
training_labels = []
labels = []
responses = []


for intent in data['intents']:
    for pattern in intent['patterns']:
        training_sentences.append(pattern)
        training_labels.append(intent['tag'])
    responses.append(intent['responses'])
    
    if intent['tag'] not in labels:
        labels.append(intent['tag'])
        
num_classes = len(labels)

In [48]:
lbl_encoder = LabelEncoder()
lbl_encoder.fit(training_labels)
training_labels = lbl_encoder.transform(training_labels)

In [49]:
vocab_size = 1000
embedding_dim = 16
max_len = 20
oov_token = "<OOV>"

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, truncating='post', maxlen=max_len)

In [50]:
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(GlobalAveragePooling1D())
model.add(Dense(32, activation='relu', kernel_regularizer='l2'))  # Regularización L2
model.add(Dropout(0.3))  # Dropout
model.add(Dense(32, activation='relu', kernel_regularizer='l2'))
model.add(Dropout(0.3))  # Dropout
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='adam', metrics=['accuracy'])

model.summary()
epochs = 500
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(
    padded_sequences, 
    np.array(training_labels), 
    validation_split=0.2,  
    epochs=epochs, 
    callbacks=[early_stop]
)



Epoch 1/500
5/5 ━━━━━━━━━━━━━━━━━━━━ 10s 3s/step - accuracy: 0.0312 - loss: 4.07 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.0338 - loss: 4.073 ━━━━━━━━━━━━━━━━━━━━ 3s 156ms/step - accuracy: 0.0342 - loss: 4.0727 - val_accuracy: 0.0000e+00 - val_loss: 4.0677
Epoch 2/500
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - accuracy: 0.0312 - loss: 4.052 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - accuracy: 0.0312 - loss: 4.046 ━━━━━━━━━━━━━━━━━━━━ 0s 52ms/step - accuracy: 0.0329 - loss: 4.0447 - val_accuracy: 0.0000e+00 - val_loss: 4.0540
Epoch 3/500
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 53ms/step - accuracy: 0.0312 - loss: 4.027 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step - accuracy: 0.0392 - loss: 4.0187 - val_accuracy: 0.0000e+00 - val_loss: 4.0404
Epoch 4/500
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step - accuracy: 0.0312 - loss: 4.001 ━━━━━━━━━━━━━━━━━━━━ 0s 31ms/step - accuracy: 0.0408 - loss: 3.9929 - val_accuracy: 0.0000e+00 - val_loss: 4.0287
Epoch 5/500
5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step - accuracy: 0.0312 - loss: 3.

In [51]:
# to save the trained model
model.save("chat_model.h5")

import pickle

# to save the fitted tokenizer
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# to save the fitted label encoder
with open('label_encoder.pickle', 'wb') as ecn_file:
    pickle.dump(lbl_encoder, ecn_file, protocol=pickle.HIGHEST_PROTOCOL)



In [52]:
import json 
import numpy as np
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder

import colorama 
colorama.init()
from colorama import Fore, Style, Back

import random
import pickle

with open("chatbot_data.json") as file:
    data = json.load(file)


def chat():
    # load trained model
    model = keras.models.load_model('chat_model.h5')

    # load tokenizer object
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    # load label encoder object
    with open('label_encoder.pickle', 'rb') as enc:
        lbl_encoder = pickle.load(enc)

    # parameters
    max_len = 20

    print(Fore.YELLOW + "Start messaging with the bot (type quit to stop)!" + Style.RESET_ALL)
    while True:
        print(Fore.LIGHTBLUE_EX + "User: " + Style.RESET_ALL, end="")
        inp = input()
        if inp.lower() == "quit":
            break

        result = model.predict(keras.preprocessing.sequence.pad_sequences(tokenizer.texts_to_sequences([inp]),
                                             truncating='post', maxlen=max_len))
        tag = lbl_encoder.inverse_transform([np.argmax(result)])

        for intent in data['intents']:
            if intent['tag'] == tag[0]:
                print(Fore.GREEN + "ChatBot:" + Style.RESET_ALL, random.choice(intent['responses']))
                break
        # print(Fore.GREEN + "ChatBot:" + Style.RESET_ALL,random.choice(responses))

chat()



Start messaging with the bot (type quit to stop)!
User: 