In [54]:
import json
import nltk
import string
import numpy as np
import pickle
from keras.src.models import Sequential
from keras.src.layers import Dense, Dropout, Input

# Print floats in fixed-point notation rather than scientific notation
np.set_printoptions(suppress=True)

In [6]:
# Load the intent definitions; later cells read "tag", "patterns" and "responses"
# from each entry. The encoding is given explicitly because open() otherwise
# falls back to the platform default, which is not UTF-8 everywhere
with open("data/intents.json", encoding="utf-8") as f:
    intents = json.load(f)["intents"]

In [7]:
# Accumulators filled while walking the intents: the vocabulary, the set of
# tags, and the (tokens, tag) training pairs
unique_words, unique_classes = set(), set()
document = []

# Lemmatizer shared by the training and inference code paths
lemmatizer = nltk.WordNetLemmatizer()

# Tokens found inside this punctuation string are dropped during tokenization
ignore_words = string.punctuation
ignore_words

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [8]:
# Rebuild the corpus from scratch so this cell can be re-run safely: a later cell
# rebinds unique_words / unique_classes to sorted lists (which have no .update /
# .add), and appending to an already-filled `document` would duplicate samples
unique_words = set()
unique_classes = set()
document = []

for intent in intents:
    for pattern in intent["patterns"]:
        # Tokenize, drop punctuation tokens, then lowercase and lemmatize the rest
        tokens = [
            lemmatizer.lemmatize(token.lower())
            for token in nltk.word_tokenize(pattern)
            if token not in ignore_words
        ]
        unique_words.update(tokens)
        # Added inside the pattern loop, so a tag with no patterns is never
        # registered as a class
        unique_classes.add(intent["tag"])
        document.append((tokens, intent["tag"]))

In [9]:
# Freeze both collections into sorted lists so that every word and every class
# gets a stable position for the encoding step
unique_words, unique_classes = sorted(unique_words), sorted(unique_classes)

In [55]:
# Persist the vocabulary and the class list, one pickle file each
for pickle_path, payload in (
    ("data/words.pkl", unique_words),
    ("data/classes.pkl", unique_classes),
):
    with open(pickle_path, "wb") as f:
        pickle.dump(payload, f)

In [10]:
# Containers for the encoded samples (inputs) and their one-hot labels (targets)
trainX, trainY = [], []

In [11]:
# Start from empty lists so re-running this cell does not append a second copy
# of every sample to the training set
trainX = []
trainY = []

# Position lookups built once, instead of a linear list.index() scan per word
word_to_column = {word: column for column, word in enumerate(unique_words)}
class_to_column = {tag: column for column, tag in enumerate(unique_classes)}

for tokens, tag in document:
    # Bag-of-words vector: 1 at the position of every word present in the pattern
    features = [0] * len(unique_words)
    for token in tokens:
        features[word_to_column[token]] = 1
    trainX.append(np.array(features))

    # One-hot vector marking the pattern's tag
    labels = [0] * len(unique_classes)
    labels[class_to_column[tag]] = 1
    trainY.append(np.array(labels))

In [16]:
# Input and output widths are taken from the encoded training data
n_features = len(trainX[0])
n_classes = len(trainY[0])

# Two hidden ReLU layers, each followed by dropout, then a softmax over classes
layers = [
    Input(shape=(n_features,)),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(n_classes, activation="softmax"),
]
model = Sequential(layers)
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.summary()

In [17]:
# Stack the per-sample vectors into 2-D arrays before fitting
features_matrix = np.array(trainX)
labels_matrix = np.array(trainY)
model.fit(features_matrix, labels_matrix, epochs=200, batch_size=5)

Epoch 1/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.0955 - loss: 2.1637  
Epoch 2/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0821 - loss: 2.1135    
Epoch 3/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0861 - loss: 2.1905    
Epoch 4/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1452 - loss: 1.9987    
Epoch 5/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.3585 - loss: 1.8813
Epoch 6/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2957 - loss: 1.9042
Epoch 7/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2752 - loss: 1.9292
Epoch 8/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2771 - loss: 1.8879
Epoch 9/200
[1m7/7[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2d11e273b30>

In [19]:
def input_parser(word: str):
    """Encode a piece of user text as a bag-of-words row for the model.

    The text is tokenized, tokens found in ``ignore_words`` are dropped, and the
    remaining tokens are lowercased and lemmatized. Tokens that are not in
    ``unique_words`` are ignored.

    Args:
        word: Raw text to encode (may contain several words).

    Returns:
        A NumPy array of shape ``(1, len(unique_words))`` holding 0/1 flags.
    """
    tokens = [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(word)
        if token not in ignore_words
    ]
    features = [0] * len(unique_words)
    for token in tokens:
        if token not in unique_words:
            continue
        features[unique_words.index(token)] = 1
    # Wrapped in an outer list so the result is a batch containing one sample
    return np.array([features])

In [53]:
# Interactive chat loop: classify each line the user types, print a random
# response for the predicted intent, and stop once the "goodbye" intent is hit
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted kernel: leave the loop without a traceback
        break
    if not user_input.strip():
        # Blank input would be classified as an all-zero vector and still get
        # an arbitrary reply, so ask again instead
        continue
    prediction = model.predict(input_parser(user_input))
    print(f"Prediction probability: {np.max(prediction)}")
    class_index = np.argmax(prediction)
    tag = unique_classes[class_index]
    print(f"Predicted class: {tag}")
    for intent in intents:
        if intent["tag"] == tag:
            responses = intent["responses"]
            print(f"Bot: {np.random.choice(responses)}")
            break
    if tag == "goodbye":
        break

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
Prediction probability: 0.9999974966049194
Predicted class: greeting
Bot: Hi there! What can I help you with?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Prediction probability: 0.997001588344574
Predicted class: goodbye
Bot: See you soon! Keep learning!
