In [16]:
import nltk, json, pickle, numpy as np, random, string, unicodedata
from nltk.stem import WordNetLemmatizer
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

True

In [None]:
lemmatizer = WordNetLemmatizer()
with open("./intents.json", 'r', encoding='utf-8') as f:
    data = json.load(f)

words, classes, documents = [], [], []
def should_keep_word(word):
    # Remove if it's purely punctuation/whitespace
    if all(unicodedata.category(char).startswith('P') for char in word):
        return False
    # Remove if it's purely symbols
    if all(unicodedata.category(char).startswith('S') for char in word):
        return False
    return True
# ignore_chars = string.punctuation

for intent in data['intents']:
    tag = intent['tag']
    if tag not in classes:
        classes.append(tag)
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        documents.append((tokens, tag))

cleaned_words = []
for w in words:
    if should_keep_word(w):
        w_cleaned = lemmatizer.lemmatize(w.lower())
        cleaned_words.append(w_cleaned)

words = sorted(set(cleaned_words))
classes = sorted(set(classes))

pickle.dump(words, open('pickle_files/words.pkl', 'wb'))
pickle.dump(classes, open('pickle_files/classes.pkl', 'wb'))


In [18]:
documents[:10]

[(['Hi', 'there'], 'greeting'),
 (['How', 'are', 'you'], 'greeting'),
 (['Is', 'anyone', 'there', '?'], 'greeting'),
 (['Hey'], 'greeting'),
 (['Hola'], 'greeting'),
 (['Hello'], 'greeting'),
 (['Good', 'day'], 'greeting'),
 (['Bye'], 'goodbye'),
 (['See', 'you', 'later'], 'goodbye'),
 (['Goodbye'], 'goodbye')]

In [19]:
output_empty = [0]*len(classes)
training = []

for doc in documents[:10]:
    bag = [0]*len(words)
    output_row = [0]*len(classes)
    pattern_words = doc[0]
    for i, w in enumerate(words):
        if w in pattern_words:
            bag[i] = 1
        else:
            bag[i] = 0
    output_row[classes.index(doc[1])]=1
    training.append([bag, output_row])
print(f"Created {len(training)} training samples")

Created 10 training samples


In [29]:
import random

random.shuffle(training)
train_X = np.array([item[0] for item in training])
train_y = np.array([item[1] for item in training])

print("X Shape:", train_X.shape)
print("y shape:", train_y.shape)

X Shape: (10, 637)
y shape: (10, 262)


In [33]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import Input

model = Sequential()
model.add(Input(shape=(len(train_X[0]),)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

In [34]:
from keras.optimizers import SGD
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Training
# model.fit(train_X, train_y, epochs=200, batch_size=5, verbose=1);

Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0000e+00 - loss: 5.5467 
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1000 - loss: 5.4867    
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 1.0000 - loss: 5.3414
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 5.1692
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 4.9726
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 4.6875
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 1.0000 - loss: 4.3888
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 3.7775 
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x129e52320>

In [37]:
model.save('models/chatbot_model.keras')
print("Model training complete and saved!")

Model training complete and saved!
