In [12]:
import random
import json
import pickle
import numpy as np

import nltk
from nltk.stem import WordNetLemmatizer


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam

nltk.download('punkt_tab')
nltk.download('wordnet')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\BANKII\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\BANKII\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [13]:
lemmatizer = WordNetLemmatizer()
intents = json.loads(open('intents.json').read())

In [14]:
words = [] # X
classes = [] # Label Y
documents = []
ignore_letters = ['?', '!', '.', ','] # Let chatbot ignore letter to make a performance

In [15]:
for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# print(documents)
print(words)

['Hi', 'How', 'are', 'you', 'Is', 'anyone', 'there', '?', 'Hello', 'Good', 'day', 'Whats', 'up', 'Hey', 'Greetings', 'Cya', 'See', 'you', 'later', 'Goodbye', 'I', 'am', 'leaving', 'Have', 'a', 'good', 'day', 'Bye', 'Ciao', 'See', 'ya', 'Thanks', 'Thank', 'you', 'That', "'s", 'helpful', 'I', 'appreciate', 'it', 'Thanks', 'a', 'lot', 'What', 'is', 'programming', '?', 'What', 'is', 'coding', '?', 'Tell', 'me', 'about', 'programming', 'Tell', 'me', 'about', 'coding', 'What', 'is', 'software', 'development', '?', 'Where', 'can', 'I', 'learn', 'to', 'code', '?', 'Best', 'way', 'to', 'learn', 'to', 'code', 'How', 'can', 'I', 'learn', 'programming', '?', 'Good', 'programming', 'resources', 'Can', 'you', 'recommend', 'good', 'coding', 'resources', '?', 'What', 'is', 'Python', '?', 'Tell', 'me', 'about', 'Python', 'Is', 'Python', 'good', 'for', 'beginners', '?', 'Why', 'use', 'Python', '?', 'What', 'can', 'I', 'do', 'with', 'Python', '?', 'What', 'is', 'JavaScript', '?', 'Tell', 'me', 'about', '

In [16]:
cleaned_words = []
for word in words:
    if word not in ignore_letters:
        cleaned_words.append(lemmatizer.lemmatize(word))

cleaned_words = sorted(set(cleaned_words))
print(cleaned_words)

["'s", 'AI', 'Best', 'Bye', 'Can', 'Ciao', 'Cya', 'Cyber', 'Good', 'Goodbye', 'Greetings', 'Have', 'Hello', 'Hey', 'Hi', 'How', 'I', 'Is', 'JavaScript', 'Python', 'See', 'Tell', 'Thank', 'Thanks', 'That', 'What', 'Whats', 'Where', 'Why', 'a', 'about', 'am', 'an', 'anyone', 'appreciate', 'are', 'artificial', 'beginner', 'business', 'can', 'code', 'coding', 'common', 'cyber', 'cybersecurity', 'data', 'day', 'development', 'do', 'doe', 'eCommerce', 'for', 'future', 'good', 'helpful', 'intelligence', 'is', 'it', 'later', 'learn', 'learning', 'leaving', 'lot', 'machine', 'me', 'my', 'online', 'programming', 'protect', 'recommend', 'resource', 'security', 'site', 'software', 'start', 'store', 'the', 'there', 'threat', 'to', 'up', 'use', 'way', 'web', 'with', 'work', 'ya', 'you']


In [17]:
#Save data
classes = sorted(set(classes))

pickle.dump(cleaned_words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

In [18]:
training = []
output_empty = [0] * len(classes)  # Create an empty one-hot encoded list

for document in documents:
    bag = []
    word_patterns = document[0]  # Extract tokenized words from document
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]  # Lemmatize words

    # Create a bag of words
    bag = [1 if word in word_patterns else 0 for word in cleaned_words]

    # Create one-hot encoded output
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1

    training.append([bag, output_row])

# Shuffle and convert to NumPy array (with dtype=object to avoid shape issues)
random.shuffle(training)
training = np.array(training, dtype=object)

# Extract features and labels
train_x = [row[0] for row in training]  # Input features (bag of words)
train_y = [row[1] for row in training]  # Output labels (one-hot encoded)

print(f"Training samples: {len(train_x)}, Labels: {len(train_y)}")
print(f'X_Train: {train_x[0]}')
print(f'Y_Train: {train_y[0]}')

Training samples: 55, Labels: 55
X_Train: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Y_Train: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]


In [19]:
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

In [20]:
adam = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [21]:
model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

Epoch 1/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1891 - loss: 2.3014  
Epoch 2/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1506 - loss: 2.2975 
Epoch 3/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0481 - loss: 2.3697     
Epoch 4/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1911 - loss: 2.2748 
Epoch 5/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1253 - loss: 2.2423 
Epoch 6/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1393 - loss: 2.2533     
Epoch 7/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2053 - loss: 2.2539 
Epoch 8/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2516 - loss: 2.2207 
Epoch 9/200
[1m11/11[0m [32m

<keras.src.callbacks.history.History at 0x1f1ac680ed0>

In [22]:
model.save('small_chatbot.h5')

