**Libraries Used**

In [1]:
import nltk  # Import NLTK library for NLP utilities
import json  # Import JSON for reading/writing structured data
import numpy as np  # Import NumPy for numerical operations and arrays
import random as rnd  # Import random module (aliased as rnd) for random selections
import pickle as pk  # Import pickle (aliased as pk) for saving/loading Python objects
from keras.models import Sequential  # Import Sequential model API from Keras
from keras.layers import Dense, Dropout  # Import Dense and Dropout layers used in neural network architecture
from keras.optimizers import SGD  # Import SGD optimizer for training the model
from nltk.stem import WordNetLemmatizer  # Import WordNetLemmatizer for normalizing words to base forms
lemmatizer = WordNetLemmatizer()  # Create a lemmatizer instance for later text preprocessing

**Text Preprocessing and Training Data Preparation**

In [2]:
Words = []  # Initialize list of all tokenized words
Classes = []  # Initialize list of intent class labels
Documents = []  # Initialize list to store (tokenized pattern, tag) pairs

ignore_words = ['?', '!', '.', ',']  # Define punctuation tokens to ignore during vocabulary build
intents_file = open('intents.json' , encoding = 'utf-8').read()  # Read intents dataset from JSON file
intents = json.loads(intents_file)  # Parse JSON text into Python dictionary

for intent in intents['intents']:  # Iterate through each intent entry
    for pattern in intent['patterns']:  # Iterate through each training pattern in the intent
        word = nltk.word_tokenize(pattern)  # Tokenize pattern sentence into words
        Words.extend(word)  # Add tokens to global words list
        Documents.append((word, intent['tag']))  # Save tokenized pattern with its intent tag
        if intent['tag'] not in Classes:  # Add tag to classes if it has not been seen yet
            Classes.append(intent['tag'])  # Append new class label
print(Documents)  # Print collected document-tag pairs

Words = [lemmatizer.lemmatize(word.lower()) for word in Words if word not in ignore_words]  # Normalize words by lowercasing, lemmatizing, and removing ignored tokens
Words = sorted(list(set(Words)))  # Deduplicate and sort vocabulary
Classes = sorted(list(set(Classes)))  # Deduplicate and sort class labels

print(len(Documents), "documents")  # Show number of training documents
print(len(Classes), "classes", Classes)  # Show number of classes and their names
print(len(Words), "unique lemmatized words", Words)  # Show size and content of final vocabulary

pk.dump(Words, open('words.pkl', 'wb'))  # Persist vocabulary to disk
pk.dump(Classes, open('classes.pkl', 'wb'))  # Persist class labels to disk
training = []  # Initialize training dataset container
output_empty = [0] * len(Classes)  # Create zeroed output template for one-hot labels

for doc in Documents:  # Build feature/label rows for each document
    bag = []  # Initialize bag-of-words vector for current document
    pattern_words = doc[0]  # Extract token list from current document
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]  # Normalize document tokens
    for word in Words:  # For each vocabulary word, mark presence in current document
        bag.append(1) if word in pattern_words else bag.append(0)  # Append 1 if word exists in document, otherwise 0
    output_row = list(output_empty)  # Copy empty output template
    output_row[Classes.index(doc[1])] = 1  # Set correct class index to 1 for one-hot label
    training.append([bag, output_row])  # Add feature vector and label to training set

rnd.shuffle(training)  # Shuffle training rows to randomize learning order
training = np.array(training, dtype=object)  # Convert training list to NumPy array
train_x = list(training[:, 0])  # Split features into train_x
train_y = list(training[:, 1])  # Split labels into train_y
print("Training data created")  # Confirm training data preparation is complete

[(['Hi', 'there'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hey'], 'greeting'), (['Hola'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['what', 'is', 'aiolearn', '?'], 'aiolearn'), (['aiolearn'], 'aiolearn'), (['aiolearn', 'academy', '?'], 'aiolearn'), (['academy'], 'aiolearn'), (['the', 'best', 'programming', 'and', 'ai', 'academy'], 'aiolearn'), (['aiolearn', '?'], 'aiolearn'), (['best', 'academy', 'in', 'iran', '?'], 'aiolearn'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Nice', 'chatting', 'to', 'you', ',', 'bye'], 'goodbye'), (['Till', 'next', 'time'], 'goodbye'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks'), (['Awesome', ',', 'thanks'], 'thanks'), (['Thanks', 'for', 'helping', 'me'], 'thanks'), (['How', 'you', 'could', 'help', 'me', '?'], 'options'), (['What', 'you', 'can', 'do', '?'], 'options'), (['What'

**Model Architecture & Training Process**

In [3]:
Model = Sequential([  # Create a Sequential neural network model
    Dense(128, input_shape=(len(train_x[0]),), activation='relu'),  # First hidden layer with 128 neurons and ReLU activation
    Dropout(0.5),  # First dropout layer to reduce overfitting
    Dense(64, activation='relu'),  # Second hidden layer with 64 neurons and ReLU activation
    Dropout(0.5),  # Second dropout layer to reduce overfitting
    Dense(len(train_y[0]), activation='softmax')  # Output layer with Softmax for multi-class classification
])  # End of model architecture definition

sdg = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)  # Configure SGD optimizer with momentum and Nesterov acceleration
Model.compile(loss='categorical_crossentropy', optimizer=sdg, metrics=['accuracy'])  # Compile model with categorical crossentropy loss and accuracy metric

Model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)  # Train the model for 200 epochs with batch size 5
Model.save('chatbot_model_01.h5')  # Save the trained model to disk
print("Model created")  # Print confirmation message after model creation


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

  saving_api.save_model(
