In [None]:
#import the necessary libraries
import nltk
from nltk.stem import WordNetLemmatizer
import json
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
import random

In [None]:
# Download the NLTK resources this notebook relies on:
#   - 'punkt' / 'punkt_tab': tokenizer data used by nltk.word_tokenize.
#     Newer NLTK releases look up 'punkt_tab' rather than 'punkt', so both
#     are fetched to keep the notebook working across NLTK versions.
#   - 'wordnet': corpus used by WordNetLemmatizer.
for resource in ('punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

In [None]:
# Create the single WordNetLemmatizer instance that is reused below, both when
# building the vocabulary and when encoding each pattern as a bag of words.
lemmatizer = WordNetLemmatizer()

In [None]:
# Load the intents definition. The code below reads intents['intents'], and
# each entry's 'tag' and 'patterns' keys.
# JSON text is UTF-8; pass the encoding explicitly so the platform default
# (e.g. a legacy code page on Windows) is never used to decode the file.
with open('intents.json', encoding='utf-8') as file:
    intents = json.load(file)

In [None]:
# Accumulators filled while walking the intents:
#   words     - every token seen in any pattern (normalised later)
#   classes   - distinct intent tags
#   documents - (tokens, tag) pairs, one per pattern
words, classes, documents = [], [], []

# Punctuation tokens that must not end up in the vocabulary
ignore_words = list('?!.')

In [None]:
# Walk every pattern of every intent, collecting tokens, (tokens, tag)
# documents and the distinct tags.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Split the pattern into tokens and grow the raw vocabulary
        tokens = nltk.word_tokenize(pattern)
        words += tokens

        # Remember which tag this tokenized pattern belongs to
        tag = intent['tag']
        documents.append((tokens, tag))

        # Register the tag the first time it is seen
        if tag in classes:
            continue
        classes.append(tag)

In [None]:
# Build the final vocabulary: skip the ignored punctuation tokens, lowercase
# and lemmatize everything else, then keep each lemma once in sorted order.
words = sorted({
    lemmatizer.lemmatize(token.lower())
    for token in words
    if token not in ignore_words
})

In [None]:
# De-duplicate the tags and put them in a stable, sorted order; the position
# of a tag in this list is later used as its index in the one-hot output row.
classes = sorted(set(classes))

In [None]:
# Summarise what was extracted from the intents file, one line per item
print(
    f"{len(documents)} documents",
    f"{len(classes)} classes: {classes}",
    f"{len(words)} unique lemmatized words: {words}",
    sep="\n",
)

In [None]:
# Persist the vocabulary and the class list as pickle files.
# Context managers guarantee each file is flushed and closed even if
# pickling fails, instead of leaving the handle to the garbage collector.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

In [None]:
# Containers for the training set:
#   training     - list of [bag_of_words, one_hot_tag] rows, filled below
#   output_empty - all-zero template with one slot per class; copied per row
training = []
output_empty = [0 for _ in classes]

In [None]:
# Encode every (tokens, tag) document as a [bag_of_words, one_hot_tag] row.
for pattern_tokens, tag in documents:
    # Normalise the pattern's tokens the same way the vocabulary was built
    # (lowercase + lemmatize). A set gives constant-time membership tests
    # for the vocabulary scan below.
    pattern_lemmas = {lemmatizer.lemmatize(token.lower()) for token in pattern_tokens}

    # Bag of words: 1 where the vocabulary word occurs in the pattern, else 0,
    # in the same order as `words`
    bag = [1 if w in pattern_lemmas else 0 for w in words]

    # One-hot target: copy the all-zero template and set the slot of this tag
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

In [None]:
# Shuffle the rows in place, then split them into feature and target arrays.
random.shuffle(training)

# Build the arrays straight from the Python lists. Slicing an object array
# instead would yield object-dtype results whenever the vocabulary size
# happens to equal the number of classes, because NumPy then treats the rows
# as one regular 3-D block.
train_x = np.array([bag for bag, _ in training])
train_y = np.array([output_row for _, output_row in training])

# Keep `training` as an object array, as before
training = np.array(training, dtype=object)

In [None]:
# Confirm that the training data has been created
print("Training data created")

In [None]:
# Feed-forward classifier: two ReLU hidden layers (128 and 64 units), each
# followed by 50% dropout, and a softmax output with one unit per column of
# train_y. The input width is the length of one bag-of-words row.
model = Sequential([
    Dense(128, input_shape=(len(train_x[0]),), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(len(train_y[0]), activation='softmax'),
])

In [None]:
# Compile the model with SGD using Nesterov momentum.
# The former `decay=1e-6` argument is intentionally dropped: current Keras
# optimizers no longer accept it (it is rejected or ignored depending on the
# release). If learning-rate decay is needed, pass a learning-rate schedule
# as `learning_rate` instead.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Fit the classifier on the bag-of-words data and keep the object returned
# by fit() in `hist`.
hist = model.fit(
    train_x,
    train_y,
    epochs=200,
    batch_size=5,
    verbose=1,
)

In [None]:
# Save the trained model to disk.
# Only the path is passed: the second positional parameter of Model.save is
# `overwrite`, so the fit result `hist` must not be supplied there (it is not
# stored in the model file). An existing file is still overwritten by default.
model.save('chatbot_model.h5')

In [None]:
# Confirm that the model has been created and saved
print("Model created and saved")