In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import random
import pickle
import json
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer

In [None]:
# Build the vocabulary, the class list and the (tokens, tag) corpus from
# intents.json, then persist the vocabulary and class list for inference.
# NOTE: WordNetLemmatizer needs the NLTK 'wordnet' corpus to be installed
# (e.g. via nltk.download('wordnet')) before lemmatize() is called.
lemmatizer = WordNetLemmatizer()
tokenizer = RegexpTokenizer(r'\w+')  # keeps runs of word characters only

# Data loading and preprocessing
words, classes, documents = [], [], []
# The r'\w+' tokenizer never emits '?' or '!', so this filter is effectively
# a no-op; it is kept so the behaviour stays the same if the tokenizer changes.
ignore_words = ['?', '!']

# Read the intents with an explicit encoding and close the handle promptly.
with open('intents.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

# Tokenize every pattern; remember which tag each token list belongs to.
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        w = tokenizer.tokenize(pattern)
        words.extend(w)
        documents.append((w, tag))
    if intent['patterns'] and tag not in classes:
        classes.append(tag)

# Lemmatize, lowercase, de-duplicate and sort so indices are stable across runs.
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words})
classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique lemmatized words", words)

# Save words and classes for future use (handles are closed by the with-blocks).
with open('texts.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('labels.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

# Containers consumed by the training-data cell below.
training = []
output_empty = [0] * len(classes)

In [None]:
# Preview only the first few (tokens, tag) pairs instead of dumping the whole corpus.
documents[:5]

In [4]:

# Build one [bag-of-words, one-hot label] pair per document.
# `training` is reset here so that re-running this cell does not append
# duplicate samples to a list left over from a previous execution.
training = []

# `words` is sorted and unique, so each word's position is its feature index;
# a dict gives O(1) lookups instead of a list scan per token.
word_to_index = {word: idx for idx, word in enumerate(words)}

for doc in documents:
    pattern_words = [lemmatizer.lemmatize(w.lower()) for w in doc[0]]

    # Create the bag of words (1 where the vocabulary word occurs in the pattern)
    bag = [0] * len(words)
    for s in pattern_words:
        idx = word_to_index.get(s)
        if idx is not None:
            bag[idx] = 1

    # Create output row (one-hot encoding for classes)
    output_row = [0] * len(classes)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

In [None]:
# Shuffle and prepare data for training.
# A dedicated, seeded generator makes the sample order (and therefore the
# samples that end up in the validation split) reproducible on a fresh run,
# without touching the global `random` state used elsewhere.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(training)

train_x = np.array([sample[0] for sample in training], dtype=np.float32)
train_y = np.array([sample[1] for sample in training], dtype=np.float32)

# Every feature row must have a matching label row.
assert len(train_x) == len(train_y), "feature/label row count mismatch"

print("Training data created")

In [None]:
# Feed-forward intent classifier: bag-of-words vector in, class probabilities out.
n_features = train_x.shape[1]
n_classes = len(classes)

model = Sequential([
    Input(shape=(n_features,)),
    # Two fully connected hidden layers, each followed by dropout.
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dropout(0.4),
    # Softmax output: one unit per intent class.
    Dense(n_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot label rows; Adam with defaults.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview.
model.summary()

In [None]:
# Training hyperparameters, named so they are easy to find and tune.
EPOCHS = 500
BATCH_SIZE = 5
VALIDATION_SPLIT = 0.1
EARLY_STOPPING_PATIENCE = 50

# Early stopping to prevent overfitting: watch the held-out validation loss.
# Monitoring the training loss would keep training while the model memorises
# the data, and restore_best_weights would then restore the most-overfit epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=EARLY_STOPPING_PATIENCE,
    restore_best_weights=True,
)

# Fit the model; Keras holds out the last VALIDATION_SPLIT fraction of the
# (already shuffled) samples for validation.
hist = model.fit(
    train_x,
    train_y,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
    validation_split=VALIDATION_SPLIT,
    callbacks=[early_stopping],
)

# Save the model
model.save('model.keras')
print("Model created and saved")