In [1]:
# importing libraries
import json
import pickle
import random

import nltk
import numpy as np
from keras.layers import Dense, Activation, Dropout, Input
from keras.models import Sequential
from keras.optimizers import SGD
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources needed for tokenizing and lemmatizing.
nltk.download('punkt')
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
# Tokenize and lemmatize the intent patterns, then persist the vocabulary and labels.
words = []
classes = []
documents = []
ignore_words = ['?', '!']

# Read the intents through a context manager so the file handle is closed
# as soon as the text has been loaded.
with open('intents.json') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Split the pattern into tokens and add them to the raw vocabulary.
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        # Each document pairs a pattern's tokens with the tag of its intent.
        documents.append((w, intent['tag']))
        # Record every tag once.
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Lemmatize and lowercase every token (dropping the ignored punctuation),
# then de-duplicate and sort to get a stable vocabulary order.
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
# Sort the classes so label indices are stable as well.
classes = sorted(set(classes))

# documents = combination between patterns and intents
print(len(documents), "documents")
# classes = intents
print(len(classes), "classes", classes)
# words = all words, vocabulary
print(len(words), "unique lemmatized words", words)

# Save the processed data; the `with` blocks guarantee both pickles are
# flushed and closed before the notebook moves on.
with open('texts.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('labels.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

# Container for the training samples.
training = []
# All-zero template for the one-hot label rows (one slot per class).
output_empty = [0] * len(classes)

297 documents
130 classes ['What are the types of depression?', 'about', 'afternoon', 'anxious', 'ask', 'at what age does anxiety peak?', 'can lack of sleep make you feel sad?', 'can low blood sugar cause suicidal thoughts?', 'casual', 'creation', 'death', 'default', 'depressed', 'do we control our thoughts?', 'does oversleeping cause depression?', 'done', 'evening', 'fact-1', 'fact-10', 'fact-11', 'fact-12', 'fact-13', 'fact-14', 'fact-15', 'fact-16', 'fact-17', 'fact-18', 'fact-19', 'fact-2', 'fact-20', 'fact-21', 'fact-22', 'fact-23', 'fact-24', 'fact-25', 'fact-26', 'fact-27', 'fact-28', 'fact-29', 'fact-3', 'fact-30', 'fact-31', 'fact-32', 'fact-5', 'fact-6', 'fact-7', 'fact-8', 'fact-9', 'friends', 'general-conversation', 'goodbye', 'greeting', 'happy', 'hate-me', 'hate-you', 'help', 'how can we reduce anxiety?', 'how does depression affect the world?', 'how long can anxiety last?', 'how many thoughts a day do we have?', 'i am a victim of bullying', 'i am afraid i will fail again

In [3]:
# Build the training set: one bag-of-words vector and one one-hot label per pattern.
# Reset the container here so that re-running this cell does not append a
# second copy of every sample to `training`.
training = []
for doc in documents:
    # Tokens of the pattern and the tag of the intent it belongs to.
    pattern_tokens, tag = doc
    # Lemmatize and lowercase the tokens so they match the vocabulary in
    # `words`; a set is enough because only membership is tested below.
    pattern_words = {lemmatizer.lemmatize(token.lower()) for token in pattern_tokens}
    # Bag of words: 1 where the vocabulary word occurs in the pattern, else 0.
    bag = [1 if vocab_word in pattern_words else 0 for vocab_word in words]

    # One-hot label: copy the all-zero template and set the slot of this tag.
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

In [4]:
# Shuffle the samples in place, then split them into feature and label arrays.
random.shuffle(training)

# Every sample is a [bag_of_words, one_hot_label] pair, so position 0 holds
# the features and position 1 holds the label.
train_x = np.array([sample[0] for sample in training])
train_y = np.array([sample[1] for sample in training])

print("Training data created")
print(f"Train X shape: {train_x.shape}")
print(f"Train Y shape: {train_y.shape}")

Training data created
Train X shape: (297, 359)
Train Y shape: (297, 130)


In [5]:
# Create the model: two hidden Dense layers (128 and 64 units, ReLU), each
# followed by 50% dropout, and a softmax output layer with one unit per intent.
# The input size is declared with an explicit Input layer instead of passing
# `input_shape` to the first Dense layer, which Keras warns against for
# Sequential models.
model = Sequential()
model.add(Input(shape=(len(train_x[0]),)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

print("Model created")
print(f"Input shape: {len(train_x[0])}")
print(f"Output shape: {len(train_y[0])}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Model created
Input shape: 359
Output shape: 130


In [6]:
# Compile the model with stochastic gradient descent using Nesterov momentum.
# The former `decay=1e-6` argument is removed: current Keras optimizers no
# longer support it (it is ignored or rejected depending on the version), so
# it never affected training. If learning-rate decay is wanted, pass a
# schedule from keras.optimizers.schedules as `learning_rate` instead.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

print("Model compiled")
model.summary()

Model compiled




In [7]:
# Train the network and write it to disk.
EPOCHS = 200
BATCH_SIZE = 5
MODEL_PATH = 'model.h5'

print("Starting training...")
# `hist` keeps the object returned by fit() for later inspection.
hist = model.fit(
    train_x,
    train_y,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)

# Persist the trained model under MODEL_PATH.
model.save(MODEL_PATH)
print("Model trained and saved successfully!")

Starting training...
Epoch 1/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.0303 - loss: 4.8506
Epoch 2/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0673 - loss: 4.7425
Epoch 3/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0741 - loss: 4.6075
Epoch 4/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0774 - loss: 4.4820
Epoch 5/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0842 - loss: 4.3601
Epoch 6/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0808 - loss: 4.2516
Epoch 7/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0943 - loss: 4.1183
Epoch 8/200
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.1380 - loss: 3.9404
Epoch 9/200
[1m60/60[0m 



Model trained and saved successfully!


In [8]:
# Print a recap of the dataset sizes and the artifacts written by the earlier cells.
summary_lines = (
    "\n=== Training Summary ===",
    f"Total documents: {len(documents)}",
    f"Total classes: {len(classes)}",
    f"Total unique words: {len(words)}",
    f"Training data shape: {train_x.shape}",
    f"Labels shape: {train_y.shape}",
    "\nFiles created:",
    "- model.h5 (trained model)",
    "- texts.pkl (vocabulary)",
    "- labels.pkl (intent classes)",
    "\nTraining completed successfully!",
)
for summary_line in summary_lines:
    print(summary_line)


=== Training Summary ===
Total documents: 297
Total classes: 130
Total unique words: 359
Training data shape: (297, 359)
Labels shape: (297, 130)

Files created:
- model.h5 (trained model)
- texts.pkl (vocabulary)
- labels.pkl (intent classes)

Training completed successfully!
