In [9]:
import nltk
from nltk.stem import WordNetLemmatizer
import json
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the intent definitions (tags and their example patterns) from disk.
# The context manager closes the file handle deterministically, and the
# explicit encoding keeps the read independent of the platform's default codec.
with open('intents.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

# Shared WordNet lemmatizer used for both the vocabulary and the training bags.
lemmatizer = WordNetLemmatizer()

In [3]:
# Accumulators filled while walking the intents file:
#   words     - every token seen in any pattern (deduplicated later)
#   classes   - the distinct intent tags
#   documents - (token list, tag) pairs, one per pattern
words, classes, documents = [], [], []

# Tokens that are dropped when the vocabulary is built.
ignore_words = ['?', '!']

In [4]:
# Walk every example pattern of every intent, collecting the raw tokens,
# the (tokens, tag) training pairs, and the distinct tags.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Split the pattern into tokens with NLTK's tokenizer
        tokens = nltk.word_tokenize(pattern)
        # Grow the global vocabulary with this pattern's tokens
        words.extend(tokens)
        tag = intent['tag']
        # Remember which tag this tokenized pattern belongs to
        documents.append((tokens, tag))
        # Register the tag the first time it is encountered
        if tag not in classes:
            classes.append(tag)

In [5]:
# Build the final vocabulary: drop ignored tokens, lowercase and lemmatize the
# rest, then deduplicate and sort so that feature indices are stable.
words = sorted({
    lemmatizer.lemmatize(token.lower())
    for token in words
    if token not in ignore_words
})
# Deduplicate and sort the intent tags so that label indices are stable.
classes = sorted(set(classes))

# Summary of what was extracted from the intents file.
print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique lemmatized words", words)

192 documents
30 classes ['UGEE', 'agriculture_research', 'aieed', 'ailet', 'bana_thali', 'bhu_uet', 'bitsat', 'cmi', 'cuet', 'efl_u_hyderabad_entrance_test', 'exams_after_12th', 'goodbye', 'greeting', 'hotel_mgt', 'hsee', 'ind_stats_ints_add', 'indian_army_technical_entry_scheme', 'indian_navy_btech_entry_scheme', 'indian_navy_sailors_recruitment', 'jee_advanced', 'jee_main', 'law_add_test', 'mgt_add_test', 'mht_cet', 'national_defence_academy_and_naval_academy_examination', 'nest', 'nift', 'sci_edu_res', 'thanks', 'tiss_bat']
145 unique lemmatized words ["'s", '(', ')', ',', '12th', 'a', 'about', 'academy', 'adios', 'admission', 'advanced', 'after', 'afternoon', 'agriculture', 'aieea', 'aieed', 'ailet', 'all', 'and', 'appreciate', 'are', 'army', 'b.tech', 'banasthali', 'bbanasthali', 'bhu', 'bitsat', 'bye', 'can', 'care', 'career', 'catch', 'chennai', 'clat', 'cmi', 'common', 'completing', 'council', 'cuet', 'deal', 'defence', 'detail', 'do', 'education', 'english', 'entrance', 'entr

In [6]:
# Persist the vocabulary and the class list so that inference code can rebuild
# the same feature/label index mapping. The `with` blocks guarantee the files
# are flushed and closed as soon as each dump completes.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

In [7]:
# Build the training set: one bag-of-words vector (over `words`) and one
# one-hot label vector (over `classes`) per document.
training = []
output_empty = [0] * len(classes)

for doc in documents:
    # Normalise the pattern tokens the same way the vocabulary was normalised;
    # a set gives constant-time membership tests below.
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}

    # Bag of words: 1 where the vocabulary word occurs in this pattern, else 0
    bag = [1 if w in pattern_words else 0 for w in words]

    # One-hot encode the intent tag
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    # Keep features and label together so they are shuffled as a pair
    training.append([bag, output_row])

# Shuffle so that mini-batches are not grouped by intent
random.shuffle(training)

# Separate features (X) from labels (y). The arrays are built directly from the
# pairs: each [bag, output_row] pair is ragged (vocabulary size vs. number of
# classes), and NumPy >= 1.24 raises ValueError when asked to turn such a
# nested list into a regular array.
train_x = np.array([features for features, _ in training])
train_y = np.array([label for _, label in training])

# `training` is still exposed as an array, as before; dtype=object is the
# supported way to hold the ragged pairs.
training = np.array(training, dtype=object)

print("Training data created")

Training data created


In [8]:
# Feed-forward intent classifier: bag-of-words input -> 128 -> 64 -> softmax
# over the intent classes, with dropout after each hidden layer.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model
# SGD with Nesterov momentum. The former `decay=1e-6` argument is omitted:
# it is not a parameter of the current `keras.optimizers.SGD` (pass a
# learning-rate schedule as `learning_rate` if decay is wanted).
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Train model; `hist` holds the per-epoch loss/accuracy history
hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)

# Save model (filename kept as-is so existing loaders keep working)
model.save("chat_model.h5")
print("Model saved to disk")

Epoch 1/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.0453 - loss: 3.4222   
Epoch 2/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1078 - loss: 3.2522
Epoch 3/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1689 - loss: 3.1351
Epoch 4/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2826 - loss: 2.7110
Epoch 5/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3203 - loss: 2.5520
Epoch 6/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5328 - loss: 2.0249
Epoch 7/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.4593 - loss: 2.0718
Epoch 8/200
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5567 - loss: 1.7543
Epoch 9/200
[1m39/39[0m [32m━━━━━━━━━━━━━━



Model saved to disk
