<h1>Import Libraries</h1> 

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random
import nltk
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

import json
import pickle




<h1>Load the Data</h1>

In [2]:
intents_file = open('intents.json').read()
intents = json.loads(intents_file)

<h1>Preprocessing the Data</h1>

In [3]:
import nltk
nltk.download('punkt')

words = []
classes = []
documents = []
ignore_letters = ['!', '?', ',', '.']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Tokenize each word
        tokenized_words = nltk.word_tokenize(pattern)
        words.extend(tokenized_words)   
        
        # Add documents in the corpus
        documents.append((tokenized_words, intent['tag']))
        
        # Add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

print(documents)


[(['Hi', 'there'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hey'], 'greeting'), (['Hola'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Nice', 'chatting', 'to', 'you', ',', 'bye'], 'goodbye'), (['Till', 'next', 'time'], 'goodbye'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks'), (['Awesome', ',', 'thanks'], 'thanks'), (['Thanks', 'for', 'helping', 'me'], 'thanks'), (['How', 'you', 'could', 'help', 'me', '?'], 'options'), (['What', 'you', 'can', 'do', '?'], 'options'), (['What', 'help', 'you', 'provide', '?'], 'options'), (['How', 'you', 'can', 'be', 'helpful', '?'], 'options'), (['What', 'support', 'is', 'offered'], 'options'), (['How', 'to', 'check', 'Adverse', 'drug', 'reaction', '?'], 'adverse_drug'), (['Open', 'adverse', 'drugs', 'module'], 'adverse_drug'), (['Give', 'm

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Sujon\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


<h1>Lemmatize each word and remove the duplicate words</h1>

In [4]:
import nltk
from nltk.stem import WordNetLemmatizer
import pickle

# Ensure 'punkt' and 'wordnet' resources are available
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet')

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Your pre-existing lists
words = []
classes = []
documents = []
ignore_letters = ['!', '?', ',', '.']

# Assuming you already have 'intents' dictionary populated from previous steps
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Tokenize each word
        tokenized_words = nltk.word_tokenize(pattern)
        words.extend(tokenized_words)
        documents.append((tokenized_words, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Lemmatize, lower each word and remove duplicates
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_letters]
words = sorted(list(set(words)))

# Sort classes
classes = sorted(list(set(classes)))

# Print results
print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique lemmatized words", words)

# Save words and classes to pickle files
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Sujon\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


47 documents
9 classes ['adverse_drug', 'blood_pressure', 'blood_pressure_search', 'goodbye', 'greeting', 'hospital_search', 'options', 'pharmacy_search', 'thanks']
87 unique lemmatized words ["'s", 'a', 'adverse', 'all', 'anyone', 'are', 'awesome', 'be', 'behavior', 'blood', 'by', 'bye', 'can', 'causing', 'chatting', 'check', 'could', 'data', 'day', 'detail', 'do', 'dont', 'drug', 'entry', 'find', 'for', 'give', 'good', 'goodbye', 'have', 'hello', 'help', 'helpful', 'helping', 'hey', 'hi', 'history', 'hola', 'hospital', 'how', 'i', 'id', 'is', 'later', 'list', 'load', 'locate', 'log', 'looking', 'lookup', 'management', 'me', 'module', 'nearby', 'next', 'nice', 'of', 'offered', 'open', 'patient', 'pharmacy', 'pressure', 'provide', 'reaction', 'related', 'result', 'search', 'searching', 'see', 'show', 'suitable', 'support', 'task', 'thank', 'thanks', 'that', 'there', 'till', 'time', 'to', 'transfer', 'up', 'want', 'what', 'which', 'with', 'you']


<h1>Create Training and Testing Data</h1>

In [7]:
# create the training data
training = []
output = []
# create empty array for the output
output_empty = np.zeros((len(classes),))

# training set, bag of words for every sentence
for doc in documents:
    # initializing bag of words
    bag = np.zeros((len(words),))
    # list of tokenized words for the pattern
    word_patterns = doc[0]
    # lemmatize each word - create base word, in an attempt to represent related words
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
    # create the bag of words array with 1, if the word is found in the current pattern
    for i, word in enumerate(words):
        bag[i] = 1 if word in word_patterns else 0
    
    # output is a '0' for each tag and '1' for the current tag (for each pattern)
    output_row = output_empty.copy()
    output_row[classes.index(doc[1])] = 1
    training.append(bag)
    output.append(output_row)

# convert lists to numpy arrays
training = np.array(training)
output = np.array(output)

# shuffle the features and make a numpy array
indices = np.arange(training.shape[0])
np.random.shuffle(indices)
training = training[indices]
output = output[indices]

# create training and testing lists. X - patterns, Y - intents
train_x = training
train_y = output

print("Training data is created")



Training data is created


<h1>Create Model</h1>

In [8]:
# deep neural networds model

model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))




<h1>Compile the model</h1>

In [11]:
# Compiling model. SGD with Nesterov accelerated gradient gives good results for this model

from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Define an exponential decay learning rate schedule
initial_learning_rate = 0.01
lr_schedule = ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=1000,  # Number of steps before applying decay
    decay_rate=0.96,   # Decay rate
    staircase=True     # If True, the learning rate is decayed at discrete intervals
)

# Initialize the SGD optimizer with the learning rate schedule
sgd = SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)

# Compile the model with categorical crossentropy loss and the SGD optimizer
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])



<h1>Training the Model</h1>

In [12]:
#Training and saving the model 

hist = model.fit(np.array(train_x), 
                 np.array(train_y), 
                 epochs=5, 
                 batch_size=5, 
                 verbose=1)

model.save('chatbot_model.h5', hist)

print("model is created")


Epoch 1/5












Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  saving_api.save_model(


model is created
