In [1]:
import nltk
from nltk.stem import WordNetLemmatizer
import json
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers.legacy import SGD
import random

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Load and process the data
words = []  # Every token seen in any pattern (deduplicated further below)
classes = []  # Intent tags
documents = []  # (tokenized pattern, intent tag) pairs
ignore_words = ['?', '!']  # Tokens excluded from the vocabulary

# Read the intents file; the context manager guarantees the handle is closed.
# JSON is UTF-8 by specification, so do not rely on the platform default encoding.
with open('data.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

# Tokenize every pattern and remember which intent it belongs to
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # Split the pattern into tokens
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        # Keep the tokenized pattern together with its intent
        documents.append((w, tag))

        # Register the intent the first time one of its patterns is seen
        if tag not in classes:
            classes.append(tag)

# Lemmatize and lowercase every token that is not ignored, then deduplicate and sort
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
# Sort the classes so their order is stable
classes = sorted(set(classes))

# Print the number of documents, classes, and unique words
print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique lemmatized words", words)

# Save words and classes to files; `with` flushes and closes each file
with open('texts.pkl', 'wb') as texts_file:
    pickle.dump(words, texts_file)
with open('labels.pkl', 'wb') as labels_file:
    pickle.dump(classes, labels_file)

# Create training data: one [bag-of-words, one-hot intent] pair per document
training = []
# Template output row with a zero for each class
output_empty = [0] * len(classes)
# Position of each intent tag in the one-hot output row
class_index = {tag: position for position, tag in enumerate(classes)}

for doc in documents:
    # Lemmatize and lowercase the pattern's tokens the same way the vocabulary was built;
    # a set makes each membership test below constant-time
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}
    # 1 where the vocabulary word occurs in the current pattern, 0 otherwise
    bag = [1 if w in pattern_words else 0 for w in words]

    # Copy the zero template and set a 1 at the current intent's position
    output_row = list(output_empty)
    output_row[class_index[doc[1]]] = 1

    training.append([bag, output_row])

# Shuffle the training pairs in place
random.shuffle(training)

# Build the model inputs from the shuffled Python lists. X - patterns, Y - intents.
# Extracting them before the object-array conversion keeps them 2-D numeric arrays
# even when the vocabulary and the class list happen to have the same length.
train_x = np.array([pair[0] for pair in training])
train_y = np.array([pair[1] for pair in training])

# Keep `training` available as an object array, as before
training = np.array(training, dtype=object)  # dtype=object because bag and output row lengths differ

print("Training data created")

# Layer sizes at the network boundary come straight from the training arrays
input_size = len(train_x[0])  # length of one bag-of-words row
output_size = len(train_y[0])  # length of one one-hot intent row

# Feed-forward classifier: two hidden ReLU layers, each followed by dropout,
# and a softmax output over the intents
model = Sequential([
    Dense(128, input_shape=(input_size,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(output_size, activation='softmax'),
])

# Compile with Nesterov-momentum SGD and categorical cross-entropy loss
sgd = SGD(
    learning_rate=0.01,
    decay=1e-6,
    momentum=0.9,
    nesterov=True,
)
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# Train the model; the returned history is kept in `hist`
hist = model.fit(
    train_x,
    train_y,
    epochs=200,
    batch_size=5,
    verbose=1,
)

# Save the trained model to disk
model.save('model.h5')

print("Model created")


193 documents
25 classes ['appointment_confirmation', 'appointment_reschedule', 'billing_inquiry', 'book_appointment', 'cancel_appointment', 'doctor_availability', 'emergency_contact', 'emergency_services', 'get_directions', 'goodbye', 'greeting', 'health_checkup_packages', 'hospital_hours', 'insurance_inquiry', 'location_query', 'medical_records', 'prescription_refill', 'provide_date', 'provide_doctor_name', 'provide_specialist_date', 'provide_specialist_type', 'specialist_consultation', 'statement_ok', 'test_results', 'thanks']
185 unique lemmatized words ["'s", ',', '15th', '24/7', '3', 'a', 'about', 'accept', 'accepted', 'access', 'address', 'afternoon', 'ajay', 'alright', 'alrighty', 'an', 'anyone', 'appointment', 'appreciate', 'appreciated', 'are', 'at', 'availability', 'available', 'awesome', 'be', 'bill', 'billing', 'book', 'booking', 'by', 'bye', 'can', 'cancel', 'cardiologist', 'care', 'catch', 'change', 'chatting', 'check', 'checkup', 'close', 'confirm', 'confirmation', 'con

Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoc

Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
Model created


  saving_api.save_model(
