In [61]:
import random
import json
import pickle
import numpy as np
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [62]:
# Download NLTK resources
nltk.download('punkt')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aksha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\aksha\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [63]:
# Load intents from JSON file
with open('intents.json') as json_file:
    intents = json.load(json_file)


In [64]:
# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Preprocess data and extract words, classes, and documents
words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']


In [65]:
for intent in intents['intents']:
    for pattern in intent['patterns']:
        pattern = pattern.lower()  # Convert pattern to lowercase
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])


In [66]:
# Lemmatize words and remove ignored letters
words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
words = sorted(set(words))
classes = sorted(set(classes))

# Save words and classes
pickle.dump(words, open('words4.pkl', 'wb'))
pickle.dump(classes, open('classes4.pkl', 'wb'))


In [67]:

# Create training data
training = []
output_empty = [0] * len(classes)

for document in documents:
    bag = []
    word_patterns = document[0]
    for word in words:
        bag.append(1) if word in word_patterns else bag.append(0)

    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])

In [68]:
random.shuffle(training)
train_x = np.array([x[0] for x in training])
train_y = np.array([x[1] for x in training])

In [70]:
# Define LSTM model architecture
model = Sequential()
model.add(Embedding(len(words), 128, input_length=len(train_x[0])))
model.add(Dropout(0.5))
model.add(LSTM(128))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))


# Optimize Training Parameters
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Perform Error Analysis
history = model.fit(train_x, train_y, epochs=200, batch_size=32, verbose=1,
                    validation_split=0.2, callbacks=[early_stopping, reduce_lr])


# Save model
model.save('Using LSTM model\medchatbot_lstm.h5')

print('Training Done')


  model.save('Using LSTM model\medchatbot_lstm.h5')
  model.save('Using LSTM model\medchatbot_lstm.h5')


ValueError: Unrecognized keyword arguments passed to Embedding: {'input_length': 134}

In [None]:
# Plot training history
import matplotlib.pyplot as plt
plt.plot(hist.history['accuracy'], label='accuracy')
plt.plot(hist.history['loss'], label='loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()