In [18]:
import json
import pickle

import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from nltk.stem.lancaster import LancasterStemmer


In [7]:
# Location of the intents file, relative to the notebook's working directory.
path = "dataset/medical-related.json"

# Read the raw bytes and let the json module detect the encoding and parse
# them into `data`.
with open(path, "rb") as file:
    data = json.loads(file.read())


In [16]:

# Build three structures from the intents in `data`:
#   words     - sorted, de-duplicated, stemmed vocabulary of all pattern tokens
#   classes   - sorted list of the intent tags that own at least one pattern
#   documents - one (token list, intent tag) pair per pattern
# LancasterStemmer is provided by the notebook's import cell
# (nltk.stem.lancaster); without that import this cell fails with NameError
# on a fresh kernel.
stemmer = LancasterStemmer()

ignoreWords = ["!", "?"]  # tokens that must not enter the vocabulary

words = []
documents = []
seen_tags = set()  # a set de-duplicates tags without repeated list scans

for intent in data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        documents.append((tokens, tag))
        # Recorded inside the pattern loop on purpose: an intent without any
        # pattern contributes no class, exactly as before.
        seen_tags.add(tag)

# The ignore filter runs on the raw token, then the token is lower-cased and
# stemmed; the set comprehension removes duplicates before sorting.
words = sorted({stemmer.stem(token.lower()) for token in words if token not in ignoreWords})

classes = sorted(seen_tags)

print(len(documents), "Docs")
print(len(classes), "Classes", classes)
print(len(words), "Split words", words)


188 Docs
44 Classes ['Abdonominal Pain', 'Abrasions', 'Broken Toe', 'Bruises', 'CPR', 'Chemical Burn', 'Choking', 'Cold', 'Cough', 'Cuts', 'Diarrhea', 'Drowning', 'Eye Injury', 'Fainting', 'Fever', 'Fracture', 'Frost bite', 'Gastrointestinal problems', 'Head Injury', 'Headache', 'Heat Exhaustion', 'Heat Stroke', 'Insect Bites', 'Nasal Congestion', 'Normal Bleeding', 'Poison', 'Pulled Muscle', 'Rash', 'Rectal bleeding', 'Skin problems', 'Sore Throat', 'Splinter', 'Sprains', 'Strains', 'Sun Burn', 'Teeth', 'Testicle Pain', 'Vertigo', 'Wound', 'animal bite', 'nose bleed', 'seizure', 'snake bite', 'stings']
114 Split words ['a', 'abdonomin', 'abras', 'allergy', 'am', 'an', 'anim', 'apply', 'bet', 'bit', 'blee', 'block', 'bring', 'brok', 'bru', 'burn', 'by', 'caus', 'chem', 'chok', 'cold', 'congest', 'cough', 'cpr', 'cream', 'cur', 'cut', 'diagnos', 'diarrhe', 'do', 'doe', 'dog', 'drown', 'due', 'exhaust', 'ey', 'faint', 'feel', 'fev', 'for', 'fract', 'frost', 'gas', 'gastrointestin', 'get'

In [19]:
# Persist the vocabulary and the class list.
# The `with` blocks guarantee each file is flushed and closed as soon as the
# dump finishes; the previous bare open() calls left both handles open.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)

with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

In [30]:
from nltk.stem import WordNetLemmatizer
import random
import numpy as np

# NOTE(review): `lemmatizer` is no longer used in this cell; it is kept only in
# case a later cell references it. Remove it once that is confirmed not to be so.
lemmatizer = WordNetLemmatizer()

# Turn every (tokens, tag) pair in `documents` into a bag-of-words vector over
# `words` plus a one-hot label over `classes`.
# Relies on `documents`, `classes`, `words` and `stemmer` from the earlier
# vocabulary cell.
training = []
output_empty = [0] * len(classes)

# Position of each class in the one-hot label, looked up once instead of
# calling classes.index() for every document.
class_position = {label: position for position, label in enumerate(classes)}

# Build the training data
for pattern_tokens, tag in documents:
    # The vocabulary in `words` holds Lancaster stems, so each pattern must be
    # normalised with the same stemmer. Lemmatizing here (as before) left most
    # vocabulary entries unmatched and their features permanently 0.
    pattern_words = {stemmer.stem(token.lower()) for token in pattern_tokens}

    # Bag of words: 1 where the vocabulary entry occurs in this pattern
    bag = [1 if vocab_word in pattern_words else 0 for vocab_word in words]

    # One-hot encoding of the intent
    output_row = list(output_empty)
    output_row[class_position[tag]] = 1

    training.append([bag, output_row])

# Shuffle so that patterns of the same intent are not adjacent
random.shuffle(training)

# Split into features and labels and convert to NumPy arrays
train_x = np.array([entry[0] for entry in training])  # bag-of-words features
train_y = np.array([entry[1] for entry in training])  # one-hot intent labels

print("Training data created")


Training data created


In [35]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Input
from keras.optimizers import SGD

# Feed-forward intent classifier:
#   Input(vocabulary size) -> Dense(128, relu) -> Dropout(0.5)
#   -> Dense(64, relu) -> Dropout(0.5) -> Dense(number of intents, softmax)
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` argument on the first Dense layer.
model = Sequential()
model.add(Input(shape=(len(train_x[0]),)))
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation="softmax"))

# SGD with Nesterov momentum. The former `decay=1e-6` argument is dropped:
# Keras 3 ignores it with a warning and the Keras 2.11+ optimizers reject it,
# so it never had a reliable effect. Pass a learning-rate schedule as
# `learning_rate` if decay is actually wanted.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])

# Fit the model and save it. np.asarray avoids copying when the inputs are
# already NumPy arrays.
hist = model.fit(np.asarray(train_x), np.asarray(train_y), epochs=200, batch_size=5, verbose=1)
# The file name is left unchanged so that existing loaders keep working.
model.save("chatbot_model.h5")

print("Model created and saved")

Epoch 1/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.0336 - loss: 3.8400
Epoch 2/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 812us/step - accuracy: 0.0109 - loss: 3.7963    
Epoch 3/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 783us/step - accuracy: 0.0602 - loss: 3.7562
Epoch 4/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 729us/step - accuracy: 0.0565 - loss: 3.7482 
Epoch 5/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 449us/step - accuracy: 0.0199 - loss: 3.7324  
Epoch 6/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 646us/step - accuracy: 0.1314 - loss: 3.6440 
Epoch 7/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 694us/step - accuracy: 0.0670 - loss: 3.6682 
Epoch 8/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 641us/step - accuracy: 0.1199 - loss: 3.5756 
Epoch 9/200
[1m38/38[0



Model created and saved
