## Upgrade Tensorflow and Keras libraries

In [None]:
pip install --upgrade tensorflow keras

## Import dependencies

In [3]:
import random
import json
import pickle
import numpy as np
import pandas as pd

import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


## Downoad dataset (intents.json) from the Google Drive

In [4]:
import gdown

url = "https://drive.google.com/uc?id=1aoaVG3giKiVrrOVP6E0YyEGH7qjj19fu"
output = "intents.json"
gdown.download(url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1aoaVG3giKiVrrOVP6E0YyEGH7qjj19fu
To: /content/intents.json
100%|██████████| 36.0k/36.0k [00:00<00:00, 32.6MB/s]


'intents.json'

## Open and load the dataset (intents.json)

In [5]:
try:
    with open("intents.json", "r") as f:
      intents = json.load(f)
    print("File loaded successfully!")
except FileNotFoundError:
    print("The file 'intents.json' was not found. Make sure it exists in the current directory.")
except json.JSONDecodeError:
    print("The file 'intents.json' does not contain valid JSON data. Check the file's contents.")

File loaded successfully!


## Create words, classes and documents arrays from the dataset (intents.json)

In [6]:
words = []
classes = []
documents = []
lemmatizer = WordNetLemmatizer()
stop_words = ["?", "!", ".", ","]

for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent["tag"]))
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

words[:5], classes[:5], documents[:5]

(['Hi', 'How', 'are', 'you', '?'],
 ['greeting', 'goodbye', 'creator', 'name', 'Diabetes'],
 [(['Hi'], 'greeting'),
  (['How', 'are', 'you', '?'], 'greeting'),
  (['Is', 'anyone', 'there', '?'], 'greeting'),
  (['Hello'], 'greeting'),
  (['Good', 'day'], 'greeting')])

## Lemmatize and sort the words (total vocab)

In [7]:
words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
words = sorted(set(words))
classes = sorted(set(classes))

words[:10], classes[:5]

(["'m",
  "'s",
  '7',
  '8',
  'AM',
  'Abrasions',
  'Allergies',
  'Back',
  'Bite',
  'Burns'],
 ['Abdominal Pain', 'Abrasions', 'Allergies', 'Back Pain', 'Broken Finger'])

## Dump the arrays words and classes in "words.pkl" and "classes.pkl" files respectively using "pickle" module for later use in the "chatbot.py"

In [31]:
pickle.dump(words, open("words.pkl", "wb"))
pickle.dump(classes, open("classes.pkl", "wb"))

## Use Bag of Words to convert sentences into vectors of numbers

In [8]:
training = []

for document in documents:
  bag = []
  pattern_words = document[0]
  pattern_words = [lemmatizer.lemmatize(word) for word in pattern_words]
  for word in words:
    bag.append(1) if word in pattern_words else bag.append(0)

    output_row = list(np.zeros(len(classes), dtype=np.int32))
    output_row[classes.index(document[1])] = 1

    training.append([bag, output_row])

random.shuffle(training)

X_train = [item[0] for item in training]
y_train = [item[1] for item in training]

X_train = np.array(X_train)
y_train = np.array(y_train)

X_train.shape, y_train.shape

((72864, 253), (72864, 58))

## Generate model

In [22]:
model = Sequential()
model.add(Dense(128, input_shape=(len(X_train[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(y_train[0]), activation='softmax'))

sgd = SGD(learning_rate=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Train the model on training data

In [23]:
model.fit(X_train, y_train, epochs=100, batch_size=5)

Epoch 1/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 2ms/step - accuracy: 0.2192 - loss: 3.1763
Epoch 2/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 0.8406 - loss: 0.6029
Epoch 3/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 0.9324 - loss: 0.2582
Epoch 4/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 2ms/step - accuracy: 0.9553 - loss: 0.1662
Epoch 5/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 0.9645 - loss: 0.1287
Epoch 6/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 0.9702 - loss: 0.1076
Epoch 7/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 0.9731 - loss: 0.0938
Epoch 8/100
[1m14573/14573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 0.9763 - loss:

<keras.src.callbacks.history.History at 0x78248f469d20>

## Save the model for later use

In [24]:
model.save("medical_assistant_AI_chatbot.keras")