# About this Notebook
This notebook (`model.ipynb`) uses a step-by-step approach for model building and training, suitable for experimentation and manual tuning. It is ideal for users who want to understand and modify each stage of the model pipeline.

In [1]:
import random
import json
import numpy as np

In [2]:
intents = json.loads(open("intents.json").read())

In [3]:
intents

{'intents': [{'tag': 'greetings',
   'patterns': ['Hello', 'Hey', 'Hi', 'Good Day', 'Greetings'],
   'responses': ['Hello!', 'Hey', 'What Can I do for you?']},
  {'tag': 'Acne',
   'patterns': ['blackheads',
    'pimples',
    'face pimples',
    'neck pimples',
    'shoulder pimples'],
   'responses': ['According to your symptoms you might have Acne',
    'You have Acne',
    'I suppose you have Acne']},
  {'tag': 'Attention Deficit Disorder (ADHD)',
   'patterns': ['impulsivity',
    'hyperactivity',
    'lack of focus',
    'messy works',
    'restlessness'],
   'responses': ['According to your symptoms you might have Attention Deficit Disorder',
    'You have Attention Deficit Disorder',
    'I suppose you have Attention Deficit Disorder']},
  {'tag': 'Allergies',
   'patterns': ['sneezing',
    'itchy eyes',
    'coughing',
    'watery eyes',
    'itchy nose'],
   'responses': ['According to your symptoms you might have Allergies',
    'You have Allergies',
    'I suppose you have

In [4]:
import sys
!"{sys.executable}" -m pip install nltk



In [5]:
import nltk
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to C:\Users\Ankit
[nltk_data]    |     Regmi\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to C:\Users\Ankit
[nltk_data]    |     Regmi\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\Ankit Regmi\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     C:\Users\Ankit Regmi\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_eng is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\Ankit Regmi\

True

In [6]:
# from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

words = []
classes = []
documents = []
ignore_letters = ["?", "!", ".", ","]

for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        word_list = [lemmatizer.lemmatize(word.lower()) for word in word_list if word not in ignore_letters]
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
    # if intent["tag"] not in classes:
    classes.append(intent["tag"])

In [7]:
classes

['greetings',
 'Acne',
 'Attention Deficit Disorder (ADHD)',
 'Allergies',
 'Alzheimers',
 'Ankylosing Spondylitis',
 'Anorexia',
 'Anxiety or Panic Disorder',
 'Arthritis',
 'Antitrypsin Deficiency',
 'Abdominal Migraine',
 'Amyloidosis',
 'Abscessed Tooth',
 'Amenorrhea',
 'Absence Seizure',
 'Sleep Apnea',
 'Achalasia',
 'Appendicitis',
 'Aparaxia of Speech',
 'Arachnoiditis',
 'Acute Respiratory Distress Syndrome (ARDS)',
 'Anemia',
 'Arrhythmia',
 'Asthma',
 'Autism',
 'Blepharitis',
 'Bacterial Arthritis',
 'Bacterial Meningitis',
 'Benign Prostate Enlargement(BPE)',
 'Bipolar Disorder',
 'Bronchiectasis',
 'Bronchitis',
 'Bulimia',
 'Catarrh',
 'Cellulitis',
 'Chest Infection',
 'Chest Pain',
 'Chronic Fatigue Syndrome',
 'Common Cold',
 'Conjunctivitis',
 'Dehydration',
 'Dental Abscess',
 'Depression',
 'Diabetes',
 'Diarrhoea',
 'Dystonia',
 'Epilepsy',
 'Escherichia Coli',
 'Flu',
 'Fetal alcohol spectrum disorder',
 'Food poisoning',
 'Fungal nail infection',
 'Gallbladder 

In [8]:
# def get_wordnet_pos(word):
#     """Map POS tag to first character for lemmatizer"""
#     from nltk import pos_tag
#     tag = pos_tag([word])[0][1][0].upper()
#     tag_dict = {"J": wordnet.ADJ,
#                 "N": wordnet.NOUN,
#                 "V": wordnet.VERB,
#                 "R": wordnet.ADV}
#     return tag_dict.get(tag, wordnet.NOUN)

In [9]:
words = sorted(set(words))
classes = sorted(set(classes))

In [10]:
documents

[(['hello'], 'greetings'),
 (['hey'], 'greetings'),
 (['hi'], 'greetings'),
 (['good', 'day'], 'greetings'),
 (['greeting'], 'greetings'),
 (['blackhead'], 'Acne'),
 (['pimple'], 'Acne'),
 (['face', 'pimple'], 'Acne'),
 (['neck', 'pimple'], 'Acne'),
 (['shoulder', 'pimple'], 'Acne'),
 (['impulsivity'], 'Attention Deficit Disorder (ADHD)'),
 (['hyperactivity'], 'Attention Deficit Disorder (ADHD)'),
 (['lack', 'of', 'focus'], 'Attention Deficit Disorder (ADHD)'),
 (['messy', 'work'], 'Attention Deficit Disorder (ADHD)'),
 (['restlessness'], 'Attention Deficit Disorder (ADHD)'),
 (['sneezing'], 'Allergies'),
 (['itchy', 'eye'], 'Allergies'),
 (['coughing'], 'Allergies'),
 (['watery', 'eye'], 'Allergies'),
 (['itchy', 'nose'], 'Allergies'),
 (['forgetful'], 'Alzheimers'),
 (['confusion', 'with', 'time'], 'Alzheimers'),
 (['poor', 'judgement'], 'Alzheimers'),
 (['loss', 'of', 'appetite'], 'Ankylosing Spondylitis'),
 (['weight', 'loss'], 'Ankylosing Spondylitis'),
 (['difficulty', 'walking']

In [11]:
import pickle

In [12]:
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

In [13]:
X, y = [], []
for word_patterns, tag in documents:
    bag = [1 if w in word_patterns else 0 for w in words]

    onehot = [0]*len(classes)
    onehot[classes.index(tag)] = 1

    X.append(bag)
    y.append(onehot)

X = np.array(X, dtype=np.float32)             
y = np.array(y, dtype=np.float32)             

idx = np.arange(len(X))
np.random.shuffle(idx)
train_x = X[idx]
train_y = y[idx]

print(train_x.shape, train_y.shape)

(551, 497) (551, 98)


In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

In [15]:
model = Sequential()
model.add(Dense(256, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

model.save("chatbot_model.keras", hist)
print("Done!")

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0109 - loss: 4.5904 
Epoch 2/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0163 - loss: 4.5516 
Epoch 3/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0327 - loss: 4.5119 
Epoch 4/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0290 - loss: 4.4802 
Epoch 5/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0417 - loss: 4.4365 
Epoch 6/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0490 - loss: 4.3805 
Epoch 7/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0672 - loss: 4.3186 
Epoch 8/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0780 - loss: 4.2308 
Epoch 9/200
[1m111/111[0m [32m━━━