# About this Notebook
This notebook (`training.ipynb`) provides a streamlined workflow for training, focusing on automation and reproducibility. It is best for quickly retraining the model with updated data or parameters.

In [1]:
import json 
import numpy as np

In [2]:
intents = json.loads(open("intents.json").read())

In [3]:
intents

{'intents': [{'tag': 'greetings',
   'patterns': ['Hello', 'Hey', 'Hi', 'Good Day', 'Greetings'],
   'responses': ['Hello!', 'Hey', 'What Can I do for you?']},
  {'tag': 'Acne',
   'patterns': ['blackheads',
    'pimples',
    'face pimples',
    'neck pimples',
    'shoulder pimples'],
   'responses': ['According to your symptoms you might have Acne',
    'You have Acne',
    'I suppose you have Acne']},
  {'tag': 'Attention Deficit Disorder (ADHD)',
   'patterns': ['impulsivity',
    'hyperactivity',
    'lack of focus',
    'messy works',
    'restlessness'],
   'responses': ['According to your symptoms you might have Attention Deficit Disorder',
    'You have Attention Deficit Disorder',
    'I suppose you have Attention Deficit Disorder']},
  {'tag': 'Allergies',
   'patterns': ['sneezing',
    'itchy eyes',
    'coughing',
    'watery eyes',
    'itchy nose'],
   'responses': ['According to your symptoms you might have Allergies',
    'You have Allergies',
    'I suppose you have

In [4]:
patterns = [p for intent in intents["intents"] for p in intent['patterns']]
tags = [intent['tag'] for intent in intents["intents"] for p in intent['patterns']]

In [5]:
tags

['greetings',
 'greetings',
 'greetings',
 'greetings',
 'greetings',
 'Acne',
 'Acne',
 'Acne',
 'Acne',
 'Acne',
 'Attention Deficit Disorder (ADHD)',
 'Attention Deficit Disorder (ADHD)',
 'Attention Deficit Disorder (ADHD)',
 'Attention Deficit Disorder (ADHD)',
 'Attention Deficit Disorder (ADHD)',
 'Allergies',
 'Allergies',
 'Allergies',
 'Allergies',
 'Allergies',
 'Alzheimers',
 'Alzheimers',
 'Alzheimers',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Ankylosing Spondylitis',
 'Anorexia',
 'Anorexia',
 'Anorexia',
 'Anorexia',
 'Anorexia',
 'Anorexia',
 'Anorexia',
 'Anorexia',
 'Anorexia',
 'Anxiety or Panic Disorder',
 'Anxiety or Panic Disorder',
 'Anxiety or Panic Disorder',
 'Anxiety or Panic Disorder',
 'Anxiety or Panic Disorder',
 'Anxiety or Pan

In [6]:
from text_utils import tokenize_and_lemma

In [7]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(binary=True, tokenizer=tokenize_and_lemma, lowercase=True)
X = vectorizer.fit_transform(patterns).toarray()



In [8]:
X

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_temp = le.fit_transform(tags)

In [10]:
from tensorflow.keras.utils import to_categorical
y = to_categorical(y_temp)

In [11]:
y

array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD

In [13]:
model = Sequential([
    Dense(256, input_shape=(X.shape[1],), activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(y.shape[1], activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(X, y, epochs=200, batch_size=5, verbose=1)

Epoch 1/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.0109 - loss: 4.5821   
Epoch 2/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0236 - loss: 4.5406 
Epoch 3/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0272 - loss: 4.4934 
Epoch 4/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0309 - loss: 4.4738 
Epoch 5/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0381 - loss: 4.4287 
Epoch 6/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0417 - loss: 4.3607 
Epoch 7/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0708 - loss: 4.2846
Epoch 8/200
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0835 - loss: 4.1818 
Epoch 9/200
[1m111/111

<keras.src.callbacks.history.History at 0x1a699388fa0>

In [15]:
import joblib

model.save("chatbot_model.keras")
joblib.dump(vectorizer, "vectorizer.joblib")   
joblib.dump(le, "label_encoder.joblib");