In [1]:
# import the required libraries 
import json
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Load the dataset
with open('projectone.json') as file:
    data = json.load(file)

In [4]:
# Extract patterns and tags
patterns = []
tags = []

for intent in data['intents']:
    for pattern in intent['patterns']:
        patterns.append(pattern)
        tags.append(intent['tag'])

# Print sample patterns and tags to verify data
print(f"Sample patterns: {patterns[:5]}")
print(f"Sample tags: {tags[:5]}")

Sample patterns: ['Hi', 'Hey', 'Is anyone there?', 'Hi there', 'Hello']
Sample tags: ['greeting', 'greeting', 'greeting', 'greeting', 'greeting']


In [5]:
# Encode the tags
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(tags)

# Print label classes to verify encoding
print(f"Classes: {label_encoder.classes_}")

Classes: ['What are the types of depression?' 'about' 'afternoon' 'anxious' 'ask'
 'at what age does anxiety peak?' 'breathing_exercises'
 'can lack of sleep make you feel sad?'
 'can low blood sugar cause suicidal thoughts?' 'casual' 'coping_anxiety'
 'coping_depression' 'coping_stress' 'creation' 'death' 'default'
 'definition_anxiety' 'definition_depression' 'definition_stress'
 'depressed' 'do we control our thoughts?'
 'does oversleeping cause depression?' 'done' 'emergency_help' 'evening'
 'exercise' 'fact-1' 'fact-10' 'fact-11' 'fact-12' 'fact-13' 'fact-14'
 'fact-15' 'fact-16' 'fact-17' 'fact-18' 'fact-19' 'fact-2' 'fact-20'
 'fact-21' 'fact-22' 'fact-23' 'fact-24' 'fact-25' 'fact-26' 'fact-27'
 'fact-28' 'fact-29' 'fact-3' 'fact-30' 'fact-31' 'fact-32' 'fact-5'
 'fact-6' 'fact-7' 'fact-8' 'fact-9' 'friends' 'goodbye' 'greeting'
 'happy' 'hate-me' 'hate-you' 'help' 'how can we reduce anxiety?'
 'how does depression affect the world?' 'how long can anxiety last?'
 'how many thou

In [6]:
# Tokenize the patterns
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(patterns)
sequences = tokenizer.texts_to_sequences(patterns)

# Print sample sequences to verify tokenization
print(f"Sample sequences: {sequences[:5]}")

Sample sequences: [[203], [204], [14, 188, 83], [203, 83], [366]]


In [7]:
padded_sequences = pad_sequences(sequences, padding='post', maxlen=20)  # Adjust maxlen if needed

# Print sample padded sequences to verify padding
print(f"Sample padded sequences: {padded_sequences[:5]}")

Sample padded sequences: [[203   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [204   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [ 14 188  83   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [203  83   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [366   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]]


In [8]:
# Get the vocabulary size
vocab_size = len(tokenizer.word_index) + 1

# Get the number of classes
num_classes = len(label_encoder.classes_)

In [9]:
# Build the model
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, 32, input_length=padded_sequences.shape[1]),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()



In [10]:
# Split the data into training and validation sets
training_size = int(len(padded_sequences) * 0.8)
x_train = padded_sequences[:training_size]
y_train = labels[:training_size]
x_val = padded_sequences[training_size:]
y_val = labels[training_size:]

In [11]:
# Print shapes of training and validation sets to verify
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"x_val shape: {x_val.shape}")
print(f"y_val shape: {y_val.shape}")

x_train shape: (482, 20)
y_train shape: (482,)
x_val shape: (121, 20)
y_val shape: (121,)


In [12]:
# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [14]:
# Train the model
history = model.fit(x_train, y_train, epochs=200, batch_size=16, validation_data=(x_val, y_val), callbacks=[early_stopping])

# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(x_val, y_val, verbose=2)
print(f"Validation accuracy: {val_accuracy * 100:.2f}%")
# print(f"Validation loss: {val_loss:.2f}")

Epoch 1/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - accuracy: 0.8742 - loss: 0.4547 - val_accuracy: 0.8512 - val_loss: 2.1590
Epoch 2/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.8149 - loss: 0.5254 - val_accuracy: 0.8430 - val_loss: 2.1653
Epoch 3/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 60ms/step - accuracy: 0.8913 - loss: 0.3521 - val_accuracy: 0.8430 - val_loss: 2.1948
Epoch 4/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - accuracy: 0.8948 - loss: 0.3821 - val_accuracy: 0.8595 - val_loss: 2.1715
Epoch 5/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.8739 - loss: 0.4272 - val_accuracy: 0.8347 - val_loss: 2.2002
Epoch 6/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.8955 - loss: 0.4223 - val_accuracy: 0.8430 - val_loss: 2.1973
Epoch 7/200
[1m31/31[0m [

Chatbot Running Section

In [15]:
# Function to preprocess user input
def preprocess_input(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=20, padding='post')
    return padded_sequence

In [16]:
# Function to generate chatbot response
def generate_response(text):
    padded_sequence = preprocess_input(text)
    prediction = model.predict(padded_sequence)
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    confidence = np.max(prediction)

    # Set a confidence threshold
    confidence_threshold = 0.6

    if confidence < confidence_threshold:
        return "I'm sorry, I didn't understand that. Could you please rephrase?"
    else:
        for intent in data['intents']:
            if intent['tag'] == predicted_label:
                return np.random.choice(intent['responses'])

In [None]:
# Run the chatbot
print("Start chatting with mental health chatbot (type 'quit' to stop)!")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'quit':
        break
    response = generate_response(user_input)
    print(f"Bot: {response}")

Start chatting with mental health chatbot (type 'quit' to stop)!


You:  Hello


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
Bot: Hello there. Glad to see you're back. What's going on in your world right now?


You:  who are you?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Bot: I'm HopeSoon!
