In [1]:
!pip install spacy transformers nltk tflearn
!python -m spacy download en_core_web_sm

Collecting tflearn
  Downloading tflearn-0.5.0.tar.gz (107 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.3/107.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: tflearn
  Building wheel for tflearn (setup.py) ... [?25l[?25hdone
  Created wheel for tflearn: filename=tflearn-0.5.0-py3-none-any.whl size=127283 sha256=61244d9fd3e8e1fd9f4896b142b07d310bdeb52ba3fb17620f6864847f8caeb2
  Stored in directory: /root/.cache/pip/wheels/55/fb/7b/e06204a0ceefa45443930b9a250cb5ebe31def0e4e8245a465
Successfully built tflearn
Installing collected packages: tflearn
Successfully installed tflearn-0.5.0
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m41.1 MB/s[0m eta [36

In [2]:
import spacy
import nltk
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import word_tokenize
import numpy as np
import random
import json
import logging
import torch
import pandas as pd
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

**Loading all the content of the intents json file**

In [3]:
# Load SpaCy English model
nlp = spacy.load('en_core_web_sm')

with open('/content/intents.json') as intents:
    data = json.load(intents)

stemmer = LancasterStemmer()

In [4]:
# Setup logging configuration for tracking
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
)

# Some Analysis

In [5]:
import json

with open('/content/intents.json', 'r') as f:
    data = json.load(f)

df = pd.DataFrame(data['intents'])
df

Unnamed: 0,tag,patterns,responses
0,greeting,"[Hi, Hey, Is anyone there?, Hi there, Hello, H...",[Hello there. Tell me how are you feeling toda...
1,morning,[Good morning],[Good morning. I hope you had a good night's s...
2,afternoon,[Good afternoon],[Good afternoon. How is your day going?]
3,evening,[Good evening],[Good evening. How has your day been?]
4,night,[Good night],"[Good night. Get some proper sleep, Good night..."
...,...,...,...
75,fact-28,[What do I do if I'm worried about my mental h...,[The most important thing is to talk to someon...
76,fact-29,[How do I know if I'm unwell?],"[If your beliefs , thoughts , feelings or beha..."
77,fact-30,[How can I maintain social connections? What i...,"[A lot of people are alone right now, but we d..."
78,fact-31,[What's the difference between anxiety and str...,[Stress and anxiety are often used interchange...


In [6]:
df['tag'].unique()

array(['greeting', 'morning', 'afternoon', 'evening', 'night', 'goodbye',
       'thanks', 'no-response', 'neutral-response', 'about', 'skill',
       'creation', 'name', 'help', 'sad', 'stressed', 'worthless',
       'depressed', 'happy', 'casual', 'anxious', 'not-talking', 'sleep',
       'scared', 'death', 'understand', 'done', 'suicide', 'hate-you',
       'hate-me', 'default', 'jokes', 'repeat', 'wrong', 'stupid',
       'location', 'something-else', 'friends', 'ask', 'problem',
       'no-approach', 'learn-more', 'user-agree', 'meditation',
       'user-meditation', 'pandora-useful', 'user-advice',
       'learn-mental-health', 'mental-health-fact', 'fact-1', 'fact-2',
       'fact-3', 'fact-5', 'fact-6', 'fact-7', 'fact-8', 'fact-9',
       'fact-10', 'fact-11', 'fact-12', 'fact-13', 'fact-14', 'fact-15',
       'fact-16', 'fact-17', 'fact-18', 'fact-19', 'fact-20', 'fact-21',
       'fact-22', 'fact-23', 'fact-24', 'fact-25', 'fact-26', 'fact-27',
       'fact-28', 'fact-29', '

**Extracting all the information from the json file and applying preprocessing techniqes**

In [18]:
def preprocess_text(text):
    doc = nlp(text)
    tokens = [token.lemma_.lower() for token in doc if not token.is_stop and not token.is_punct]
    # entities = [ent.text for ent in doc.ents]  # Named entities
    return tokens

In [19]:
# Extract data from the JSON
all_words = []
all_labels = []
x_words = []
y_words = []

for intent in data['intents']:
    for pattern in intent['patterns']:
        # Tokenizing and normalizing using SpaCy
        wrds = preprocess_text(pattern)
        all_words.extend(wrds)
        x_words.append(wrds)
        y_words.append(intent['tag'])

        if intent['tag'] not in all_labels:
            all_labels.append(intent['tag'])

**Sreaming the data and Removing all the duplicates**

In [20]:
# Removing duplicates and sorting
all_words = sorted(set(all_words))
all_labels = sorted(all_labels)

**Preparing the training data by using One hot encoding**

In [21]:
# Prepare training data
training = []
output = []
out_empty = [0] * len(all_labels)

for i, doc in enumerate(x_words):
    word_bag = [1 if word in doc else 0 for word in all_words]
    final_output = out_empty[:]
    final_output[all_labels.index(y_words[i])] = 1

    training.append(word_bag)
    output.append(final_output)

training = np.array(training)
output = np.array(output)


# **Let's Create our Model**

In [44]:
# Print shapes for debugging
print("Training shape: ", training.shape)
print("Output shape: ", output.shape)

# Create a TensorFlow Dataset
dataset = tf.data.Dataset.from_tensor_slices((training, output))
dataset = dataset.shuffle(buffer_size=len(training)).batch(8)

# Build a new model
model = keras.Sequential([
    layers.Input(shape=(training.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # Regularization to prevent overfitting
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(len(output[0]), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Use EarlyStopping to prevent overfitting
early_stopping = keras.callbacks.EarlyStopping(monitor='loss', patience=10)

# Train the model
try:
    model.fit(dataset, epochs=500, callbacks=[early_stopping], verbose=1)
except Exception as e:
    print("An error occurred during training:", e)

# Save the model
model.save('model.h5')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Training shape:  (232, 279)
Output shape:  (232, 80)
Epoch 1/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.0177 - loss: 4.4136
Epoch 2/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0240 - loss: 4.3611
Epoch 3/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0452 - loss: 4.2822
Epoch 4/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0867 - loss: 4.1871
Epoch 5/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0632 - loss: 4.0889
Epoch 6/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0836 - loss: 3.9815
Epoch 7/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0635 - loss: 3.8744
Epoch 8/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1476 - loss: 3



In [48]:
# Define a function to predict the intent based on user input
def predict_intent(user_input):
    # Tokenize and stem the input
    tokens = nltk.word_tokenize(user_input)
    tokens = [stemmer.stem(w.lower()) for w in tokens]

    # Create a bag of words
    bag = [0] * len(all_words)
    for w in tokens:
        for i, word in enumerate(all_words):
            if word == w:
                bag[i] = 1

    # Predict the intent
    prediction = model.predict(np.array([bag]))[0]
    intent_index = np.argmax(prediction)
    return all_labels[intent_index], prediction[intent_index]

In [49]:
# Define a function to get a random response based on the intent
def get_response(intent):
    for i in data['intents']:
        if i['tag'] == intent:
            return random.choice(i['responses'])



In [50]:
# Main loop for the chatbot
def chatbot():
    print("Chatbot is ready! Type 'exit' to quit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break

        intent, confidence = predict_intent(user_input)

        # Use confidence threshold to determine if the response should be generated
        if confidence > 0.7:  # Adjust the threshold based on your needs
            response = get_response(intent)
            print(f"Chatbot: {response}")
        else:
            print("Chatbot: I'm sorry, I don't understand that.")

In [52]:
# Start the chatbot
chatbot()

Chatbot is ready! Type 'exit' to quit.
You: Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Chatbot: Hello there. Tell me how are you feeling today?
You: I want to know more about mental health
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Chatbot: Oh that's really great. I'd be willing to answer anything that I know about it.
You: what is mental health
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Chatbot: Mental health includes our emotional, psychological, and social well-being. It affects how we think, feel, and act. It also helps determine how we handle stress, relate to others, and make choices.
You: thank you
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Chatbot: You're most welcome!
You: exit
Chatbot: Goodbye!


# Prompt Engineering

In [27]:
def generate_healthcare_response(conversation_history, user_input, max_length=100, temperature=0.6, top_k=40, top_p=0.85):
    """
    This function generates a response to a user's query about mental health or healthcare,
    applying empathy and providing informative, supportive answers.
    """
    try:
        # Define custom healthcare-related prompt
        prompt = (
            "You are a supportive and empathetic mental health assistant. Respond to the user's concerns with "
            "care, providing accurate and helpful information about mental health and healthcare."
        )

        # Concatenate previous conversation for context and user input
        prompt += " ".join(conversation_history) + f" User: {user_input} Bot:"

        # Encode the prompt
        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

        # Generate response using top-k and top-p sampling
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_length=max_length,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                num_return_sequences=1,
                do_sample=True,
                early_stopping=True,
            )

        # Decode the generated response
        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Extract bot's part of the response
        bot_response = response.split("Bot:")[-1].strip()

        logging.info(f"Healthcare response generated successfully: {bot_response}")
        return bot_response

    except Exception as e:
        logging.error(f"Error generating healthcare response: {str(e)}")
        return "Sorry, I encountered an error while generating a healthcare response."

In [28]:
# Example usage
conversation_history = [
    "User: I've been feeling very anxious lately.",
    "Bot: I'm sorry to hear that you're feeling anxious. Anxiety can be tough to manage. "
         "Would you like to talk about what might be causing these feelings?"
]
user_input = "Yes, I feel like I can't control my thoughts sometimes."

In [29]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

Thank you so much for your kind words. I think I'm going to talk to my doctor about it.


In [30]:
conversation_history = [
    "User: I've been feeling really low for the past few weeks.",
    "Bot: It's tough to feel low for an extended period. Have there been specific triggers for these feelings?"
]
user_input = "Yes, I lost my job recently, and it's been hard to cope."


In [31]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

I'm so sorry to hear that. I hope you are able to find a new job soon.


In [32]:
conversation_history = [
    "User: I've been experiencing a lot of stress with my studies.",
    "Bot: School can be overwhelming at times. What specific aspects of your studies are causing you stress?"
]
user_input = "I have so many deadlines, and I feel like I can't keep up."


In [33]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

I am studying to be a psychologist and I am overwhelmed with the work I am doing.


In [34]:
conversation_history = [
    "User: I feel really lonely sometimes.",
    "Bot: Loneliness can be very difficult to cope with. Do you want to talk about what contributes to these feelings?"
]
user_input = "I haven't been able to see my friends lately due to the pandemic."


In [35]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

I am sorry to hear that. I know how lonely it can be to be by yourself.


In [36]:
conversation_history = [
    "User: I can't seem to shake off my feelings of sadness.",
    "Bot: I'm sorry to hear that you're feeling this way. It's important to talk about our feelings. What do you think is causing this sadness?"
]
user_input = "I feel like I'm not accomplishing anything in my life."


In [37]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

I don't know. I think it's a combination of genetic and environmental factors.


In [38]:
conversation_history = [
    "User: I've been having panic attacks lately.",
    "Bot: Panic attacks can be very frightening. I'm here to listen. When do you usually experience them?"
]
user_input = "They usually happen when I'm in crowded places."

In [39]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

I get them when I am in a crowded place too. I try to avoid them as much as I can.


In [40]:
conversation_history = [
    "User: I'm feeling overwhelmed with everything happening around me.",
    "Bot: It's easy to feel overwhelmed with so much going on. Can you pinpoint any specific events or situations that are weighing on you?"
]
user_input = "There's a lot of negativity in the news lately, and it's hard to escape it."

In [41]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

I can't think of any specific things right now, but I do know that mental health is a very important issue.


In [42]:
conversation_history = [
    "User: I've been struggling to sleep at night.",
    "Bot: Sleep issues can significantly affect our mental health. What do you think is keeping you awake?"
]
user_input = "I keep replaying my day in my head and worrying about tomorrow."


In [43]:
# Generating a healthcare-related response
response = generate_healthcare_response(conversation_history, user_input)
print(response)

I'm not sure. I think I just need more sleep. It's hard for me to fall asleep.
