In [2]:
import numpy as np
import nltk
import warnings
warnings.filterwarnings('ignore')
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)
import json
import random
import pickle
# from google.colab import files
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

In [3]:
lemmatizer = WordNetLemmatizer()

#Lists to store vocabulary, labels (intents), and tokenized patterns
vocab = []
labels = []
corpus = []
ignored_tokens = ['?', '!']

uploaded_file = files.upload()

try:
    filename = next(iter(uploaded_file))
    with open(filename, 'r') as f:
        file_content = f.read()
    data = json.loads(file_content)
except Exception as error:
    print(f"Error loading the file: {error}")
    raise

for item in data.get('intents', []):
    for sentence in item.get('patterns', []):
        #Tokenize each pattern(split into individual words)
        tokens = nltk.word_tokenize(sentence)
        vocab.extend(tokens)  #Add tokens to vocabulary list
        corpus.append((tokens, item['tag']))  # Add tokenized sentence with its label

        # Add unique tags(intents) to labels list
        if item['tag'] not in labels:
            labels.append(item['tag'])

#Normalize the vocabulary by lemmatizing,converting to lowercase and removing ignored tokens
vocab = sorted(set([lemmatizer.lemmatize(token.lower()) for token in vocab if token not in ignored_tokens]))

#Sort the labels
labels = sorted(set(labels))

#Output statistics aboutthe processed data
print(f"Processed {len(corpus)} patterns.")
print(f"Identified {len(labels)} unique intents: {labels}")
print(f"Found {len(vocab)} unique vocabulary words: {vocab}")

with open('processed_words.pkl', 'wb') as word_file:
    pickle.dump(vocab, word_file)

with open('processed_labels.pkl', 'wb') as label_file:
    pickle.dump(labels, label_file)

print("Data has been saved into pickle files.")


NameError: name 'files' is not defined

In [115]:
training_data = []
output_template = [0] * len(labels)
#Build the bag-of-words model for each document in the corpus
for sentence, label in corpus:
    word_bag = []
    tokenized_words = [lemmatizer.lemmatize(word.lower()) for word in sentence]

    #Create binary word occurrence representation (bag of words)
    word_bag = [1 if word in tokenized_words else 0 for word in vocab]
    output_row = list(output_template)
    output_row[labels.index(label)] = 1
    training_data.append([word_bag, output_row])

random.shuffle(training_data)
training_data = np.array(training_data, dtype=object)


X_train = np.array([item[0] for item in training_data])  #Input patterns(bag of words)
y_train = np.array([item[1] for item in training_data])  #Target intents(one-hot)
print("Training data is ready.")

#neural network model
model = Sequential()
model.add(Dense(128, input_shape=(len(X_train[0]),), activation='relu'))  # First dense layer
model.add(Dropout(0.5))  #Regularization with dropout
model.add(Dense(64, activation='relu'))  #Second dense layer
model.add(Dropout(0.5))  #Dropout for overfitting prevention
model.add(Dense(len(y_train[0]), activation='softmax'))  # Output layer with softmax
optimizer = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model.fit(np.array(X_train), np.array(y_train), epochs=100, batch_size=5, verbose=1)
model.save('mental_health_bot_model.h5')

Training data is ready.
Epoch 1/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.0283 - loss: 4.3733
Epoch 2/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0398 - loss: 4.3050
Epoch 3/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0622 - loss: 4.1790
Epoch 4/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0451 - loss: 4.0102
Epoch 5/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.1190 - loss: 3.8952
Epoch 6/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0812 - loss: 3.7708
Epoch 7/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1089 - loss: 3.6039
Epoch 8/100
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1381 - loss: 3.3967
Epoch 9/100
[1m47/47[0



In [116]:
#Tokenize and lemmatize the input sentence
def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)  # Split sentence into words
    return [lemmatizer.lemmatize(word.lower()) for word in sentence_words]  # Lemmatize and lowercase

#Convert the sentence into a bag-of-words representation
def bow(sentence, words, show_details=True):
    sentence_words = clean_up_sentence(sentence)  #Process input sentence
    bag = [0] * len(words)  #Initialize bag of words with 0s
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
                if show_details:
                    print(f"Found in bag: {w}")
    return np.array(bag)

#Predict the intent based on the input sentence
def predict_class(sentence, histogram):
    p = bow(sentence, words, show_details=False)  #Get bag of words
    res = model.predict(np.array([p]))[0]  #Predict intent probability
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
    #Sort results by probability
    results.sort(key=lambda x: x[1], reverse=True)
    #Convert results to list of intents with probabilities
    return [{"intent": classes[r[0]], "probability": str(r[1])} for r in results]

#Generate a response based on predicted intent
def getResponse(ints, intents_json):
    if not ints:
        return "I'm sorry, I don't understand. Could you rephrase that?"

    tag = ints[0]['intent']  #Get the top predicted intent
    for i in intents_json['intents']:
        if i['tag'] == tag:
            return random.choice(i['responses'])  #Return random response for the intent

    return "I'm sorry, I don't understand. Could you rephrase that?"

def respond(message):
    ints = predict_class(message, model)
    return getResponse(ints, intents)


In [117]:
#Create a Gradio Interface
iface = gr.Interface(
    fn=respond,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Type your message here...",
        label="Your Message"
    ),
    outputs=gr.Textbox(
        label="Bot's Response"
    ),
    title="🧠 Mental Health Chatbot",
    description="Interact with our mental health chatbot. Type your message and get a response!",

)

iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://5e6d095b01047a2565.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


