In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Embedding, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.text import Tokenizer

# Load and Preprocess Data
# Corpus file used for training; undecodable bytes are dropped and the text
# is lowercased as soon as it is read.
filename = "mental_H.txt"
with open(filename, mode="r", encoding="utf-8", errors="ignore") as corpus_file:
    raw_text = corpus_file.read().lower()
import re

# Remove unnecessary characters and normalize text
def clean_text(text):
    """Return `text` normalized for character-level modelling.

    Steps, in order: drop every character that is neither a word character
    nor whitespace, collapse each whitespace run into one space, trim the
    ends, and lowercase the result.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    single_spaced = re.sub(r'\s+', ' ', without_punctuation)
    return single_spaced.strip().lower()

raw_text = clean_text(raw_text)

# Tokenization
# char_level=True makes every character its own token.
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts([raw_text])
sequences = tokenizer.texts_to_sequences([raw_text])[0]

# +1 because token ids start at 1; index 0 is never assigned to a character.
vocab_size = len(tokenizer.word_index) + 1
seq_length = 100

# Fail early with a readable message instead of an IndexError further down
# when the corpus is too short to yield a single (window, next-char) pair.
if len(sequences) <= seq_length:
    raise ValueError(
        f"Corpus has {len(sequences)} tokens; need more than seq_length={seq_length} "
        "to build at least one training pair."
    )

# Create Input-Output Pairs
# X[i] is the window sequences[i : i + seq_length]; y[i] is the one-hot
# encoding of the token that immediately follows that window.
num_samples = len(sequences) - seq_length
X = np.array(
    [sequences[i:i + seq_length] for i in range(num_samples)],
    dtype=np.int32,  # token ids are small; avoid the wider platform default
)
y = tf.keras.utils.to_categorical(sequences[seq_length:], num_classes=vocab_size)

# X is already (num_samples, seq_length), which is what the Embedding layer
# consumes, so no reshape is needed.

# Model Definition
# input_length is kept so the model is built at construction time and
# model.summary() works before the first fit/predict call.
model = Sequential([
    Embedding(vocab_size, 256, input_length=seq_length),
    Bidirectional(LSTM(256, return_sequences=True)),
    Dropout(0.3),
    BatchNormalization(),
    Bidirectional(LSTM(256)),
    Dropout(0.3),
    Dense(vocab_size, activation="softmax")
])

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

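# Model Summary: printed in the next cell via `model.summary()`.


# Checkpointing: the `ModelCheckpoint` callback is set up in the training cell below.


# Load Best Weights: done at the top of each text-generation cell below.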



In [3]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 256)          10240     
                                                                 
 bidirectional (Bidirectiona  (None, 100, 512)         1050624   
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 100, 512)          0         
                                                                 
 batch_normalization (BatchN  (None, 100, 512)         2048      
 ormalization)                                                   
                                                                 
 bidirectional_1 (Bidirectio  (None, 512)              1574912   
 nal)                                                            
                                                        

In [None]:
# Write the model to disk each time the training loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath="weights-best.hdf5",
    monitor="loss",
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

# Train the Model
model.fit(x=X, y=y, batch_size=64, epochs=50, callbacks=callbacks)

Epoch 1/50
Epoch 1: loss improved from inf to 1.67616, saving model to weights-best.hdf5
Epoch 2/50
Epoch 2: loss improved from 1.67616 to 1.32765, saving model to weights-best.hdf5
Epoch 3/50
Epoch 3: loss improved from 1.32765 to 1.22855, saving model to weights-best.hdf5
Epoch 4/50
Epoch 4: loss improved from 1.22855 to 1.17248, saving model to weights-best.hdf5
Epoch 5/50
Epoch 5: loss improved from 1.17248 to 1.13155, saving model to weights-best.hdf5
Epoch 6/50
Epoch 6: loss improved from 1.13155 to 1.10154, saving model to weights-best.hdf5
Epoch 7/50
Epoch 7: loss improved from 1.10154 to 1.07651, saving model to weights-best.hdf5
Epoch 8/50
Epoch 8: loss improved from 1.07651 to 1.05237, saving model to weights-best.hdf5
Epoch 9/50
Epoch 9: loss improved from 1.05237 to 1.03438, saving model to weights-best.hdf5
Epoch 10/50
Epoch 10: loss improved from 1.03438 to 1.01703, saving model to weights-best.hdf5
Epoch 11/50
Epoch 11: loss improved from 1.01703 to 1.00098, saving mode

In [4]:
model.load_weights("weights-best.hdf5")

# Generate Text
# Seed with a random training window. np.random.randint excludes its upper
# bound, so passing len(X) keeps the last window eligible.
seed_idx = np.random.randint(0, len(X))
seed_sequence = X[seed_idx]

output = []
for _ in range(1000):  # Generate 1000 characters
    pred_input = np.reshape(seed_sequence, (1, len(seed_sequence)))
    # Call the model directly rather than model.predict(): predict() is meant
    # for large batches and is slow when invoked once per character.
    # training=False keeps Dropout/BatchNormalization in inference mode.
    pred_probs = np.asarray(model(pred_input, training=False))
    next_idx = int(np.argmax(pred_probs))  # greedy decoding
    # Index 0 maps to no character, so fall back to "" instead of a KeyError.
    output.append(tokenizer.index_word.get(next_idx, ""))

    # Update seed sequence: drop the oldest token, append the new one.
    seed_sequence = np.append(seed_sequence[1:], next_idx)

print("Generated Text:")
print("".join(output))


KeyboardInterrupt



In [6]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the trained model
model.load_weights("weights-best.hdf5")

# Define the knowledge base
# Reuse `filename` from the preprocessing cell so both reads hit the same
# path (the previous literal differed in letter case, which breaks on
# case-sensitive filesystems) and decode it the same way as the first read.
with open(filename, 'r', encoding='utf-8', errors='ignore') as file:
    knowledge_base = file.read()

# Extract knowledge base sections
def find_relevant_info(user_input, knowledge_text):
    """Return the knowledge-base lines that mention the user's query.

    The query and the whole knowledge text are embedded with TF-IDF; if
    their similarity exceeds 0.1, every line of `knowledge_text` containing
    the query (case-insensitive substring match) is returned, joined by
    newlines.

    Args:
        user_input: Free-text query typed by the user.
        knowledge_text: Full knowledge-base text, one entry per line.

    Returns:
        The matching lines joined with newlines, or a fixed fallback
        message when the query is blank, the similarity is too low, or no
        line contains the query.
    """
    fallback = "I don't have specific information about that. Let's explore general advice."

    # Surrounding whitespace would defeat the substring match below, and a
    # blank query can never match anything meaningful.
    query = user_input.strip()
    if not query:
        return fallback

    tfidf = TfidfVectorizer().fit_transform([knowledge_text, query]).toarray()
    # Row 0 is the knowledge base, row 1 is the query.
    similarity = float(tfidf[0].dot(tfidf[1]))

    if similarity > 0.1:
        needle = query.lower()
        matches = [line for line in knowledge_text.splitlines() if needle in line.lower()]
        # A passing similarity does not guarantee a literal match; avoid
        # returning an empty string in that case.
        if matches:
            return '\n'.join(matches)
    return fallback

# Generate a creative response
def generate_response(user_input, tokenizer, model, max_sequence_length, output_length=100):
    """Greedily generate text that continues `user_input`.

    The input is tokenized and left-padded (padding='pre') to
    `max_sequence_length`. Then, `output_length` times, the most probable
    next token is picked and the window slides forward by one token.

    Args:
        user_input: Prompt text.
        tokenizer: Fitted tokenizer exposing `texts_to_sequences` and
            `index_word`.
        model: Callable model returning next-token probabilities for a
            (1, max_sequence_length) batch.
        max_sequence_length: Window length fed to the model on each step.
        output_length: Number of tokens to generate.

    Returns:
        The generated tokens concatenated into one string.
    """
    input_sequence = tokenizer.texts_to_sequences([user_input])
    input_sequence = pad_sequences(input_sequence, maxlen=max_sequence_length, padding='pre')

    output = []
    for _ in range(output_length):
        # Call the model directly rather than model.predict(): predict() is
        # meant for large batches and is slow when invoked once per token.
        # training=False keeps Dropout/BatchNormalization in inference mode.
        pred_probs = np.asarray(model(input_sequence, training=False))
        next_idx = int(np.argmax(pred_probs))
        # Unknown ids (e.g. 0) contribute no text.
        output.append(tokenizer.index_word.get(next_idx, ""))

        # Slide the window: drop the oldest token, append the new one.
        input_sequence = np.append(input_sequence[0][1:], next_idx).reshape(1, max_sequence_length)

    return "".join(output)

# Example usage
user_input = input("You: ")
relevant_info = find_relevant_info(user_input, knowledge_base)
# The prompt window must have the same length the model was built with, so
# reuse seq_length from the preprocessing cell instead of repeating 100.
creative_response = generate_response(user_input, tokenizer, model, max_sequence_length=seq_length, output_length=100)

response = f"Here is some information related to your query:\n{relevant_info}\n\nChatbot: {creative_response}"
print(response)


You:  stress


Chatbot: onsultmstatements 4 american psychiatric association and substance use disorders and adhd in the pre


In [4]:
model.load_weights("weights-best.hdf5")

# Generate Text
# Seed with a random training window. np.random.randint excludes its upper
# bound, so passing len(X) keeps the last window eligible.
seed_idx = np.random.randint(0, len(X))
seed_sequence = X[seed_idx]

output = []
# NOTE: temperature is not applied yet; decoding below is greedy (argmax).
temperature = 1.0  # Lower values make text more deterministic; higher values make it more diverse.

min_chars = 1500  # always generate at least this many characters
max_chars = 2000  # hard cap on the number of generated characters

for i in range(max_chars):
    # Past the minimum length, stop as soon as the text ends on a full stop.
    # (The previous check compared the integer seed index with ".", which is
    # never true, so generation always ran to the cap.)
    if i >= min_chars and output and output[-1] == ".":
        break

    pred_input = np.reshape(seed_sequence, (1, len(seed_sequence)))
    # Call the model directly rather than model.predict(): predict() is meant
    # for large batches and is slow when invoked once per character.
    # training=False keeps Dropout/BatchNormalization in inference mode.
    pred_probs = np.asarray(model(pred_input, training=False))
    next_idx = int(np.argmax(pred_probs))
    # Index 0 maps to no character, so fall back to "" instead of a KeyError.
    output.append(tokenizer.index_word.get(next_idx, ""))

    # Update seed sequence: drop the oldest token, append the new one.
    seed_sequence = np.append(seed_sequence[1:], next_idx)

generated_text = "".join(output)
# Close the text with a full stop unless generation already ended on one.
if not generated_text.endswith("."):
    generated_text += "."

print("Generated Text:")
print(generated_text)

Generated Text:
e mental health professionals and support to seek treatment options and can help a person’s mental health is a basic and include problems and support to more than the stressors are also present and prevent mental health at work is a loss of person continuity of suicide and substance use disorders in adults with mental health conditions and psychosocial disasters and suicide and well-being.

the development of mental health conditions in the brain that are several family, and individuals may feel an important to recommends person to mental illnesses and the professional will recommend that a person’s behavior and support workers with bipolar disorder (adhd) is a common mental health conditions are more likely to experience personal activity and antidepressant medications and an individual with a mental health professionals and support to a depressive episode and depression in the brain that they are more likely to develop better services and support to provide support to