In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Embedding, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.text import Tokenizer

# Load and Preprocess Data
filename = "mental_H.txt"
with open(filename, "r", encoding="utf-8", errors="ignore") as file:
    raw_text = file.read().lower()
import re

# Remove unnecessary characters and normalize text
def clean_text(text):
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
    text = text.strip().lower()  # Convert to lowercase
    return text

raw_text = clean_text(raw_text)

# Tokenization
##
tokenizer = Tokenizer(num_words=5000)  # Limit vocab size to 5000
tokenizer.fit_on_texts([raw_text])
##
sequences = tokenizer.texts_to_sequences([raw_text])[0]

vocab_size = len(tokenizer.word_index) + 1

##
seq_length = 150

# Create Input-Output Pairs
X = []
y = []
for i in range(0, len(sequences) - seq_length):
    X.append(sequences[i:i + seq_length])
    y.append(sequences[i + seq_length])

X = np.array(X)
y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)

# Reshape for LSTM Input
X = np.reshape(X, (X.shape[0], X.shape[1]))

# Model Definition
##
model = Sequential([
    Embedding(vocab_size, 256, input_length=seq_length),
    Bidirectional(LSTM(256, return_sequences=True)),
    Dropout(0.3),
    BatchNormalization(),
    Bidirectional(LSTM(256)),
    Dropout(0.3),
    Dense(vocab_size, activation="softmax")
])


model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])


In [3]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 150, 256)          1294592   
                                                                 
 bidirectional (Bidirection  (None, 150, 512)          1050624   
 al)                                                             
                                                                 
 dropout (Dropout)           (None, 150, 512)          0         
                                                                 
 batch_normalization (Batch  (None, 150, 512)          2048      
 Normalization)                                                  
                                                                 
 bidirectional_1 (Bidirecti  (None, 512)               1574912   
 onal)                                                           
                                                        

In [4]:
checkpoint = ModelCheckpoint("weights-best1.hdf5", monitor="loss", save_best_only=True, verbose=1)
callbacks = [checkpoint]

# Train the Model
model.fit(X, y, epochs=100, batch_size=64, callbacks=callbacks)

Epoch 1/100
  1/675 [..............................] - ETA: 34:35 - loss: 6.8948 - accuracy: 0.0000e+00

KeyboardInterrupt: 

In [4]:
model.load_weights("weights-best1.hdf5")

# Generate Text
seed_idx = np.random.randint(0, len(X) - 1)
seed_sequence = X[seed_idx]

output = []
for _ in range(1000):  # Generate 1000 characters
    pred_input = np.reshape(seed_sequence, (1, len(seed_sequence)))
    pred_probs = model.predict(pred_input, verbose=0)
    next_idx = np.argmax(pred_probs)
    output.append(tokenizer.index_word[next_idx])

    # Update seed sequence
    seed_sequence = np.append(seed_sequence[1:], next_idx)

print("Generated Text:")
print(" ".join(output))

Generated Text:
assess and improve quality of care and human rights conditions should be established to protect against inhuman and degrading treatment poor living conditions and involuntary admission and treatment people should be able to file complaints in cases of human rights violations replace psychiatric institutions with community care large institutions which are often associated with human rights violations should be replaced by community mental health services backed by care in general hospital and home care support mental health services need to link to services and supports in the community enabling people living with mental health conditions to access educational employment social service and housing opportunities on an equal basis with others change attitudes and raise awareness ministries of health health professionals civil society in particular people with lived experience organizations of persons with disabilities and ngos academic institutions professional organizati

In [5]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the trained model
model.load_weights("weights-best.hdf5")

# Define the knowledge base
with open('mental_h.txt', 'r', encoding='utf-8') as file:
    knowledge_base = file.read()

# Extract knowledge base sections
def find_relevant_info(user_input, knowledge_text):
    vectorizer = TfidfVectorizer().fit_transform([knowledge_text, user_input])
    similarity = vectorizer.toarray().dot(vectorizer.toarray().T)[0, 1]
    if similarity > 0.1:
        # Extract sentences with relevance
        return '\n'.join([sentence for sentence in knowledge_text.splitlines() if user_input.lower() in sentence.lower()])
    return "I don't have specific information about that. Let's explore general advice."

# Generate a creative response
def generate_response(user_input, tokenizer, model, max_sequence_length, output_length=100):
    input_sequence = tokenizer.texts_to_sequences([user_input])
    input_sequence = pad_sequences(input_sequence, maxlen=max_sequence_length, padding='pre')

    output = []
    for _ in range(output_length):
        pred_probs = model.predict(input_sequence, verbose=0)
        next_idx = np.argmax(pred_probs)
        output.append(tokenizer.index_word.get(next_idx, ""))

        input_sequence = np.append(input_sequence[0][1:], next_idx).reshape(1, max_sequence_length)

    return " ".join(output)

# Example usage
user_input = input("You: ")
relevant_info = find_relevant_info(user_input, knowledge_base)
creative_response = generate_response(user_input, tokenizer, model, max_sequence_length=100, output_length=100)

response = f"Here is some information related to your query:\n{relevant_info}\n\nChatbot: {creative_response}"
print(response)


ValueError: Cannot assign value to variable ' embedding/embeddings:0': Shape mismatch.The variable shape (5057, 256), and the assigned value shape (73, 256) are incompatible.

In [None]:
model.load_weights("weights-best.hdf5")

# Generate Text
seed_idx = np.random.randint(0, len(X) - 1)
seed_sequence = X[seed_idx]

output = []
temperature = 1.0  # Lower values make text more deterministic; higher values make it more diverse.

for i in range(2000):  # Generate 1000 characters
    if i<1500:
        pred_input = np.reshape(seed_sequence, (1, len(seed_sequence)))
        pred_probs = model.predict(pred_input, verbose=0)
        next_idx = np.argmax(pred_probs)
        output.append(tokenizer.index_word[next_idx])

    # Update seed sequence
        seed_sequence = np.append(seed_sequence[1:], next_idx)
        
    else:
        if seed_idx == ".":
            break
        else:
            pred_input = np.reshape(seed_sequence, (1, len(seed_sequence)))
            pred_probs = model.predict(pred_input, verbose=0)
            next_idx = np.argmax(pred_probs)
            output.append(tokenizer.index_word[next_idx])

            # Update seed sequence
            seed_sequence = np.append(seed_sequence[1:], next_idx)
            
        

print("Generated Text:")
print("".join(output)+".")

NameError: name 'model' is not defined