In [None]:
%pip install tensorflow keras nltk scikit-learn transformers


In [None]:
pip install --upgrade tensorflow

In [None]:
pip uninstall transformers -y


In [None]:
pip install google-generativeai

In [None]:
pip install nltk

In [None]:
pip install transformers tensorflow torch scikit-learn matplotlib

In [None]:
pip cache purge

In [None]:

pip install keras==2.13.1  transformers==4.33.0

In [None]:
pip show tensorflow keras transformers

In [None]:
pip install keras==3.0.5 transformers==4.38.1

In [None]:
pip install tensorflow-addons


In [None]:
#Another Version

In [None]:
pip install sentence-transformers

In [None]:
#Best Model
import os
import json
import numpy as np
import pickle
import matplotlib.pyplot as plt
import gc
import tensorflow as tf
from tqdm import tqdm
from tensorflow.keras import backend as K  # To clear memory
from collections import Counter
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import (Input, Dense, Dropout, LSTM, BatchNormalization, Bidirectional, Layer,
                                     MultiHeadAttention, LayerNormalization, Add , Input)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.layers import GaussianNoise
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from imblearn.combine import SMOTETomek
from imblearn.over_sampling import SMOTE

# ---- Step 2: Define Attention Layer ----
class AttentionLayer(Layer):
    """Attention pooling that collapses axis 1 of its input.

    A single learned projection scores every position along axis 1, the
    scores are normalised with a softmax over that axis, and the layer
    returns the score-weighted sum of the positions.
    """

    def __init__(self, **kwargs):
        # No layer-specific options: everything is forwarded to the base class.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection vector and scalar bias for the last input axis."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(1,),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        scores = K.tanh(K.dot(x, self.W) + self.b)
        weights = K.softmax(scores, axis=1)
        weighted = x * weights
        return K.sum(weighted, axis=1)

    def compute_output_shape(self, input_shape):
        """Output keeps the first and last input dimensions; axis 1 is summed away."""
        first_dim, last_dim = input_shape[0], input_shape[-1]
        return (first_dim, last_dim)

# ---- Step 3: Load and Prepare Dataset ----
# The file is read as JSON Lines: one object per non-blank line, each with a
# "Context" (input text) and a "Response" (the class to predict).
with open("combined_dataset.json", "r", encoding="utf-8") as f:
    parsed_data = [json.loads(line) for line in f if line.strip()]

X_texts = [sample["Context"] for sample in parsed_data]
Y_labels = [sample["Response"] for sample in parsed_data]

# ---- Step 4: Define SBERT Variants to Test ----
sbert_variants = [
    'paraphrase-MiniLM-L12-v2'
]
# Other candidate that was tried: 'all-MiniLM-L6-v2'
# Resolved here, before first use (it used to be read before it was assigned).
sbert_model_name = sbert_variants[0]
embedding_path = "X_emb.npy"
label_path = "Y_encoded.npy"

# ---- Steps 5-7: Remove Rare Classes (only 1 sample), then Encode Labels ----
# Filtering happens on the raw strings so the encoder is fitted exactly once,
# on the classes the model will actually see. Its indices are therefore
# contiguous (0..C-1) and line up with the softmax outputs.
label_counts = Counter(Y_labels)
valid_indices = [i for i, label in enumerate(Y_labels) if label_counts[label] > 1]
if not valid_indices:
    raise ValueError("No response occurs more than once; nothing left to train on.")

X_texts_filtered = [X_texts[i] for i in valid_indices]
Y_labels_filtered = [Y_labels[i] for i in valid_indices]

label_encoder = LabelEncoder()
Y_filtered_int = label_encoder.fit_transform(Y_labels_filtered)
current_num_classes = len(label_encoder.classes_)
Y_filtered = to_categorical(Y_filtered_int, num_classes=current_num_classes)

# Smallest class size after filtering; drives the SMOTE neighbour count below.
min_samples_per_class = min(Counter(Y_filtered_int.tolist()).values())

# Save the encoder once; inference cells read this file to decode predictions.
with open("response_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

# ---- Step 8: Prepare Embeddings and Split Data ----
print(f"\n🔄 Preparing embeddings with SBERT model: {sbert_model_name}")
Y_filtered_encoded_labels_step8 = Y_filtered_int

# Reuse cached embeddings only when they clearly belong to the current
# filtered dataset. The check compares the cached labels, not the texts or
# the SBERT model name, so delete the .npy files after changing either.
X_emb = None
if os.path.exists(embedding_path) and os.path.exists(label_path):
    cached_emb = np.load(embedding_path)
    cached_labels = np.load(label_path)
    cache_matches = (
        cached_labels.shape == Y_filtered_int.shape
        and np.array_equal(cached_labels, Y_filtered_int)
        and len(cached_emb) == len(X_texts_filtered)
    )
    if cache_matches:
        print("✅ Loading saved embeddings...")
        X_emb = cached_emb

if X_emb is None:
    print("🔄 Generating SBERT embeddings...")
    sbert = SentenceTransformer(sbert_model_name)
    X_emb = sbert.encode(X_texts_filtered, show_progress_bar=True)
    np.save(embedding_path, X_emb)
    np.save(label_path, Y_filtered_int)

# Resample using SMOTE + TomekLinks if safe
# NOTE(review): resampling happens before the train/test split, so synthetic
# points derived from test samples can end up in the training set.
if min_samples_per_class < 2:
    print("⚠️ SMOTE skipped due to classes with <2 samples.")
    X_resampled, Y_resampled_encoded = X_emb, Y_filtered_encoded_labels_step8
else:
    # k_neighbors must be smaller than the smallest class, and at least 1.
    smote_k = min(5, min_samples_per_class - 1)
    smote_k = max(smote_k, 1)
    print(f"✅ Applying SMOTE with k={smote_k} and TomekLinks...")
    smote = SMOTE(k_neighbors=smote_k, random_state=42)
    X_smote, Y_smote_encoded = smote.fit_resample(X_emb, Y_filtered_encoded_labels_step8)
    smote_tomek = SMOTETomek(random_state=42)
    X_resampled, Y_resampled_encoded = smote_tomek.fit_resample(X_smote, Y_smote_encoded)

# The class count stays tied to the label encoder. Deriving it from
# np.unique() and clipping the labels (as before) would silently merge
# classes whenever a label id is >= the number of distinct labels.
Y_resampled_encoded = np.asarray(Y_resampled_encoded)
unique_resampled_labels = np.unique(Y_resampled_encoded)

Y_resampled = to_categorical(Y_resampled_encoded, num_classes=current_num_classes)
print(f"📊 Resampled Class Distribution: {Counter(np.argmax(Y_resampled, axis=1))}")
print(f"🔢 Number of classes after resampling: {current_num_classes}")

# Train-Test Split (75% train, 25% test)
print(f"➡️ Before train_test_split: test_size = 0.25")
X_train, X_test, Y_train, Y_test = train_test_split(
    X_resampled, Y_resampled,
    test_size=0.25,  # 25% for the test set
    random_state=42,
    stratify=np.argmax(Y_resampled, axis=1)
)
print(f"✅ After train_test_split: Training size = {len(X_train)}, Testing size = {len(X_test)}")

# Reshape input for LSTM: insert a length-1 axis at position 1
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

Y_train_int = np.argmax(Y_train, axis=1)
Y_test_int = np.argmax(Y_test, axis=1)

# Compute class weights
present_classes = np.unique(Y_train_int)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=Y_train_int
)
# Key each weight by its real class id; enumerate() is only right when every
# class id 0..C-1 is present in the training split.
class_weight_dict = {int(cls): float(w) for cls, w in zip(present_classes, class_weights)}

print(f"📏 Training data size: {len(X_train)}")
print(f"📏 Testing data size: {len(X_test)}")

In [None]:
# ---- Step 9: Build Model ----
def build_model(input_shape, num_classes):
    """Build and compile the classifier.

    Architecture, in order: two transformer-style encoder blocks, two
    bidirectional LSTM stages (each followed by dropout and batch
    normalisation), attention pooling, a 128-unit dense layer and a softmax
    output with ``num_classes`` units.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras ``Model`` using sparse categorical cross-entropy and
        AdamW driven by an exponentially decaying learning rate.
    """

    def transformer_block(inputs, num_heads=4, ff_dim=256, dropout_rate=0.3):
        """Self-attention plus feed-forward, each wrapped in residual + layer norm."""
        width = inputs.shape[-1]

        attended = MultiHeadAttention(num_heads=num_heads, key_dim=width)(inputs, inputs)
        attended = Dropout(dropout_rate)(attended)
        attention_residual = Add()([inputs, attended])
        normed_attention = LayerNormalization(epsilon=1e-6)(attention_residual)

        hidden = Dense(ff_dim, activation="relu")(normed_attention)
        projected = Dense(width)(hidden)
        projected = Dropout(dropout_rate)(projected)
        ff_residual = Add()([normed_attention, projected])
        return LayerNormalization(epsilon=1e-6)(ff_residual)

    inputs = Input(shape=input_shape)

    # Two stacked encoder blocks.
    x = inputs
    for _ in range(2):
        x = transformer_block(x)

    # Two recurrent stages that differ only in their dropout rate.
    for stage_dropout in (0.3, 0.25):
        x = Bidirectional(LSTM(128, return_sequences=True))(x)
        x = Dropout(stage_dropout)(x)
        x = BatchNormalization()(x)

    # Pool over axis 1, then classify.
    x = AttentionLayer()(x)
    x = Dropout(0.3)(x)

    x = Dense(128, activation="relu")(x)
    x = Dropout(0.3)(x)

    outputs = Dense(num_classes, activation="softmax")(x)

    lr_schedule = ExponentialDecay(
        initial_learning_rate=0.0005,
        decay_steps=1100,
        decay_rate=0.95,
    )
    optimizer = AdamW(learning_rate=lr_schedule)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model

# ---- Step 10: Build and Train Model ----
# Reset Keras' global state BEFORE building the model. Clearing the session
# after the model exists (as this cell used to do) resets state the freshly
# built model may still rely on.
K.clear_session()
gc.collect()

# Build the model
model = build_model(input_shape=X_train.shape[1:], num_classes=current_num_classes)

print(f"\n🚀 Training model using: {sbert_model_name}")

early_stop = EarlyStopping(monitor="val_loss", patience=8, restore_best_weights=True)
# ReduceLROnPlateau is intentionally not created here: build_model() compiles
# the optimizer with an ExponentialDecay schedule, and a schedule-driven
# learning rate cannot be overwritten by that callback.

# NOTE(review): the test split doubles as the validation set, so early
# stopping is tuned on the same data that Step 11 reports accuracy for.
history = model.fit(
    X_train, Y_train_int,  # Use integer labels for training
    validation_data=(X_test, Y_test_int),  # Use integer labels for validation
    epochs=20,
    batch_size=64,
    class_weight=class_weight_dict,
    callbacks=[early_stop],
    verbose=1
)

# ---- Step 11: Evaluate Model ----
# (This cell does not write the model to disk.)
test_loss, test_acc = model.evaluate(X_test, Y_test_int, verbose=1)  # Use integer labels for evaluation
print(f"✅ Test Accuracy with {sbert_model_name}: {test_acc * 100:.2f}%")

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2)

curve_specs = (
    ("accuracy", "Accuracy"),
    ("loss", "Loss"),
)
for ax, (metric_key, metric_title) in zip(axes, curve_specs):
    ax.plot(history.history[metric_key], label=f"Train {metric_title}")
    ax.plot(history.history[f"val_{metric_key}"], label=f"Validation {metric_title}")
    ax.set_title(f"{metric_title} over Epochs")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(metric_title)
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()


In [None]:
import json
import random
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K

# ---- Custom Attention Layer (required for model loading) ----
class AttentionLayer(Layer):
    """Attention pooling over axis 1; must be available when the saved model is loaded.

    The weight names ("att_weight", "att_bias") and the computation mirror
    the layer used at training time.
    """

    def __init__(self, **kwargs):
        # Nothing layer-specific to configure; defer to the Keras base class.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Allocate the scoring vector and bias for the last input axis."""
        n_features = input_shape[-1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(n_features, 1),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(1,),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Softmax-weight the positions on axis 1 and sum them."""
        energy = K.tanh(K.dot(x, self.W) + self.b)
        attention = K.softmax(energy, axis=1)
        return K.sum(x * attention, axis=1)

# ---- File Paths ----
MODEL_PATH = "optimized_lstm_model.keras"
LABEL_ENCODER_PATH = "response_encoder.pkl"
QUESTIONNAIRE_PATH = "generated_questionnaire.json"
MOOD_KEYWORDS_PATH = "mood_keywords.json"
SBERT_MODEL_NAME = "paraphrase-MiniLM-L12-v2"

# ---- Load Assets ----
# The custom layer must be registered so Keras can rebuild the saved graph.
model = load_model(MODEL_PATH, custom_objects={"AttentionLayer": AttentionLayer})

# NOTE(security): pickle.load runs arbitrary code embedded in the file. Only
# load an encoder that this project wrote itself.
with open(LABEL_ENCODER_PATH, "rb") as f:
    label_encoder = pickle.load(f)

# JSON is read as UTF-8 explicitly; the platform default encoding (for
# example cp1252 on Windows) can fail on non-ASCII text.
with open(QUESTIONNAIRE_PATH, "r", encoding="utf-8") as f:
    questionnaire = json.load(f)

with open(MOOD_KEYWORDS_PATH, "r", encoding="utf-8") as f:
    mood_keywords = json.load(f)

# Sentence encoder used to embed user input at inference time.
sbert = SentenceTransformer(SBERT_MODEL_NAME)

# ---- Helper Functions ----
def augment_input_with_mood_keywords(user_input):
    """Append an ``intent:<mood>`` tag for every mood whose keywords appear in the text.

    Matching is a case-insensitive substring test against each keyword list in
    the module-level ``mood_keywords`` mapping. The result is always the
    original text, a single space, then the space-joined tags (so the text
    ends with a space when nothing matched).
    """
    intent_tags = [
        f"intent:{mood.lower()}"
        for mood, keywords in mood_keywords.items()
        if any(keyword.lower() in user_input.lower() for keyword in keywords)
    ]
    suffix = " ".join(intent_tags)
    return user_input + " " + suffix

def predict_response(user_input):
    """Return the decoded response the model ranks highest for ``user_input``.

    The text is first tagged with mood intents, embedded with the module-level
    SBERT encoder, given an extra axis at position 1, and passed to the model.
    The arg-max class index is mapped back to its label by ``label_encoder``.
    """
    tagged_text = augment_input_with_mood_keywords(user_input)
    features = np.expand_dims(sbert.encode([tagged_text]), axis=1)
    class_scores = model.predict(features, verbose=0)
    best_index = np.argmax(class_scores, axis=1)[0]
    decoded = label_encoder.inverse_transform([best_index])
    return decoded[0]

def detect_mood_from_response(response_text):
    """Return the first mood whose keyword list matches ``response_text``.

    Moods are tried in the iteration order of the module-level
    ``mood_keywords`` mapping; a keyword matches when it occurs as a
    case-insensitive substring. Returns "Unknown" when no mood matches.
    """
    matching_moods = (
        mood
        for mood, keywords in mood_keywords.items()
        if any(keyword.lower() in response_text.lower() for keyword in keywords)
    )
    return next(matching_moods, "Unknown")

def get_question_for_mood(mood):
    """Pick a random follow-up question from the first category matching ``mood``.

    A category matches when the lower-cased, stripped mood is a substring of
    the lower-cased category name in the module-level ``questionnaire``.

    Args:
        mood: Mood name, e.g. the value returned by detect_mood_from_response.

    Returns:
        One question from the first matching category that has questions, or
        an apology string when the mood is empty or nothing usable matches.
    """
    fallback = "Sorry, I couldn't find a relevant question for you."

    # An empty needle is a substring of every category name, so without this
    # guard a blank mood would always pick the first category.
    needle = (mood or "").strip().lower()
    if not needle:
        return fallback

    for category, questions in questionnaire.items():
        # Skip matching categories with no questions: random.choice raises
        # IndexError on an empty sequence.
        if needle in category.lower() and questions:
            return random.choice(questions)
    return fallback

# ---- Chat Loop ----
def chatbot():
    """Run the interactive console chat loop until the user leaves.

    Each non-empty message is answered with the model's predicted response,
    the mood detected in that response, and a follow-up question for the mood.
    The loop ends on "exit"/"quit" (case-insensitive, surrounding whitespace
    ignored) or when input is closed or interrupted.
    """
    farewell = "🧠 Bot: Take care and stay safe! 💙"

    print("🧠 Mental Health Bot: Hello! I'm here to support you. Type 'exit' to quit.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave cleanly instead of with a traceback.
            print("\n" + farewell)
            break

        if not user_input:
            # Nothing to classify; ask again rather than embedding an empty string.
            continue

        if user_input.lower() in {"exit", "quit"}:
            print(farewell)
            break

        full_response = predict_response(user_input)
        detected_mood = detect_mood_from_response(full_response)
        question = get_question_for_mood(detected_mood)

        print(f"\n🧠 Bot (Mood Detected: {detected_mood})")
        print(f"→ Response: {full_response}")
        print(f"→ Follow-up Question: {question}\n")

# ---- Run ----
if __name__ == "__main__":
    chatbot()


In [None]:
# ---- Imports ----
import os
import json
import numpy as np
import pickle
import matplotlib.pyplot as plt
import gc
import tensorflow as tf
from collections import Counter
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import (Input, Dense, Dropout, LSTM, BatchNormalization,
                                     Bidirectional, MultiHeadAttention, LayerNormalization,
                                     Add, GaussianNoise, Layer)
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from imblearn.combine import SMOTETomek
from imblearn.over_sampling import SMOTE


# ---- Step 1: Define Attention Layer ----
class AttentionLayer(Layer):
    """Learned weighted sum over axis 1 of the input.

    Each position on axis 1 gets a scalar score (tanh of a linear projection
    of its features); the scores are softmax-normalised over axis 1 and used
    as weights when the positions are summed.
    """

    def __init__(self, **kwargs):
        # Pure pass-through to the Keras base class.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable projection and bias."""
        last_dim = input_shape[-1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(last_dim, 1),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(1,),
            initializer="zeros",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Apply the attention weights and reduce over axis 1."""
        raw_scores = K.tanh(K.dot(x, self.W) + self.b)
        attn_weights = K.softmax(raw_scores, axis=1)
        weighted_inputs = x * attn_weights
        return K.sum(weighted_inputs, axis=1)


# ---- Step 2: Load and Prepare Dataset ----
# The file is read as JSON Lines: one object per non-blank line, each with a
# "Context" (input text) and a "Response" (the class to predict).
with open("combined_dataset.json", "r", encoding="utf-8") as f:
    parsed_data = [json.loads(line) for line in f if line.strip()]

X_texts = [sample["Context"] for sample in parsed_data]
Y_labels = [sample["Response"] for sample in parsed_data]


# ---- Steps 3-4: Remove Rare Classes, then Encode Labels ----
# Responses that occur only once are dropped before encoding so the encoder's
# indices are contiguous (0..C-1) and match the model's softmax outputs.
label_counts = Counter(Y_labels)
valid_labels = {label for label, count in label_counts.items() if count > 1}
filtered_data = [(x, y) for x, y in zip(X_texts, Y_labels) if y in valid_labels]
if not filtered_data:
    raise ValueError("No response occurs more than once; nothing left to train on.")

X_texts_filtered = [x for x, _ in filtered_data]
Y_labels_filtered = [y for _, y in filtered_data]

# Refit label encoder ONLY on filtered labels
label_encoder = LabelEncoder()
Y_filtered_encoded = label_encoder.fit_transform(Y_labels_filtered)
current_num_classes = len(label_encoder.classes_)

# Save updated encoder
with open("response_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

# Smallest class SIZE. min() over the Counter itself iterates its keys (the
# label ids), which always yielded 0 and disabled SMOTE.
min_samples_per_class = min(Counter(Y_filtered_encoded.tolist()).values())

# ---- Step 5: SBERT Embeddings ----
sbert_model_name = 'paraphrase-MiniLM-L12-v2'
print(f"\n🔄 Generating SBERT embeddings with {sbert_model_name}")
sbert = SentenceTransformer(sbert_model_name)
X_emb = sbert.encode(X_texts_filtered, show_progress_bar=True)


# ---- Step 6: Resampling with SMOTE + TomekLinks ----
# NOTE(review): resampling happens before the train/test split, so synthetic
# points derived from test samples can end up in the training set.
if min_samples_per_class < 2:
    print("⚠️ SMOTE skipped due to classes with <2 samples.")
    X_resampled, Y_resampled_encoded = X_emb, Y_filtered_encoded
else:
    # k_neighbors must be smaller than the smallest class, and at least 1.
    smote_k = max(min(5, min_samples_per_class - 1), 1)
    smote = SMOTE(k_neighbors=smote_k, random_state=42)
    X_smote, Y_smote_encoded = smote.fit_resample(X_emb, Y_filtered_encoded)
    smote_tomek = SMOTETomek(random_state=42)
    X_resampled, Y_resampled_encoded = smote_tomek.fit_resample(X_smote, Y_smote_encoded)

# Resampling only emits labels that already exist, so every id is already in
# [0, current_num_classes); no clipping is needed.
Y_resampled_encoded = np.asarray(Y_resampled_encoded)
Y_resampled = to_categorical(Y_resampled_encoded, num_classes=current_num_classes)

print(f"📊 Resampled Class Distribution: {Counter(np.argmax(Y_resampled, axis=1))}")
print(f"🔢 Classes after resampling: {current_num_classes}")

# Stratify only when the test split can hold at least one sample per class.
if len(Y_resampled) * 0.25 >= current_num_classes:
    stratify_option = np.argmax(Y_resampled, axis=1)
else:
    print(f"⚠️ Skipping stratify: test size too small for {current_num_classes} classes.")
    stratify_option = None

# ---- Step 7: Train/Test Split ----
X_train, X_test, Y_train, Y_test = train_test_split(
    X_resampled, Y_resampled,
    test_size=0.25,
    random_state=42,
    stratify=stratify_option
)

# Insert a length-1 axis at position 1 for the recurrent layers.
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)
Y_train_int = np.argmax(Y_train, axis=1)
Y_test_int = np.argmax(Y_test, axis=1)

present_classes = np.unique(Y_train_int)
class_weights = compute_class_weight("balanced", classes=present_classes, y=Y_train_int)
# Key each weight by its real class id; without stratification some classes
# can be absent from the training split, which makes enumerate() misalign.
class_weight_dict = {int(cls): float(w) for cls, w in zip(present_classes, class_weights)}

print(f"📏 Training samples: {len(X_train)}, Test samples: {len(X_test)}")


# ---- Step 8: Build Model ----
def build_model(input_shape, num_classes):
    """Build and compile the classifier.

    Architecture, in order: two transformer-style encoder blocks, two
    bidirectional LSTM stages (each followed by dropout and batch
    normalisation), attention pooling, a 256-unit dense layer and a softmax
    output with ``num_classes`` units.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras ``Model`` using Adam (constant learning rate 0.0005)
        and sparse categorical cross-entropy, so targets are integer class ids.
    """

    def transformer_block(inputs, num_heads=4, ff_dim=256, dropout_rate=0.3):
        """Self-attention plus feed-forward, each wrapped in residual + layer norm."""
        attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=inputs.shape[-1])(inputs, inputs)
        attention_output = Dropout(dropout_rate)(attention_output)
        out1 = LayerNormalization(epsilon=1e-6)(Add()([inputs, attention_output]))
        ff_output = Dense(ff_dim, activation="relu")(out1)
        # Project back to the input width so the residual Add() shapes match.
        ff_output = Dense(inputs.shape[-1])(ff_output)
        ff_output = Dropout(dropout_rate)(ff_output)
        return LayerNormalization(epsilon=1e-6)(Add()([out1, ff_output]))

    inputs = Input(shape=input_shape)
    x = transformer_block(inputs)
    x = transformer_block(x)

    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.3)(x)
    x = BatchNormalization()(x)

    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.25)(x)
    x = BatchNormalization()(x)

    # Pool over axis 1 so the dense head receives one vector per sample.
    x = AttentionLayer()(x)
    x = Dropout(0.3)(x)

    x = Dense(256, activation="relu")(x)
    x = Dropout(0.3)(x)

    outputs = Dense(num_classes, activation="softmax")(x)

    # The optimizer keeps a plain float learning rate on purpose: the training
    # code adjusts it with ReduceLROnPlateau. The ExponentialDecay object that
    # used to be built here was never passed to the optimizer, so it is gone.
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(learning_rate=0.0005), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model


# ---- Step 9: Train Model ----
# Reset Keras' global state before building a fresh model.
K.clear_session()
gc.collect()
model = build_model(input_shape=X_train.shape[1:], num_classes=current_num_classes)

early_stop = EarlyStopping(monitor="val_loss", patience=9, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.8, patience=7, verbose=1, min_lr=1e-6)

print(f"\n🚀 Training model with: {sbert_model_name}")
# NOTE(review): the test split doubles as the validation set, so early
# stopping and LR reduction are tuned on the data Step 10 reports on.
history = model.fit(
    X_train, Y_train_int,
    validation_data=(X_test, Y_test_int),
    epochs=30,
    batch_size=64,
    class_weight=class_weight_dict,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)


# ---- Step 10: Evaluate and Save Model ----
test_loss, test_acc = model.evaluate(X_test, Y_test_int, verbose=1)
print(f"✅ Test Accuracy with {sbert_model_name}: {test_acc * 100:.2f}%")

model_filename = f"model_{sbert_model_name.replace('-', '_')}.keras"
if os.path.exists(model_filename):
    os.remove(model_filename)
model.save(model_filename)
print(f"💾 Model saved as {model_filename}")


# ---- Step 11: Plot Training Curves ----
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()


# ---- Step 12: Save Final Model Based on Accuracy ----
final_model_name = "optimized_lstm_model.keras"
if os.path.exists(final_model_name):
    try:
        prev_model = load_model(final_model_name, custom_objects={"AttentionLayer": AttentionLayer})
        # The models are compiled with sparse categorical cross-entropy, so
        # they must be scored on integer labels. Passing the one-hot Y_test
        # raised here, and the except branch then always replaced the stored
        # model regardless of accuracy.
        # NOTE(review): assumes the stored model was trained on the same
        # class set; otherwise the comparison is not meaningful.
        _, prev_acc = prev_model.evaluate(X_test, Y_test_int, verbose=1)
    except Exception as e:
        # Best effort: an unreadable or incompatible previous model is replaced.
        print(f"⚠️ Couldn't load previous model: {e}")
        os.replace(model_filename, final_model_name)
        print(f"✅ Final model saved.")
    else:
        if test_acc > prev_acc:
            os.replace(model_filename, final_model_name)
            print(f"✅ Final model updated: Accuracy improved from {prev_acc:.4f} → {test_acc:.4f}")
        else:
            print(f"⚠️ Existing model retained: Accuracy {prev_acc:.4f} is better or equal.")
else:
    os.rename(model_filename, final_model_name)
    print(f"✅ Final model saved as {final_model_name}")

In [None]:
import os
import json
import numpy as np
import pickle
import matplotlib.pyplot as plt
import gc
import tensorflow as tf
from tensorflow.keras import backend as K  # To clear memory
from collections import Counter
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import (Input, Dense, Dropout, LSTM, BatchNormalization, Bidirectional, Layer,
                                     MultiHeadAttention, LayerNormalization, Add)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GaussianNoise
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from imblearn.combine import SMOTETomek
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score, classification_report

# ---- Step 2: Define Attention Layer ----
class AttentionLayer(Layer):
    """Reduce axis 1 of the input to a single attention-weighted vector.

    Trainable parameters: a ``(features, 1)`` scoring vector and a scalar
    bias. Scores are squashed with tanh and normalised with a softmax over
    axis 1 before the weighted sum is taken.
    """

    def __init__(self, **kwargs):
        # Only base-class keyword arguments are accepted.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the weights once the size of the last input axis is known."""
        weight_specs = (
            ("W", "att_weight", (input_shape[-1], 1), "normal"),
            ("b", "att_bias", (1,), "zeros"),
        )
        for attr, weight_name, weight_shape, init in weight_specs:
            setattr(
                self,
                attr,
                self.add_weight(name=weight_name, shape=weight_shape,
                                initializer=init, trainable=True),
            )
        super().build(input_shape)

    def call(self, x):
        """Score, normalise and sum the positions on axis 1."""
        position_scores = K.tanh(K.dot(x, self.W) + self.b)
        position_weights = K.softmax(position_scores, axis=1)
        return K.sum(x * position_weights, axis=1)

    def compute_output_shape(self, input_shape):
        """First and last input dimensions survive; axis 1 is reduced."""
        return (input_shape[0], input_shape[-1])

# ---- Step 3: Load and Prepare Dataset ----
# The file is read as JSON Lines: one object per non-blank line, each with a
# "Context" (input text) and a "Response" (the class to predict).
with open("combined_dataset.json", "r", encoding="utf-8") as f:
    parsed_data = [json.loads(line) for line in f if line.strip()]

X_texts = [sample["Context"] for sample in parsed_data]
Y_labels = [sample["Response"] for sample in parsed_data]

# ---- Step 4: Define SBERT Variants to Test ----
sbert_variants = [
    'paraphrase-MiniLM-L12-v2'
]
# Other candidate that was tried: 'all-MiniLM-L6-v2'

# ---- Steps 5-7: Remove Rare Classes (only 1 sample), then Encode Labels ----
# Filtering comes first so the encoder only sees surviving classes and its
# ids are contiguous (0..C-1). The previous order kept the sparse ids of the
# full label set and then clipped them to len(unique) - 1, which merged every
# higher id into one class ("994: 2412" in the recorded output) and left the
# pickled encoder out of sync with the model's output indices.
class_counts = Counter(Y_labels)
valid_indices = [i for i, label in enumerate(Y_labels) if class_counts[label] > 1]
if not valid_indices:
    raise ValueError("No response occurs more than once; nothing left to train on.")

X_texts_filtered = [X_texts[i] for i in valid_indices]
Y_labels_filtered = [Y_labels[i] for i in valid_indices]

label_encoder = LabelEncoder()
Y_filtered_encoded_labels = label_encoder.fit_transform(Y_labels_filtered)
current_num_classes = len(label_encoder.classes_)

# Save the encoder that matches the model's output layer.
with open("response_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

filtered_class_counts = Counter(Y_filtered_encoded_labels.tolist())
min_samples_per_class = min(filtered_class_counts.values())
print(f"📊 Filtered Class Distribution: {filtered_class_counts}")

# ---- Step 8: Prepare Embeddings and Split Data ----
sbert_model_name = sbert_variants[0] # Using the first variant for this non-function block
print(f"\n🔄 Preparing embeddings with SBERT model: {sbert_model_name}")
sbert = SentenceTransformer(sbert_model_name)
X_emb = sbert.encode(X_texts_filtered)

# Resample using SMOTE + TomekLinks if safe
# NOTE(review): resampling happens before the train/test split, so synthetic
# points derived from test samples can end up in the training set.
if min_samples_per_class < 2:
    print("⚠️ SMOTE skipped due to classes with <2 samples.")
    X_resampled, Y_resampled_encoded = X_emb, Y_filtered_encoded_labels
else:
    # k_neighbors must be smaller than the smallest class, and at least 1.
    smote_k = min(5, min_samples_per_class - 1)
    smote_k = max(smote_k, 1)
    print(f"✅ Applying SMOTE with k={smote_k} and TomekLinks...")
    smote = SMOTE(k_neighbors=smote_k, random_state=42)
    X_smote, Y_smote_encoded = smote.fit_resample(X_emb, Y_filtered_encoded_labels)
    smote_tomek = SMOTETomek(random_state=42)
    X_resampled, Y_resampled_encoded = smote_tomek.fit_resample(X_smote, Y_smote_encoded)

# Resampling only emits existing label ids, all of which are already in
# [0, current_num_classes), so no clipping or renumbering is required.
Y_resampled_encoded = np.asarray(Y_resampled_encoded)
unique_resampled_labels = np.unique(Y_resampled_encoded)

Y_resampled = to_categorical(Y_resampled_encoded, num_classes=current_num_classes)
print(f"📊 Resampled Class Distribution: {Counter(np.argmax(Y_resampled, axis=1))}")
print(f"🔢 Number of classes after resampling: {current_num_classes}")

# Train-Test Split (75% train, 25% test)
print(f"➡️ Before train_test_split: test_size = 0.25")
X_train, X_test, Y_train, Y_test = train_test_split(
    X_resampled, Y_resampled,
    test_size=0.25,  # 25% for the test set
    random_state=42,
    stratify=np.argmax(Y_resampled, axis=1)
)
print(f"✅ After train_test_split: Training size = {len(X_train)}, Testing size = {len(X_test)}")

# Reshape input for LSTM: insert a length-1 axis at position 1
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

Y_train_int = np.argmax(Y_train, axis=1)
Y_test_int = np.argmax(Y_test, axis=1)

# Compute class weights
present_classes = np.unique(Y_train_int)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=Y_train_int
)
# Key each weight by its real class id rather than by its position.
class_weight_dict = {int(cls): float(w) for cls, w in zip(present_classes, class_weights)}

print(f"📏 Training data size: {len(X_train)}")
print(f"📏 Testing data size: {len(X_test)}")
# ---- Step 9: Build Model ----
def build_model(input_shape, num_classes):
    """Build and compile the classifier.

    Architecture, in order: two transformer-style encoder blocks, two
    bidirectional LSTM stages (each followed by dropout and batch
    normalisation), attention pooling, a 256-unit dense layer and a softmax
    output with ``num_classes`` units.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras ``Model`` using Adam (constant learning rate 0.0005)
        and sparse categorical cross-entropy, so targets are integer class ids.
    """

    def transformer_block(inputs, num_heads=4, ff_dim=256, dropout_rate=0.3):
        """Self-attention plus feed-forward, each wrapped in residual + layer norm."""
        attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=inputs.shape[-1])(inputs, inputs)
        attention_output = Dropout(dropout_rate)(attention_output)
        out1 = LayerNormalization(epsilon=1e-6)(Add()([inputs, attention_output]))

        ff_output = Dense(ff_dim, activation="relu")(out1)
        # Project back to the input width so the residual Add() shapes match.
        ff_output = Dense(inputs.shape[-1])(ff_output)
        ff_output = Dropout(dropout_rate)(ff_output)
        return LayerNormalization(epsilon=1e-6)(Add()([out1, ff_output]))

    inputs = Input(shape=input_shape)
    x = transformer_block(inputs)
    x = transformer_block(x)

    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.3)(x)
    x = BatchNormalization()(x)

    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.25)(x)
    x = BatchNormalization()(x)

    # Pool over axis 1 so the dense head receives one vector per sample.
    x = AttentionLayer()(x)
    x = Dropout(0.3)(x)

    x = Dense(256, activation="relu")(x)
    x = Dropout(0.3)(x)

    outputs = Dense(num_classes, activation="softmax")(x)

    # The optimizer keeps a plain float learning rate on purpose: the training
    # code adjusts it with ReduceLROnPlateau. The ExponentialDecay object that
    # used to be built here was never passed to the optimizer, so it is gone.
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=Adam(learning_rate=0.0005),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

# ---- Step 10: Build and Train Model ----
# Reset Keras' global state BEFORE building the model; clearing the session
# after the model exists resets state the new model may still rely on.
K.clear_session()
gc.collect()

# Build the model
model = build_model(input_shape=X_train.shape[1:], num_classes=current_num_classes)

print(f"\n🚀 Training model using: {sbert_model_name}")

early_stop = EarlyStopping(monitor="val_loss", patience=9, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.8, patience=7, verbose=1, min_lr=1e-6)

# NOTE(review): the test split doubles as the validation set, so early
# stopping and LR reduction are tuned on the data Step 11 reports on.
history = model.fit(
    X_train, Y_train_int,  # Use integer labels for training
    validation_data=(X_test, Y_test_int),  # Use integer labels for validation
    epochs=30,
    batch_size=64,
    class_weight=class_weight_dict,
    callbacks=[reduce_lr, early_stop],
    verbose=1
)

# ---- Step 11: Evaluate and Save Model ----
test_loss, test_acc = model.evaluate(X_test, Y_test_int, verbose=1)  # Use integer labels for evaluation
Y_pred_probs = model.predict(X_test, verbose=0)
Y_pred = np.argmax(Y_pred_probs, axis=1)
print(f"✅ Test Accuracy with {sbert_model_name}: {test_acc * 100:.2f}%")
model_filename = f"model_{sbert_model_name.replace('-', '_')}.keras"
if os.path.exists(model_filename):
    os.remove(model_filename)
model.save(model_filename)
print(f"💾 Model saved as {model_filename}")

# --- Optional: Plot training history ---
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()

# ---- Step 12: Save Model as Final ----
final_model_name = "optimized_lstm_model.keras"
if os.path.exists(final_model_name):
    try:
        previous_model = load_model(final_model_name, custom_objects={"AttentionLayer": AttentionLayer})
        # The models are compiled with sparse categorical cross-entropy, so
        # they must be scored on integer labels. Passing the one-hot Y_test
        # raised here, and the except branch then always replaced the stored
        # model regardless of accuracy.
        # NOTE(review): assumes the stored model was trained on the same
        # class set; otherwise the comparison is not meaningful.
        _, prev_acc = previous_model.evaluate(X_test, Y_test_int, verbose=1)
    except Exception as e:
        # Best effort: an unreadable or incompatible previous model is replaced.
        print(f"⚠️ Couldn't load existing model: {e}")
        os.replace(model_filename, final_model_name)
        print(f"✅ Final model saved as {final_model_name}")
    else:
        if test_acc > prev_acc:
            os.replace(model_filename, final_model_name)
            print(f"✅ Final model updated → Accuracy improved from {prev_acc*100:.2f}% → {test_acc*100:.2f}%")
        else:
            print(f"⚠️ Existing model retained → Accuracy {prev_acc*100:.2f}% is better or equal.")
else:
    os.rename(model_filename, final_model_name)
    print(f"✅ Final model saved as {final_model_name}")


f1 = f1_score(Y_test_int, Y_pred, average="weighted")
print(f"✅ Test Accuracy with {sbert_model_name}: {test_acc * 100:.2f}%")
print(f"🎯 Weighted F1 Score: {f1 * 100:.2f}%")

# Optional: detailed class-level report
# Report only the class ids that occur in the test labels or predictions and
# pass the matching names. Handing over every encoder class fails when some
# class is absent from the test split.
report_labels = np.unique(np.concatenate([Y_test_int, Y_pred]))
encoder_classes = np.asarray(label_encoder.classes_)
if report_labels.max() < len(encoder_classes):
    report_names = [str(encoder_classes[i]) for i in report_labels]
else:
    # Encoder and label ids disagree; fall back to the raw ids as names.
    report_names = [str(i) for i in report_labels]

print("\n📋 Classification Report:\n")
print(classification_report(
    Y_test_int, Y_pred,
    labels=report_labels,
    target_names=report_names,
    zero_division=0,
))

📊 Filtered Class Distribution: Counter({0: 4, 583: 3, 2089: 3, 1887: 3, 1697: 3, 1316: 3, 614: 3, 1171: 3, 1953: 3, 1404: 3, 1954: 3, 2301: 3, 1263: 3, 2304: 3, 2183: 3, 1440: 3, 522: 3, 1299: 3, 1635: 3, 749: 3, 1502: 3, 1647: 3, 1646: 3, 1989: 3, 625: 3, 827: 3, 429: 3, 413: 3, 1203: 3, 1032: 3, 1986: 3, 250: 3, 1219: 3, 1212: 3, 764: 3, 2060: 3, 1473: 2, 1284: 2, 2366: 2, 67: 2, 64: 2, 2051: 2, 778: 2, 66: 2, 1531: 2, 1463: 2, 2006: 2, 797: 2, 2288: 2, 1251: 2, 1567: 2, 411: 2, 62: 2, 1562: 2, 1912: 2, 1441: 2, 52: 2, 2007: 2, 1581: 2, 2357: 2, 2322: 2, 1806: 2, 1981: 2, 1980: 2, 2351: 2, 1936: 2, 2154: 2, 1584: 2, 1302: 2, 63: 2, 518: 2, 1352: 2, 529: 2, 2361: 2, 1805: 2, 1572: 2, 2321: 2, 952: 2, 1948: 2, 1008: 2, 539: 2, 1571: 2, 340: 2, 1609: 2, 1432: 2, 396: 2, 2228: 2, 422: 2, 1191: 2, 1688: 2, 2030: 2, 2084: 2, 1361: 2, 1486: 2, 1607: 2, 1719: 2, 208: 2, 1683: 2, 1990: 2, 450: 2, 1160: 2, 1807: 2, 217: 2, 1323: 2, 2409: 2, 1638: 2, 1371: 2, 1290: 2, 1321: 2, 434: 2, 420: 2, 3



✅ Applying SMOTE with k=1 and TomekLinks...




📊 Resampled Class Distribution: Counter({994: 2412, 67: 4, 64: 4, 778: 4, 66: 4, 797: 4, 411: 4, 62: 4, 52: 4, 63: 4, 518: 4, 529: 4, 952: 4, 539: 4, 340: 4, 396: 4, 422: 4, 208: 4, 450: 4, 217: 4, 434: 4, 420: 4, 374: 4, 968: 4, 106: 4, 583: 4, 473: 4, 469: 4, 837: 4, 603: 4, 329: 4, 600: 4, 620: 4, 134: 4, 372: 4, 410: 4, 33: 4, 607: 4, 242: 4, 937: 4, 586: 4, 921: 4, 31: 4, 682: 4, 421: 4, 83: 4, 720: 4, 559: 4, 339: 4, 180: 4, 409: 4, 124: 4, 978: 4, 866: 4, 111: 4, 701: 4, 657: 4, 668: 4, 461: 4, 232: 4, 271: 4, 348: 4, 685: 4, 853: 4, 505: 4, 237: 4, 113: 4, 24: 4, 288: 4, 426: 4, 666: 4, 957: 4, 982: 4, 397: 4, 614: 4, 762: 4, 418: 4, 202: 4, 522: 4, 14: 4, 337: 4, 495: 4, 241: 4, 105: 4, 193: 4, 104: 4, 498: 4, 929: 4, 261: 4, 749: 4, 239: 4, 948: 4, 402: 4, 554: 4, 347: 4, 346: 4, 187: 4, 582: 4, 843: 4, 406: 4, 443: 4, 615: 4, 899: 4, 890: 4, 598: 4, 943: 4, 845: 4, 304: 4, 78: 4, 175: 4, 307: 4, 206: 4, 584: 4, 163: 4, 702: 4, 842: 4, 713: 4, 656: 4, 747: 4, 811: 4, 35: 4, 2

In [None]:
import os
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sentence_transformers import SentenceTransformer

# ---- Step 1: Load SBERT, Model, and Encoder ----
class AttentionLayer(tf.keras.layers.Layer):
    """Attention pooling over axis 1, matching the layer saved in the model.

    Registering the plain base ``Layer`` under this name (as before) drops the
    attention computation, so predictions would not match training or the
    load fails outright. The weight names and maths here mirror the training
    definition.
    """

    def build(self, input_shape):
        """Create the scoring vector and bias for the last input axis."""
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal", trainable=True)
        self.b = self.add_weight(name="att_bias", shape=(1,),
                                 initializer="zeros", trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Softmax-weight the positions on axis 1 and sum them."""
        scores = tf.tanh(tf.tensordot(x, self.W, axes=1) + self.b)
        weights = tf.nn.softmax(scores, axis=1)
        return tf.reduce_sum(x * weights, axis=1)


sbert_model_name = "paraphrase-MiniLM-L12-v2"
print("🔄 Loading SBERT and trained model...")

sbert = SentenceTransformer(sbert_model_name)
model = load_model("optimized_lstm_model.keras", custom_objects={"AttentionLayer": AttentionLayer})
# NOTE(security): pickle.load runs arbitrary code embedded in the file. Only
# load an encoder that this project wrote itself.
with open("response_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# ---- Step 2: Prediction Function ----
def predict_response(user_input):
    """Predict the best response for one user message.

    Args:
        user_input: The message text to classify.

    Returns:
        ``(predicted_response, confidence)``: the decoded label of the
        highest-scoring class and that class's score as a Python float.
    """
    # SBERT embedding, with an extra axis inserted at position 1
    embedding = sbert.encode([user_input])
    embedding = np.expand_dims(embedding, axis=1)

    # Model prediction; verbose=0 keeps Keras' progress bar out of the chat
    # transcript, as the other predict_response in this notebook does.
    probs = model.predict(embedding, verbose=0)
    pred_index = np.argmax(probs, axis=1)[0]
    # Plain float so callers are not handed a NumPy scalar.
    confidence = float(probs[0][pred_index])

    # Decode label
    predicted_response = label_encoder.inverse_transform([pred_index])[0]
    return predicted_response, confidence

# ---- Step 3: Run Chatbot Loop ----
# Reads messages until the user types exit/quit (case-insensitive, surrounding
# whitespace ignored) or the input stream is closed or interrupted.
print("\n🧠 Chatbot is ready! Type your message (or type 'exit' to quit):\n")
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C: leave cleanly instead of with a traceback.
        print("\n👋 Goodbye!")
        break

    if not user_input:
        # Nothing to classify; prompt again instead of embedding an empty string.
        continue

    if user_input.lower() in ["exit", "quit"]:
        print("👋 Goodbye!")
        break

    response, confidence = predict_response(user_input)
    print(f"Bot: {response} (confidence: {confidence:.2f})\n")
