<a href="https://colab.research.google.com/github/Mitul060299/Mental-Health-Chatbot/blob/main/Code_version_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import importlib.util
import sys
import torch
import math
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from datasets import load_dataset, Dataset, concatenate_datasets
from sklearn.model_selection import train_test_split
import psutil
import gc
import time
import requests

# Install rouge_score (for Jupyter/Colab environments)
%pip install rouge_score
from rouge_score import rouge_scorer
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling, pipeline

# Load and merge datasets
def load_and_merge_datasets():
    # STEP 1: Load HOPE Therapy Data
    hope_path = "/content/SPARTA_WSDM2022/HOPE_data/HOPE_therapy_session_transcripts"
    try:
        files = [f for f in os.listdir(hope_path) if f.endswith(".csv")]
    except FileNotFoundError:
        print(f"Error: HOPE dataset directory not found at {hope_path}")
        files = []

    hope_pairs = []
    for file in files:
        df = pd.read_csv(os.path.join(hope_path, file))
        df['Speaker'] = df['Type'].map({'T': 'Therapist', 'P': 'Client'})
        df['Content'] = df['Utterance']
        for i in range(1, len(df)):
            if df.loc[i-1, 'Speaker'] == "Client" and df.loc[i, 'Speaker'] == "Therapist":
                hope_pairs.append({
                    "prompt": f"Client: {df.loc[i-1, 'Content']}",
                    "response": f"Therapist: {df.loc[i, 'Content']}",
                    "source": "HOPE"
                })
    print(f"Extracted {len(hope_pairs)} dialogue pairs from HOPE dataset")

    # STEP 2: Load EmpatheticDialogues
    try:
        empathetic_ds = load_dataset("empathetic_dialogues")
        empathy_pairs = []
        prev_conv_id = None
        context = ""
        for row in empathetic_ds['train']:
            if row['utterance_idx'] > 0 and row['conv_id'] == prev_conv_id:
                empathy_pairs.append({
                    "prompt": f"Client: {context}",
                    "response": f"Therapist: {row['utterance']}",
                    "emotion": row['context'],
                    "source": "EmpatheticDialogues"
                })
            context = row['utterance']
            prev_conv_id = row['conv_id']
        print(f"Extracted {len(empathy_pairs)} dialogue pairs from EmpatheticDialogues dataset")
    except Exception as e:
        print(f"Error loading EmpatheticDialogues: {e}")
        empathy_pairs = []

    # STEP 3: Load CounselChat
    try:
        url = "https://raw.githubusercontent.com/nbertagnolli/counsel-chat/master/data/counselchat-data.csv"
        response = requests.get(url)
        with open("counselchat-data.csv", "wb") as f:
            f.write(response.content)
        cc_df = pd.read_csv("counselchat-data.csv")
        counsel_pairs = []
        for _, row in cc_df.iterrows():
            if pd.notnull(row['questionText']) and pd.notnull(row['answerText']):
                counsel_pairs.append({
                    "prompt": f"Client: {row['questionText']}",
                    "response": f"Therapist: {row['answerText'].replace('<p>','')}",
                    "source": "CounselChat"
                })
        print(f"Extracted {len(counsel_pairs)} dialogue pairs from CounselChat dataset")
    except Exception as e:
        print(f"Error loading CounselChat: {e}")
        counsel_pairs = []

    # STEP 4: Merge All Dialogues
    dialogue_data = hope_pairs + empathy_pairs + counsel_pairs
    print(f"Total dialogue pairs: {len(dialogue_data)}")

    print("\nSample data from each source:")
    for source in ["HOPE", "EmpatheticDialogues", "CounselChat"]:
        samples = [d for d in dialogue_data if d.get("source") == source]
        if samples:
            print(f"\n{source} sample:")
            sample = np.random.choice(samples)
            print(f"Prompt: {sample['prompt']}")
            print(f"Response: {sample['response']}")

    dialogue_data = [{'prompt': d['prompt'], 'response': d['response']} for d in dialogue_data]
    return Dataset.from_list(dialogue_data)

# Load and split dataset
dialogue_data = load_and_merge_datasets()
train_data, val_data = train_test_split(dialogue_data.to_pandas(), test_size=0.2, random_state=42)
train_dataset = Dataset.from_pandas(train_data.reset_index(drop=True))
val_dataset = Dataset.from_pandas(val_data.reset_index(drop=True))

# Define MentalHealthChatbot class
class MentalHealthChatbot:
    def __init__(self, bert_model, bert_tokenizer, gpt_model, gpt_tokenizer):
        self.bert_model = bert_model
        self.bert_tokenizer = bert_tokenizer
        self.gpt_model = gpt_model
        self.gpt_tokenizer = gpt_tokenizer
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.bert_model.to(self.device)
        self.gpt_model.to(self.device)
        self.bert_model.eval()
        self.gpt_model.eval()

    def analyze_emotion(self, text):
        inputs = self.bert_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = self.bert_model(**inputs)
            emotion_scores = outputs.logits[0]
            emotion_id = torch.argmax(emotion_scores).item()
            emotion = self.bert_model.config.id2label.get(emotion_id, "neutral")
            confidence = torch.softmax(emotion_scores, dim=0)[emotion_id].item()
        return emotion, confidence

    def generate_response(self, user_input, max_length=50):
        emotion, confidence = self.analyze_emotion(user_input)
        enhanced_prompt = f"Client (feeling {emotion}): {user_input}\nTherapist:"
        inputs = self.gpt_tokenizer(enhanced_prompt, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        try:
            output_sequences = self.gpt_model.generate(
                **inputs,
                max_length=len(inputs["input_ids"][0]) + max_length,
                num_return_sequences=1,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.gpt_tokenizer.eos_token_id,
                repetition_penalty=1.2
            )
            full_text = self.gpt_tokenizer.decode(output_sequences[0], skip_special_tokens=True)
            therapist_response = full_text.split("Therapist:", 1)[1].strip() if "Therapist:" in full_text else full_text
        except Exception as e:
            print(f"Error generating response: {e}")
            therapist_response = "I'm here to help. Could you share more?"
        return {
            "response": therapist_response,
            "emotion_detected": emotion,
            "confidence": confidence
        }

# Load models
try:
    bert_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
    bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    gpt_model = AutoModelForCausalLM.from_pretrained("gpt2", ignore_mismatched_sizes=True)
    gpt_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    gpt_tokenizer.pad_token = gpt_tokenizer.eos_token
except FileNotFoundError:
    print("Fine-tuned models not found. Using pre-trained models.")
    bert_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
    bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    gpt_model = AutoModelForCausalLM.from_pretrained("gpt2", ignore_mismatched_sizes=True)
    gpt_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    gpt_tokenizer.pad_token = gpt_tokenizer.eos_token

# Initialize chatbot
chatbot = MentalHealthChatbot(bert_model, bert_tokenizer, gpt_model, gpt_tokenizer)

# Evaluation functions
def calculate_perplexity(model, tokenizer, text):
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
    return math.exp(loss.item())

def calculate_bleu(reference, hypothesis):
    smoothie = SmoothingFunction().method4
    return sentence_bleu([reference.split()], hypothesis.split(), smoothing_function=smoothie)

def calculate_rouge(reference, hypothesis):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    return scorer.score(reference, hypothesis)

def distinct_n_gram(responses, n=2):
    all_ngrams = []
    for response in responses:
        tokens = response.split()
        ngrams = list(zip(*[tokens[i:] for i in range(n)]))
        all_ngrams.extend(ngrams)
    total = len(all_ngrams)
    unique = len(set(all_ngrams))
    return unique / total if total > 0 else 0

def measure_inference_time(generated_text, prompt, tokenizer):
    return len(generated_text.split()) / 50.0

def get_memory_usage():
    process = psutil.Process(os.getpid())
    cpu_mem = process.memory_info().rss / 1024 ** 2
    gpu_mem = torch.cuda.max_memory_allocated() / 1024 ** 2 if torch.cuda.is_available() else None
    if torch.cuda.is_available():
        torch.cuda.reset_max_memory_allocated()
    gc.collect()
    return {'cpu_memory_MB': cpu_mem, 'gpu_memory_MB': gpu_mem}

# Evaluation using a subset of val_dataset
eval_subset = val_dataset.select(range(min(10, len(val_dataset))))
input_prompts = [item["prompt"] for item in eval_subset]
reference_responses = [item["response"] for item in eval_subset]
generated_responses = []

# Generate responses
gpt_model.eval()
for prompt in tqdm(input_prompts, desc="Generating responses"):
    response = chatbot.generate_response(prompt)
    generated_responses.append(response["response"])

# Initialize accumulators
bleu_scores = []
rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}
perplexities = []
inference_times = []
memory_usages = []

for ref, gen, prompt in zip(reference_responses, generated_responses, input_prompts):
    bleu_scores.append(calculate_bleu(ref, gen))
    rouge = calculate_rouge(ref, gen)
    for key in rouge_scores:
        rouge_scores[key].append(rouge[key].fmeasure)
    perplexities.append(calculate_perplexity(gpt_model, gpt_tokenizer, gen))
    inference_times.append(measure_inference_time(gen, prompt, gpt_tokenizer))
    memory_usages.append(get_memory_usage())

# Compute distinct-n
distinct_1 = distinct_n_gram(generated_responses, n=1)
distinct_2 = distinct_n_gram(generated_responses, n=2)

# Print evaluation results
print("\n--- Evaluation Metrics ---")
print(f"Avg BLEU Score: {sum(bleu_scores)/len(bleu_scores):.4f}")
print(f"Avg ROUGE-1: {sum(rouge_scores['rouge1'])/len(rouge_scores['rouge1']):.4f}")
print(f"Avg ROUGE-2: {sum(rouge_scores['rouge2'])/len(rouge_scores['rouge2']):.4f}")
print(f"Avg ROUGE-L: {sum(rouge_scores['rougeL'])/len(rouge_scores['rougeL']):.4f}")
print(f"Avg Perplexity: {sum(perplexities)/len(perplexities):.4f}")
print(f"Distinct-1: {distinct_1:.4f}")
print(f"Distinct-2: {distinct_2:.4f}")
print(f"Avg Inference Time: {sum(inference_times)/len(inference_times):.4f} seconds")
print(f"Avg CPU Memory: {sum([m['cpu_memory_MB'] for m in memory_usages])/len(memory_usages):.2f} MB")
if torch.cuda.is_available():
    print(f"Avg GPU Memory: {sum([m['gpu_memory_MB'] for m in memory_usages if m['gpu_memory_MB'] is not None])/len(memory_usages):.2f} MB")

# Interactive user input for testing the chatbot
print("\n--- Interactive Chatbot Testing ---")
print("Enter your message (or type 'exit' to quit):")
while True:
    user_input = input(">> ")
    if user_input.lower() == 'exit':
        print("Exiting chatbot.")
        break
    if not user_input.strip():
        print("Please enter a valid message.")
        continue
    response = chatbot.generate_response(user_input)
    print(f"Detected emotion: {response['emotion_detected']} (confidence: {response['confidence']:.2f})")
    print(f"Therapist: {response['response']}\n")

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=2c9d72e3e84bcfe456e5a185ab53f78fdc3aa0e25f07ec918befa0432a7fed93
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Error: HOPE dataset directory not found at /content/SPARTA_WSDM2022/HOPE_data/HOPE_therapy_session_transcripts
Extracted 0 dialogue pairs from HOPE dataset


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script:   0%|          | 0.00/4.51k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.15k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/76673 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/12030 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10943 [00:00<?, ? examples/s]

Error loading EmpatheticDialogues: Loading a dataset cached in a LocalFileSystem is not supported.
Extracted 1383 dialogue pairs from CounselChat dataset
Total dialogue pairs: 1383

Sample data from each source:

CounselChat sample:
Prompt: Client: How does a counselor decide when to end counseling sessions or to terminate working with a client?
Response: Therapist: There are typically three reasons why therapy is terminated:</p>1) Client has met therapy goals</p>2) Client is not progressing&nbsp;</p>3) Therapist is not a good fit for client</p>In order to properly assess whether therapy is helping and what progress is being made, the therapist needs to have ways of consistently checking in with clients sessions-by-session to determine what is helping, what isn't, and where the client is at in relation to their original therapy goals. When a client has met their goals, that is a good time to end counselling sessions unless the client has new goals or simply wants to check-in periodical

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Generating responses: 100%|██████████| 10/10 [00:41<00:00,  4.14s/it]
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.



--- Evaluation Metrics ---
Avg BLEU Score: 0.0045
Avg ROUGE-1: 0.1586
Avg ROUGE-2: 0.0146
Avg ROUGE-L: 0.0808
Avg Perplexity: 35.6649
Distinct-1: 0.6093
Distinct-2: 0.9763
Avg Inference Time: 0.7780 seconds
Avg CPU Memory: 2554.60 MB

--- Interactive Chatbot Testing ---
Enter your message (or type 'exit' to quit):
>> I am feeling sad.
Detected emotion: LABEL_1 (confidence: 0.61)
Therapist: You're right, that's not the point of this game! It is a simple and beautiful world filled with possibilities for you to explore as if nothing else exists or has happened in it already... If anything does exist we'll be there soon enough so

>> exit
Exiting chatbot.
