In [2]:
pip install transformers datasets peft accelerate evaluate torch



In [3]:
# First Understanding the Dataset
import pandas as pd
from datasets import load_dataset
from sklearn.model_selection import train_test_split

# Load dataset from Hugging Face
dataset = load_dataset("Amod/mental_health_counseling_conversations")

# Convert to pandas DataFrame for easier manipulation (optional)
df = dataset['train'].to_pandas()

# Create QA pairs where:
# - Context + Response becomes our "context"
# - We'll generate questions about the psychological advice
qa_pairs = []

for _, row in df.iterrows():
    context = f"Patient: {row['Context']}\nPsychologist: {row['Response']}"

    # Create different types of questions
    qa_pairs.append({
        "context": context,
        "question": "What was the patient's concern?",
        "answer": row['Context']
    })

    qa_pairs.append({
        "context": context,
        "question": "What was the psychologist's response?",
        "answer": row['Response']
    })

    qa_pairs.append({
        "context": context,
        "question": "What mental health advice was given?",
        "answer": row['Response']
    })

# Convert back to Hugging Face dataset
from datasets import Dataset
dataset = Dataset.from_pandas(pd.DataFrame(qa_pairs))

# Split dataset
dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
# Loading the Base Model and Tokenizer
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Data Preprocessing
def preprocess_for_bert(examples):
    # Format: [CLS] question [SEP] context [SEP]
    inputs = tokenizer(
        examples["question"],
        examples["context"],
        max_length=512,
        truncation="only_second",
        padding="max_length",
        return_offsets_mapping=True
    )

    # Answer position handling
    offset_mapping = inputs.pop("offset_mapping")
    answers = examples["answer"]
    start_positions = []
    end_positions = []

    for i, offsets in enumerate(offset_mapping):
        answer = answers[i]
        context = examples["context"][i]

        # Find answer start/end in original text
        start_char = context.find(answer)
        end_char = start_char + len(answer) if start_char != -1 else -1

        # If answer not found or invalid, mark as unanswerable
        if start_char == -1 or end_char > len(context):
            start_positions.append(0)
            end_positions.append(0)
            continue

        # Find token start index
        token_start_index = 0
        while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char:
            token_start_index += 1
        token_start_index -= 1  # Move back to the correct token

        # Find token end index
        token_end_index = len(offsets) - 1
        while token_end_index >= 0 and offsets[token_end_index][1] >= end_char:
            token_end_index -= 1
        token_end_index += 1  # Move forward to the correct token

        start_positions.append(token_start_index)
        end_positions.append(token_end_index)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

# Apply preprocessing to datasets
tokenized_train = train_dataset.map(preprocess_for_bert, batched=True)
tokenized_eval = eval_dataset.map(preprocess_for_bert, batched=True)


Map:   0%|          | 0/8428 [00:00<?, ? examples/s]

Map:   0%|          | 0/2108 [00:00<?, ? examples/s]

In [6]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,  # LoRA rank
    lora_alpha=16,  # Scaling factor
    target_modules=["query", "value"],  # BERT's attention matrices
    lora_dropout=0.1,
    bias="none",
    task_type="QUESTION_ANS"
)

model = get_peft_model(model, lora_config)

In [7]:
from transformers import TrainingArguments, Trainer
import torch
import numpy as np
from evaluate import load

# Load evaluation metric
squad_metric = load("squad")

def compute_metrics(p):
    start_logits, end_logits = p.predictions
    start_positions, end_positions = p.label_ids  # ✅ Fix: unpack tuple

    # Convert logits to predictions
    all_start_preds = np.argmax(start_logits, axis=1)
    all_end_preds = np.argmax(end_logits, axis=1)

    exact_matches = []
    f1_scores = []

    for i in range(len(start_positions)):
        if start_positions[i] == 0 and end_positions[i] == 0:
            # No answer case
            exact_matches.append(0)
            f1_scores.append(0)
            continue

        # ✅ Use eval dataset for evaluation
        input_ids = tokenized_eval["input_ids"][i]

        pred_answer = tokenizer.decode(
            input_ids[all_start_preds[i]:all_end_preds[i]+1],
            skip_special_tokens=True
        )
        true_answer = tokenizer.decode(
            input_ids[start_positions[i]:end_positions[i]+1],
            skip_special_tokens=True
        )

        exact_matches.append(1 if pred_answer == true_answer else 0)

        pred_tokens = pred_answer.split()
        true_tokens = true_answer.split()
        common_tokens = set(pred_tokens) & set(true_tokens)
        precision = len(common_tokens) / len(pred_tokens) if pred_tokens else 0
        recall = len(common_tokens) / len(true_tokens) if true_tokens else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0
        f1_scores.append(f1)

    return {
        "exact_match": np.mean(exact_matches),
        "f1": np.mean(f1_scores)
    }

# ✅ TrainingArguments
training_args = TrainingArguments(
    output_dir="./bert_mental_health_qa",
    learning_rate=3e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=1,
    weight_decay=0.01,
    eval_strategy="epoch",  # Fixed name
    save_strategy="epoch",
    load_best_model_at_end=True,
    fp16=True,
    gradient_accumulation_steps=2,
    report_to="none",  # Fully disables wandb, tensorboard, etc.
)

# ✅ Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


  trainer = Trainer(
No label_names provided for model class `PeftModelForQuestionAnswering`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [8]:
import os
os.environ["WANDB_DISABLED"] = "true"

# Train the model
trainer.train()

# Save the model (Take hell lot of time so commented)
#model.save_pretrained("./mental_health_qa_peft")
#tokenizer.save_pretrained("./mental_health_qa_peft")

# Evaluate
# final_metrics = trainer.evaluate()
# print("Final evaluation metrics:", final_metrics)

Epoch,Training Loss,Validation Loss,Exact Match,F1
0,5.5059,,0.056926,0.298959


TrainOutput(global_step=1053, training_loss=2.614368406116453, metrics={'train_runtime': 1147.8525, 'train_samples_per_second': 7.342, 'train_steps_per_second': 0.917, 'total_flos': 2208835568369664.0, 'train_loss': 2.614368406116453, 'epoch': 0.9995253915519696})

In [9]:
from transformers import pipeline

# Create QA pipeline
qa_pipeline = pipeline(
    "question-answering",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)

# Example usage
counseling_context = """
Patient: I've been having trouble sleeping because of work stress.
Psychologist: Sleep issues are common with stress. Have you tried establishing
a bedtime routine? Relaxation techniques like deep breathing before bed can also help.
"""

questions = [
    "What is the patient's problem?",
    "What sleep advice did the psychologist give?",
    "What relaxation technique was suggested?"
]

for question in questions:
    result = qa_pipeline(question=question, context=counseling_context)
    print(f"Q: {question}")
    print(f"A: {result['answer']} (score: {result['score']:.2f})")
    print()

Device set to use cuda:0
The model 'PeftModelForQuestionAnswering' is not supported for question-answering. Supported models are ['AlbertForQuestionAnswering', 'BartForQuestionAnswering', 'BertForQuestionAnswering', 'BigBirdForQuestionAnswering', 'BigBirdPegasusForQuestionAnswering', 'BloomForQuestionAnswering', 'CamembertForQuestionAnswering', 'CanineForQuestionAnswering', 'ConvBertForQuestionAnswering', 'Data2VecTextForQuestionAnswering', 'DebertaForQuestionAnswering', 'DebertaV2ForQuestionAnswering', 'DiffLlamaForQuestionAnswering', 'DistilBertForQuestionAnswering', 'ElectraForQuestionAnswering', 'ErnieForQuestionAnswering', 'ErnieMForQuestionAnswering', 'FalconForQuestionAnswering', 'FlaubertForQuestionAnsweringSimple', 'FNetForQuestionAnswering', 'FunnelForQuestionAnswering', 'GPT2ForQuestionAnswering', 'GPTNeoForQuestionAnswering', 'GPTNeoXForQuestionAnswering', 'GPTJForQuestionAnswering', 'IBertForQuestionAnswering', 'LayoutLMv2ForQuestionAnswering', 'LayoutLMv3ForQuestionAnswer

Q: What is the patient's problem?
A: I've been having trouble sleeping because of work stress.
Psychologist (score: 0.01)

Q: What sleep advice did the psychologist give?
A: Sleep issues are common (score: 0.02)

Q: What relaxation technique was suggested?
A: Sleep issues are common (score: 0.02)

