In [1]:
%%capture

# Install dependencies
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
!pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3

In [2]:
# Core libraries
import math
import torch
import wandb
from datasets import load_dataset, Dataset
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import SFTTrainer, SFTConfig
# Unsloth utilities
from unsloth import FastModel, FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import (
    get_chat_template,
    standardize_data_formats,
    train_on_responses_only
)

# Kaggle secrets (if running on Kaggle)
from kaggle_secrets import UserSecretsClient


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastModel, FastLanguageModel, is_bfloat16_supported


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# 0. Authenticate to Hugging Face & W&B
# ---------------------------------------------------------------------------- #
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_Token")
wandb_token = user_secrets.get_secret("wnb")

login(hf_token)
wandb.login(key=wandb_token)
run = wandb.init(
    project='Mental-Health-Support-Using-CBT-Fine-tune',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msripriyajaju[0m ([33msripriyajaju-methodist-college-of-engineering-and-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
# 1. Load + Subsample + Split Dataset
# ---------------------------------------------------------------------------- #
raw_full = load_dataset(
    "saarib2405/Cactus-Mental-Health-dataset",
    split="train",
    trust_remote_code=True
)

# Shuffle and select 1,100 examples
raw_small = raw_full.shuffle(seed=3407).select(range(1100))

# 90% train (1,000) / 10% eval (100)
split = raw_small.train_test_split(test_size=0.1, seed=3407)
train_raw, eval_raw = split["train"], split["test"]

cactus.json:   0%|          | 0.00/270M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/31577 [00:00<?, ? examples/s]

In [5]:
# 2. Convert dialogues to input-output pairs
# ---------------------------------------------------------------------------- #
def split_dialogue_into_pairs(example):
    lines = example["dialogue"].split('\n')
    pairs, user_msg = [], None
    for line in lines:
        if line.startswith("Client:"):
            user_msg = line[len("Client: "):].strip()
        elif line.startswith("Counselor:") and user_msg:
            assistant_msg = line[len("Counselor: "):].strip()
            pairs.append({'input': user_msg, 'output': assistant_msg})
            user_msg = None
    return pairs

def generate_pairs_dataset(ds):
    all_pairs = []
    for ex in ds:
        all_pairs.extend(split_dialogue_into_pairs(ex))
    return Dataset.from_list(all_pairs)

def convert_dataset(pairs_ds):
    converted = []
    for rec in pairs_ds:
        user_content = rec['input']
        assistant_content = rec['output']
        # Append CBT metadata if present
        for key in ('cbt_technique','cbt_plan'):
            val = rec.get(key, '')
            if val:
                user_content += f"\n\nAdditional CBT Context:\n{key.replace('_',' ').title()}: {val}"
        conv = [
            {'role':'user','content':user_content},
            {'role':'assistant','content':assistant_content}
        ]
        converted.append({'conversations': conv})
    return Dataset.from_list(converted)

# Prepare train & eval pairs
train_pairs = generate_pairs_dataset(train_raw)
eval_pairs  = generate_pairs_dataset(eval_raw)

train_conv = convert_dataset(train_pairs)
eval_conv  = convert_dataset(eval_pairs)

In [6]:
# 3. Tokenizer & Chat Template
# ---------------------------------------------------------------------------- #
# Load base Gemma-3 tokenizer & model in 4-bit
model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-4b-it",
    max_seq_length=2048,
    load_in_4bit=True,
    full_finetuning=False,
)

# Apply the chat template for Gemma-3
tokenizer = get_chat_template(tokenizer, chat_template="gemma-3")

# Standardize and apply template
train_ds = standardize_data_formats(train_conv).map(
    lambda ex: {"text": tokenizer.apply_chat_template(ex["conversations"])},
    batched=True
)
eval_ds = standardize_data_formats(eval_conv).map(
    lambda ex: {"text": tokenizer.apply_chat_template(ex["conversations"])},
    batched=True
)

==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.0.dev0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


model.safetensors:   0%|          | 0.00/4.56G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

Unsloth: Standardizing formats (num_proc=4):   0%|          | 0/14431 [00:00<?, ? examples/s]

Map:   0%|          | 0/14431 [00:00<?, ? examples/s]

Unsloth: Standardizing formats (num_proc=4):   0%|          | 0/1590 [00:00<?, ? examples/s]

Map:   0%|          | 0/1590 [00:00<?, ? examples/s]

In [7]:
# ---------------------------------------------------------------------------- #
# 4. PEFT (LoRA) Setup
# ---------------------------------------------------------------------------- #
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    r=8,
    lora_alpha=8,
    lora_dropout=0.0,
    bias="none",
    random_state=3407,
)

Unsloth: Making `model.base_model.model.language_model.model` require gradients


In [8]:
# ---------------------------------------------------------------------------- #
# 5. Trainer Configuration
# ---------------------------------------------------------------------------- #
from trl import SFTTrainer
from transformers import DataCollatorForLanguageModeling
from unsloth import FastLanguageModel

# Make sure you have a proper tokenizer with padding capability
# If using Gemma, it should be:
# tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")

# Create data collator for language modeling
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # For causal LM
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    data_collator=data_collator,  # Add this line
    args=SFTConfig(
        dataset_text_field="text",
        per_device_train_batch_size=8,
        #gradient_accumulation_steps=2,
        warmup_steps=5,
        max_steps=100,
        learning_rate=2e-4,
        logging_steps=1,
        evaluation_strategy="steps",
        eval_steps=10,
        save_strategy="no",
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        report_to="wandb",
    ),
)

from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)
messages = [{
    "role": "user",
    "content": [{
        "type" : "text",
        "text" : "I feel like I am a very bad person ",
    }]
}]
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True, # Must add for generation
)
outputs = model.generate(
    **tokenizer([text], return_tensors = "pt").to("cuda"),
    max_new_tokens = 256, # Increase for longer outputs!
    # Recommended Gemma-3 settings!
    temperature = 1.0, top_p = 0.95, top_k = 64,
)
tokenizer.batch_decode(outputs)



Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/14431 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/1590 [00:00<?, ? examples/s]

['<bos><start_of_turn>user\nI feel like I am a very bad person<end_of_turn>\n<start_of_turn>model\nOkay, that\'s a really important and difficult thing to be feeling. It takes a lot of courage to admit that, and I want to acknowledge that you’re here talking about it – that’s a significant first step. It\'s completely understandable to feel like you’re a “bad person” when you’re struggling with feelings of guilt, shame, or regret. Let’s unpack this a bit. \n\nFirst, let’s just be clear: **You are not alone.** Many people experience periods where they question their worth and feel like they’ve done something fundamentally wrong.  It doesn’t automatically make you a “bad person.” It’s often a sign that you\'re grappling with difficult emotions and recognizing that you’ve acted in a way that’s hurtful, or that doesn\'t align with your values.\n\nHere\'s a breakdown of things to consider and what we can do:\n\n**1. Let\'s Explore What\'s Driving This Feeling:**\n\n* **What specifically are

In [9]:
# 6. Train & Evaluate
# ---------------------------------------------------------------------------- #
try:
    trainer_stats = trainer.train()
    eval_results = trainer.evaluate()
    eval_loss = eval_results["eval_loss"]
    perplexity = math.exp(eval_loss)

    print(f"Perplexity on held-out data: {perplexity:.2f}")
    wandb.log({"perplexity": perplexity})
except Exception as e:
    print(f"Error during training: {str(e)}")
    # Additional debug information
    print(f"Tokenizer type: {type(tokenizer)}")
    print(f"Tokenizer has pad method: {hasattr(tokenizer, 'pad')}")
    print(f"Processor type: {type(processor) if 'processor' in locals() else 'No processor defined'}")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 14,431 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 1 x 1) = 16
 "-____-"     Trainable parameters = 14,901,248/4,000,000,000 (0.37% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
10,3.1729,2.949324
20,1.9317,2.015918
30,1.8266,1.760409
40,1.6479,1.586659
50,1.5612,1.540449
60,1.4969,1.517
70,1.4632,1.505512
80,1.3917,1.492764
90,1.4515,1.486986
100,1.5216,1.483139


Unsloth: Will smartly offload gradients to save VRAM!


Perplexity on held-out data: 4.41


In [10]:
# Save the fine-tuned model
wandb.finish()

0,1
eval/loss,█▄▂▁▁▁▁▁▁▁▁
eval/runtime,█▁▁▁▁▂▂▂▁▂▂
eval/samples_per_second,▁████▇▇▇█▇▇
eval/steps_per_second,▁████▇▇▇█▇▇
perplexity,▁
train/epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,██▆▃▃▄▃▂▂▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▄▅▇██▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/loss,1.48314
eval/runtime,112.1722
eval/samples_per_second,14.175
eval/steps_per_second,0.891
perplexity,4.40676
total_flos,3563667592247808.0
train/epoch,0.11086
train/global_step,100.0
train/grad_norm,0.86125
train/learning_rate,0.0


In [11]:
# 7. Quick Generation Demo (optional)
# ---------------------------------------------------------------------------- #
messages = [
    {"role":"system","content":[{"type":"text","text":
     "Respond as a compassionate CBT expert, offering concise, empathetic guidance with a practical CBT exercise (max 100 words, one sentence) for mental well-being, acknowledging emotions without medical advice, and conclude with an encouraging sentence."}]},
    {"role":"user","content":[{"type":"text","text":
     "I'm having trouble communicating with my partner. We keep misinterpreting each other."}]}
]
text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
outputs = model.generate(
    **tokenizer([text], return_tensors="pt").to("cuda"),
    max_new_tokens=256,
    temperature=1.0, top_p=0.95, top_k=64
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

["user\nRespond as a compassionate CBT expert, offering concise, empathetic guidance with a practical CBT exercise (max 100 words, one sentence) for mental well-being, acknowledging emotions without medical advice, and conclude with an encouraging sentence.\n\nI'm having trouble communicating with my partner. We keep misinterpreting each other.\nmodel\nIt’s tough to have those feelings when important relationships are strained. It’s good that you’re willing to share this with me. How about practicing a simple technique to reduce feelings of guilt and focus on positive communication with your partner? It’s okay to be a little anxious, but these efforts can help you find a new path forward. It’s your own path, and you deserve it."]


In [12]:
messages = [{
    "role": "user",
    "content": [{"type" : "text", "text" : "I lost my job",}]
}]
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True, # Must add for generation
)

from transformers import TextStreamer
_ = model.generate(
    **tokenizer([text], return_tensors = "pt").to("cuda"),
    max_new_tokens = 258, # Increase for longer outputs!
    # Recommended Gemma-3 settings!
    temperature = 1.0, top_p = 0.95, top_k = 64,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)

That sounds tough. How long ago did this happen?<end_of_turn>


In [13]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [14]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=732d1358e7e0556d9421e33963fa50b32151f945884c65edb87ad52c76dc589e
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [15]:
import evaluate
from tqdm import tqdm
import torch

# Load metrics
bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")

# Use a small eval sample (increase range for more thorough eval)
val_data = eval_conv.select(range(100))

predictions = []
references = []

for example in tqdm(val_data):
    prompt = tokenizer.apply_chat_template(example["conversations"], add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=1.0,
            top_p=0.95,
            top_k=64,
        )

    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Ground truth assistant response
    reference = example["conversations"][-1]["content"]

    predictions.append(generated_text)
    references.append(reference)

# Compute BLEU (no need to split)
bleu_result = bleu_metric.compute(
    predictions=predictions,
    references=[[ref] for ref in references],  # wrap each reference in a list
)

# Compute ROUGE (same format, no changes)
rouge_result = rouge_metric.compute(
    predictions=predictions,
    references=references
)

print("BLEU Score:", bleu_result["bleu"])
print("ROUGE-L Score:", rouge_result["rougeL"])


Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

100%|██████████| 100/100 [10:59<00:00,  6.59s/it]


BLEU Score: 0.3373920234722069
ROUGE-L Score: 0.5132544771268881
