<a href="https://colab.research.google.com/github/Sanya003/llama3-mental-health-ai/blob/main/Llama3_Mental_Health_Counseling_FineTune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Installation

In [None]:
import os
# Choose the install recipe by environment: the check looks for the substring
# "COLAB_" anywhere in the concatenated names of all environment variables.
if "COLAB_" not in "".join(os.environ.keys()):
    # Not on Colab: a plain install lets pip resolve unsloth's dependencies.
    !pip install unsloth
else:
    # On Colab: install the GPU/training stack with --no-deps so pip does not
    # pull in (or replace) any dependency of these packages; xformers is pinned.
    # NOTE(review): presumably this protects Colab's preinstalled torch/CUDA
    # build -- confirm. The other packages are unpinned, so reruns may resolve
    # to different versions than the ones in the recorded output.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    # Supporting libraries, installed normally (with their dependencies).
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    # unsloth itself goes last, again without dependency resolution.
    !pip install --no-deps unsloth

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting xformers==0.0.29.post3
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting trl
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting cut_cross_entropy
  Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)
Collecting unsloth_zoo
  Downloading unsloth_zoo-2025.3.15-py3-none-any.whl.metadata (17 kB)
Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl (43.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.4/43.4 MB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.15.2-py3-none-any.whl (318 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

### Unsloth

In [None]:
from unsloth import FastLanguageModel
import torch
# Maximum sequence length (in tokens) used for loading and, later, training.
max_seq_length = 2048
# None lets Unsloth auto-detect the compute dtype (float16 on T4/V100,
# bfloat16 on Ampere and newer). Hard-coding torch.float16 here conflicts with
# the training cell, which enables bf16 whenever the GPU supports it.
dtype = None
# Load the base weights 4-bit quantized to fit in limited GPU memory.
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2025.3.17: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# LoRA adapter settings, gathered in one place so they are easy to scan and tune.
lora_settings = dict(
    r = 16,                                   # adapter rank
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",    # attention projections
        "gate_proj", "up_proj", "down_proj",       # MLP projections
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

# Wrap the loaded base model with the adapters; `model` now refers to the
# PEFT-wrapped model.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

### Data Preparation

In [None]:
# Prompt template with two positional slots: the first receives the user's
# context, the second the counselor response. The same template is reused at
# inference time with a blank response slot.
prompt = """Based on the given context, generate an appropriate response

### Context:
{}

### Response:
{}
"""

# End-of-sequence token taken from the tokenizer; formatting_prompts_func
# appends it to every formatted training example.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render a batch of examples into training strings.

    Args:
        examples: Batch mapping with parallel "Context" and "Response"
            sequences (the form `datasets.Dataset.map(..., batched=True)`
            passes in).

    Returns:
        A dict with a single "text" key holding one string per example: the
        module-level `prompt` template filled with the context/response pair,
        followed by `EOS_TOKEN`.
    """
    return {
        "text": [
            prompt.format(context, response) + EOS_TOKEN
            for context, response in zip(examples["Context"], examples["Response"])
        ]
    }

from datasets import load_dataset
# Load the train split and, in the same expression, add the formatted "text"
# column produced by formatting_prompts_func (applied batch-wise).
dataset = load_dataset(
    "Amod/mental_health_counseling_conversations",
    split = 'train',
).map(formatting_prompts_func, batched = True)

Map:   0%|          | 0/3512 [00:00<?, ? examples/s]

In [None]:
# Sanity check: display the first record, which should carry the original
# "Context" and "Response" fields plus the "text" field added by the map step.
dataset[0]

{'Context': "I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.\n   I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it.\n   How can I change my feeling of being worthless to everyone?",
 'Response': "If everyone thinks you're worthless, then maybe you need to find new people to hang out with.Seriously, the social context in which a person lives is a big influence in self-esteem.Otherwise, you can go round and round trying to understand why you're not worthless, then go back to the same crowd and be knocked down again.There are many inspirational messages you can find in social media. \xa0Maybe read some of the ones which state that no person is worthless, and that everyone has a good purpose to their life.Also, since our culture is so saturated with the belief that if someone doesn't feel good about themselves that this is someh

### Model Training

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Supervised fine-tuning run over the formatted "text" column.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        # Effective batch size = 4 x 8 = 32 sequences per optimizer step.
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 8,
        warmup_steps = 10,
        # Training length is set by max_steps alone. A positive max_steps
        # overrides num_train_epochs, so the previous `num_train_epochs = 5`
        # never took effect (the recorded run reports ~2 epochs / 200 steps)
        # and has been removed to avoid a misleading configuration.
        max_steps = 200,
        learning_rate = 3e-4,
        # Mixed precision follows hardware support: bf16 when available,
        # otherwise fp16.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_torch",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        seed = 42,
        output_dir = "outputs",
        report_to = "none",
    ),
)

# Run training and keep the returned statistics for later inspection.
trainer_stats = trainer.train()

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/3512 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,512 | Num Epochs = 2 | Total steps = 200
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)


Step,Training Loss
10,2.4893
20,2.3558
30,2.2988
40,2.2747
50,2.2601
60,2.2155
70,2.1726
80,2.1495
90,2.1499
100,2.07


### Inference

In [None]:
# Switch the model to Unsloth's inference mode before generating, as the later
# inference cells in this notebook do.
FastLanguageModel.for_inference(model)

# Build a single-prompt batch; the response slot is left blank so the model
# writes the continuation.
inputs = tokenizer(
    [
        prompt.format(
            "I'm struggling with self-doubt and feeling like I'm not good enough. I keep comparing myself to others and it makes me feel worse. What should I do?",  # context
            " ",  # response
        )
    ],
    return_tensors="pt",
).to("cuda")

# Generate response, streaming tokens to the cell output as they are produced.
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(
    **inputs,
    streamer = text_streamer,
    max_new_tokens = 512,
    use_cache = True,
    # temperature / min_p only apply in sampling mode; without do_sample=True
    # generation is greedy and both settings are ignored.
    do_sample = True,
    temperature = 0.7,
    # min_p keeps only tokens at least 0.9x as likely as the most likely token,
    # so sampling stays close to greedy.
    min_p = 0.9,
)

<|begin_of_text|>Based on the given context, generate an appropriate response

### Context:
I'm struggling with self-doubt and feeling like I'm not good enough. I keep comparing myself to others and it makes me feel worse. What should I do?

### Response:
 
I'm sorry to hear that you are struggling with self-doubt. It sounds like you are comparing yourself to others and feeling worse. I would recommend that you seek out a therapist to help you work through your feelings. A therapist can help you to gain insight into your feelings and help you to develop coping skills to help you to manage your feelings. I would also recommend that you read the book "The Gifts of Imperfection" by Brené Brown. It is a great book that can help you to understand that everyone has their own unique strengths and weaknesses and that it is okay to be imperfect. I hope this helps. Be well.
<|eot_id|>


In [None]:
# Build a single-prompt batch; the response slot is left blank so the model
# writes the continuation.
inputs = tokenizer(
    [
        prompt.format(
            "I have an important presentation tomorrow, but I'm extremely nervous. How do I calm down and do well?",  # context
            " ",  # response
        )
    ],
    return_tensors="pt",
).to("cuda")

# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

outputs = model.generate(
    # Pass the full tokenizer output so the attention mask travels with the
    # input ids instead of being discarded.
    **inputs,
    max_new_tokens = 256,
    temperature = 0.7,
    min_p = 0.9,
    repetition_penalty = 1.1,
    do_sample = True,
    use_cache = True,
)
# Decode prompt + completion (special tokens included) and display the result.
tokenizer.batch_decode(outputs)

["<|begin_of_text|>Based on the given context, generate an appropriate response\n\n### Context:\nI have an important presentation tomorrow, but I'm extremely nervous. How do I calm down and do well?\n\n### Response:\n \nThe best way to feel less anxious is to prepare for your presentation. \xa0If you are able to know what you will be presenting about, then you can organize your thoughts in a logical order so that you can deliver your message clearly and concisely. \xa0This will help you to stay focused during your presentation. \xa0Also, if you are able to practice your presentation beforehand, this will also help you to become more familiar with it and reduce anxiety. \xa0Additionally, try to focus on your strengths as a speaker and remind yourself of all the times when you have successfully presented before. \xa0Finally, take deep breaths throughout the day to help you relax.\n<|eot_id|>"]

### Saving, loading finetuned models

In [None]:
# Write the fine-tuned model and its tokenizer into the same local directory,
# which the reload cell reads back from.
adapter_dir = "lora_model"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

In [None]:
# Reload the fine-tuned model from the local "lora_model" directory, using the
# same loading settings as the initial base-model load.
from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "lora_model",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Build a single-prompt batch; the response slot is left blank so the model
# writes the continuation.
inputs = tokenizer(
    [
        prompt.format(
            "I feel overwhelmed with work. What should I do?",  # context
            " ",  # response
        )
    ],
    return_tensors="pt",
).to("cuda")

# Stream only the newly generated text (the prompt is skipped).
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(
    # Pass the full tokenizer output so the attention mask is not discarded.
    **inputs,
    streamer = text_streamer,
    max_new_tokens = 256,
    use_cache = True,
    # temperature / min_p only apply in sampling mode; without do_sample=True
    # generation is greedy and both settings are ignored.
    do_sample = True,
    temperature = 0.7,
    min_p = 0.9,
)

==((====))==  Unsloth 2025.3.17: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
I'm glad you're reaching out for help.  I'm not sure if you're feeling overwhelmed with work or life in general.  If you're feeling overwhelmed with work, I would suggest that you talk with your boss about your workload.  If you're feeling overwhelmed with life in general, I would suggest that you talk with a therapist about your feelings.  If you're feeling overwhelmed with both work and life, I would suggest that you talk with your boss about your workload and then talk with a therapist about your feelings.  I hope this helps.  Best of