# 1.0 Install Packages and Import Libraries

In [1]:
!pip install -q -U bitsandbytes transformers peft accelerate datasets einops evaluate trl rouge_score wandb

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.4/293.4 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.3/20.3 MB[0m [31m37.0 MB/s[0m eta [36m0:00:

In [2]:
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    GenerationConfig
)
from tqdm import tqdm
from trl import SFTTrainer
import torch
import time
import pandas as pd
import numpy as np
from huggingface_hub import interpreter_login
from huggingface_hub import login
import wandb
import os

# Login to Hugging Face.
# The access token is never written into the notebook: it is read from the
# HF_TOKEN environment variable, and requested interactively (without echo)
# when that variable is unset or empty.
from getpass import getpass

api_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=api_token)

In [3]:
# Log in to Weights & Biases (used to track training metrics).
# No API key is stored in the notebook and WANDB_API_KEY is no longer
# overwritten with an empty string: wandb.login() uses an existing
# WANDB_API_KEY / stored credential, and otherwise asks for the key.
logged_in = wandb.login()

# Set your W&B project
os.environ["WANDB_PROJECT"] = "Fine-Tune-MentalHealth"

# Only report success when wandb confirms that a key is configured
if logged_in:
    print("Successfully logged into Weights & Biases!")
else:
    print("Weights & Biases login did not succeed; training metrics will not be tracked until it does.")

[34m[1mwandb[0m: Currently logged in as: [33m0132114[0m ([33m0132114-uow-malaysia[0m). Use [1m`wandb login --relogin`[0m to force relogin


Successfully logged into Weights & Biases!


# 2.0 Load the Processed Dataset

In [4]:
from datasets import load_from_disk
from google.colab import drive

# Make Google Drive available under /content/drive
drive.mount('/content/drive')

# Read the processed train and validation splits that were saved to Drive
train_dataset, val_dataset = (
    load_from_disk('/content/drive/My Drive/mental_health_dataset/hf_train_dataset_v4'),
    load_from_disk('/content/drive/My Drive/mental_health_dataset/hf_val_dataset_v4'),
)

print("Datasets loaded!")

Mounted at /content/drive
Datasets loaded!


In [5]:
# Show the summary of each split, training split first
for dataset_split in (train_dataset, val_dataset):
    print(dataset_split)

Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 796
})
Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 99
})


# 3.0 Configure Bits and Bytes

In [6]:
# Compute in 16-bit floating point (lower memory use, faster training)
compute_dtype = torch.float16

# Quantization settings used when loading the model through bitsandbytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store the weights in 4 bit
    bnb_4bit_quant_type='nf4',             # nf4 datatype
    bnb_4bit_use_double_quant=True,        # double quantization enabled
    bnb_4bit_compute_dtype=compute_dtype,  # float16 for computation
)

# 4.0 Load the Pretrained Model in 4-bit (Quantized)

In [7]:
# Load the pretrained model with the 4-bit quantization settings in `bnb_config`.
# ('meta-llama/Llama-3.2-1B-Instruct' was the alternative, but it requires authorization.)
base_model_name = 'Qwen/Qwen2.5-1.5B-Instruct'
device_map = "auto" #{"": 0}
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map=device_map,
    quantization_config=bnb_config,  # To load in 4-bit and double quantization
    trust_remote_code=True,
    use_cache=False,
    # `use_auth_token` is deprecated in transformers; `token=True` is the
    # replacement and uses the credential stored by the earlier Hub login.
    token=True,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

# 5.0 Configure the Tokenizer

In [8]:
# Load the slow (non-fast) tokenizer of the base checkpoint, padding on the left
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    use_fast=False,
    padding_side="left",
    add_bos_token=True,
    add_eos_token=True,
    trust_remote_code=True,
)

# Reuse the end-of-sequence token as the padding token.
# NOTE(review): the language-modeling collator used for training presumably
# masks every pad-token position in the labels; with pad == eos that would also
# hide genuine EOS tokens from the loss - confirm against how the dataset was padded.
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

# 6.0 Test the Base Model's Response

In [9]:
# Question used to probe the model before any fine-tuning
prompt = "I've been married for 3 years and I have two kids. During my 3-years married, my husband cheated on me twice. The second time really got to me and at my lowest I hurt him back. We aren't communicating as well as I would like. What do I do?"

# Conversation in chat format: a system persona followed by the user's question
messages = [
    {"role": "system", "content": "You are a helpful mental health therapist."},
    {"role": "user", "content": prompt},
]

# Render the conversation through the tokenizer's chat template, kept as a
# string (tokenize=False) and ending with the generation prompt
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Encode the single prompt, then move the tensors onto the model's device
model_inputs = tokenizer([text], return_tensors="pt")
model_inputs = model_inputs.to(base_model.device)

# Generate at most 512 new tokens
generated_ids = base_model.generate(**model_inputs, max_new_tokens=512, temperature=0.9)

# Cut the prompt tokens off the front of each output so only the reply remains
generated_ids = [
    full_sequence[prompt_ids.shape[0]:]
    for prompt_ids, full_sequence in zip(model_inputs.input_ids, generated_ids)
]

# Turn the reply tokens back into text (special tokens removed)
base_response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

print("BASE MODEL RESPONSE \n============================================== \n", base_response)

BASE MODEL RESPONSE 
 It's understandable that you're feeling very emotional about the cheating situation. Here are some steps you can consider:

1. **Reflect on Your Own Feelings**: It might help to talk about how you feel with someone who is close to you or a professional if needed.

2. **Communicate Clearly**: You mentioned not being able to communicate effectively. Try setting up regular check-ins where both of you can express your feelings without fear of judgment. Make sure these conversations are safe spaces for each other.

3. **Seek Support**: Consider talking to friends or family members who understand what you’re going through. They can offer different perspectives and support.

4. **Talk About the Cheating in a Safe Environment**: If discussing it feels uncomfortable, consider writing down your thoughts and feelings in a journal or seeking a therapist to process this emotionally difficult experience.

5. **Work on Communication**: Once you feel more comfortable, work on imp

# 8.0 Fine Tuning

## 8.1 Configure LoRA and Initialize LoRA adapter (LoRA trainable version of the model)
- LoRA adapter: two small low-rank matrices added to each targeted layer; only these are fine-tuned, while the original model weights stay frozen.

In [10]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Adapter hyper-parameters
config = LoraConfig(
    task_type="CAUSAL_LM",
    # Rank of the adapter matrices (e.g. for a 512x512 weight and rank 64,
    # the adapter uses a 512x64 and a 64x512 matrix)
    r=64,
    # Alpha: how strongly the model adapts to the new training data
    lora_alpha=128,
    lora_dropout=0.05,  # Conventional
    bias="none",
    # Only the attention query / key / value projections receive adapters
    target_modules=['q_proj', 'k_proj', 'v_proj'],
    use_dora=True,
)

# Turn on gradient checkpointing to lower memory use while fine-tuning
base_model.gradient_checkpointing_enable()

# Get the quantized base model ready for k-bit (QLoRA) training
base_model = prepare_model_for_kbit_training(base_model)

# Wrap the base model with the trainable adapter
peft_model = get_peft_model(base_model, config)

# Report how many parameters are trainable
peft_model.print_trainable_parameters()

trainable params: 11,984,896 || all params: 1,555,699,200 || trainable%: 0.7704


## 8.2 Define 'TrainingArguments' and Create 'Trainer' Instance

In [11]:
import transformers

# Name the run after the model and today's date; checkpoints go to a folder of that name
output_model_name = f'Qwen2.5-Mental-Health-Bot-1.5B-{time.strftime("%Y%m%d")}'
output_dir = f'./{output_model_name}'

# Define the training arguments
peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps=50, # For the first n steps, learning rate slowly increases
    per_device_train_batch_size=4,
    per_device_eval_batch_size=2, # evaluation batch size
    gradient_accumulation_steps=2, # Updates model every n batch
    num_train_epochs=5,
    learning_rate=5e-4, # 0.0005
    optim="paged_adamw_8bit", # Optimizer type used to update weights
    logging_steps=25, # Log the loss output every n steps
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=10, # Save model every 10 steps
    eval_strategy="steps", # evaluation strategy (High GPU RAM)
    eval_steps=25, # evaluation steps (High GPU RAM)
    do_eval=True,
    gradient_checkpointing=True,
    report_to="wandb",
    # A real boolean: the previous string 'True' only behaved as intended
    # because any non-empty string is truthy (so 'False' would have been too).
    overwrite_output_dir=True,
    group_by_length=True,
    fp16=True,
)

# Disable caching to save memory
peft_model.config.use_cache = False

# Create the 'Trainer' instance
peft_trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    args=peft_training_args,
    # mlm=False selects causal (next-token) language modeling rather than masked LM
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

## 8.3 Start Training

In [12]:
# Drop the notebook-level names that are no longer used directly, then hand
# cached CUDA memory back to the driver.
# NOTE(review): peft_model was built from base_model, so the weights are
# presumably still referenced and little memory is actually freed - confirm.
del base_model, bnb_config
torch.cuda.empty_cache()

In [13]:
# Start training the model.
# wandb.finish() sits in `finally` so the W&B run is closed even when training
# raises or is interrupted, instead of being left open.
try:
    peft_trainer.train()
finally:
    # Stop reporting to wandb
    wandb.finish()



[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss,Validation Loss
25,4.8691,2.222017
50,4.4199,2.184804
75,4.3734,2.161594
100,4.282,2.148523
125,4.0865,2.166323
150,4.1364,2.143859
175,4.1442,2.130929
200,4.0243,2.123144
225,3.7003,2.17418
250,3.7287,2.152219


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Tr

0,1
eval/loss,▅▄▃▂▃▂▁▁▃▂▂▂▆▇▆▅▇█▆
eval/runtime,▁▂█▃▄▅▃▄▃▂▃▃▃▃▃▃▄▇▂
eval/samples_per_second,█▆▁▆▅▄▆▅▆▇▅▅▅▆▅▆▅▂▇
eval/steps_per_second,█▆▁▆▅▄▆▅▆▇▅▅▅▆▅▆▅▂▇
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,█▂▁▁▂▂▂▂▄▄▄▅▆▇▇█▅▆▆
train/learning_rate,▄██▇▇▆▆▆▅▅▄▄▃▃▃▂▂▁▁
train/loss,█▆▆▆▅▅▅▅▄▄▄▃▃▃▂▂▁▁▁

0,1
eval/loss,2.23206
eval/runtime,13.0799
eval/samples_per_second,7.569
eval/steps_per_second,3.823
total_flos,1.0997030135881728e+16
train/epoch,4.95477
train/global_step,495.0
train/grad_norm,1.40505
train/learning_rate,2e-05
train/loss,2.9635


In [14]:
# Free memory for merging weights.
# Deleting the trainer alone is not enough: the notebook-level name
# `peft_model` still references the trained model, so it is removed as well
# before collecting garbage and emptying the CUDA cache.
import gc

del peft_trainer
del peft_model
gc.collect()
torch.cuda.empty_cache()

# 9.0 Merge Fine Tuned LoRA Adapter to the Base Model

In [15]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Configure Bits and Bytes to load the model in 4-bit (quantized).
# Rebuilt here because the earlier `bnb_config` was deleted before training.
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, # Load the weights in 4 bit
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=compute_dtype, # Uses 16-bit floating-point (float16)
        bnb_4bit_use_double_quant=True, # Enable double quantization
    )

# Load a fresh copy of the base model to attach the trained adapter to
base_model_name = 'Qwen/Qwen2.5-1.5B-Instruct'
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True,
    # `use_auth_token` is deprecated in transformers; `token=True` is the
    # replacement and uses the credential stored by the earlier Hub login.
    token=True,
)



In [16]:
# Separate tokenizer instance for evaluation, padding with the EOS token.
# NOTE(review): the later cells shown here call `tokenizer`, not
# `eval_tokenizer` - confirm whether this instance is still needed.
eval_tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    use_fast=False,
    trust_remote_code=True,
    add_bos_token=True,
)
eval_tokenizer.pad_token = eval_tokenizer.eos_token

In [17]:
from peft import PeftModel
from transformers.trainer_utils import get_last_checkpoint

# Locate the most recent checkpoint written by the trainer instead of
# hard-coding a step number, which changes with dataset size, batch size
# and number of epochs.
final_dir = get_last_checkpoint(f'/content/{output_model_name}')
if final_dir is None:
    raise FileNotFoundError(
        f"No 'checkpoint-*' folder found in /content/{output_model_name}; run training first."
    )

# Get the LoRA adapter
ft_model = PeftModel.from_pretrained(base_model, final_dir, torch_dtype=torch.float16, is_trainable=False)

# Merge the LoRA adapter into the base model (saving happens in a later cell)
lora_merged_model = ft_model.merge_and_unload()



# 10.0 Test the Fine Tuned Model

In [18]:
# Same question that was put to the base model
prompt = "I've been married for 3 years and I have two kids. During my 3-years married, my husband cheated on me twice. The second time really got to me and at my lowest I hurt him back. We aren't communicating as well as I would like. What do I do?"

# System persona plus the user's question, in chat format
messages = [
    {"role": "system", "content": "You are a helpful mental health therapist."},
    {"role": "user", "content": prompt},
]

# Apply the chat template; the result stays a string and ends with the generation prompt
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)

# Encode the prompt as a batch of one on the merged model's device
model_inputs = tokenizer([text], return_tensors="pt").to(lora_merged_model.device)

# Generate up to 512 new tokens with the merged model
generation_output = lora_merged_model.generate(
    **model_inputs,
    temperature=0.5,
    max_new_tokens=512,
)

# Keep only what follows the prompt in every generated sequence
prompt_length = model_inputs.input_ids.shape[-1]
generated_ids = [sequence[prompt_length:] for sequence in generation_output]

# Decode the reply, leaving out special tokens
ft_response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Show the earlier base-model answer and the fine-tuned answer one after the other
print("BASE RESPONSE \n============================================== \n", base_response)
print("\n--------------------------------------------------------------------------------------------------------------------- \n")
print("FINE TUNED RESPONSE \n============================================== \n", ft_response)

BASE RESPONSE 
 It's understandable that you're feeling very emotional about the cheating situation. Here are some steps you can consider:

1. **Reflect on Your Own Feelings**: It might help to talk about how you feel with someone who is close to you or a professional if needed.

2. **Communicate Clearly**: You mentioned not being able to communicate effectively. Try setting up regular check-ins where both of you can express your feelings without fear of judgment. Make sure these conversations are safe spaces for each other.

3. **Seek Support**: Consider talking to friends or family members who understand what you’re going through. They can offer different perspectives and support.

4. **Talk About the Cheating in a Safe Environment**: If discussing it feels uncomfortable, consider writing down your thoughts and feelings in a journal or seeking a therapist to process this emotionally difficult experience.

5. **Work on Communication**: Once you feel more comfortable, work on improving

In [19]:
# Print a hard-coded counselor-style answer as plain text.
# NOTE(review): presumably a human-written reference answer kept for reading
# next to the model outputs; it addresses an abuse scenario rather than the
# prompt used in the cells above - confirm it is the intended comparison.
print("Hi there. Thank you reaching out for help. It takes a lot of courage for someone that is being abused to reach out for help. I want you to know that I am here to help you in any way that I can.\xa0First and foremost, you did not mention whether or not you had children yourself by this man. I’m going to assume there are. So, the most important thing right at this moment is asking yourself, if you and your children are safe? Is the boyfriend currently living in the home or is he coming and going? If he is coming and going or currently living in the home, my suggestion for you to is to pack a bag for you and your children and go somewhere safe (i.e. family members home, friend or a shelter temporarily). I cannot stress this enough. Although, some people tend to minimize their abusers behavior (i.e. they only do it when they are drinking, they only do it when they are mad), it’s those individuals that find themselves or their children in a serious situation. So, you can never take abuse (physical or mental) lightly because you never know what is going through that persons mind at that particular time. It’s always better to be safe than sorry.\xa0Secondly, it will not be safe, helpful or productive to attempt to talk to the abuser about your relationship. Abusers often times try and manipulate you into staying by apologizing or attempting to justify their behaviors. This is the time you would want to go see a magistrate in order to get a protective order for you and your children.It’s unfortunate that your significant other was involved with another women causing pregnancy; however, that is no longer your concern. Your concern at this point is keeping yourself and your children safe. I would also suggest counseling for you and your children in order to work through these issues. I hope this was helpful and please stay safe and take care of yourself.")

Hi there. Thank you reaching out for help. It takes a lot of courage for someone that is being abused to reach out for help. I want you to know that I am here to help you in any way that I can. First and foremost, you did not mention whether or not you had children yourself by this man. I’m going to assume there are. So, the most important thing right at this moment is asking yourself, if you and your children are safe? Is the boyfriend currently living in the home or is he coming and going? If he is coming and going or currently living in the home, my suggestion for you to is to pack a bag for you and your children and go somewhere safe (i.e. family members home, friend or a shelter temporarily). I cannot stress this enough. Although, some people tend to minimize their abusers behavior (i.e. they only do it when they are drinking, they only do it when they are mad), it’s those individuals that find themselves or their children in a serious situation. So, you can never take abuse (phys

# 11.0 Push to HuggingFace

In [20]:
# Repository name the model is published under on the Hub
pushed_model_name = "Qwen2.5-Mental-Health-Bot-1.5B-v3.0"

# Write the merged model (safetensors format) and the tokenizer to ./merged
lora_merged_model.save_pretrained("merged", safe_serialization=True)
tokenizer.save_pretrained("merged")

# Upload the merged model first, then the tokenizer, to the same repository
lora_merged_model.push_to_hub(pushed_model_name)
tokenizer.push_to_hub(pushed_model_name)

model.safetensors:   0%|          | 0.00/1.14G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/hezronling/Qwen2.5-Mental-Health-Bot-1.5B-v3.0/commit/d325c41ae40a80626ba1129191b9d9e7ca746033', commit_message='Upload tokenizer', commit_description='', oid='d325c41ae40a80626ba1129191b9d9e7ca746033', pr_url=None, repo_url=RepoUrl('https://huggingface.co/hezronling/Qwen2.5-Mental-Health-Bot-1.5B-v3.0', endpoint='https://huggingface.co', repo_type='model', repo_id='hezronling/Qwen2.5-Mental-Health-Bot-1.5B-v3.0'), pr_revision=None, pr_num=None)

In [None]:
# Final cell: call Colab's runtime.unassign() once everything above has finished.
# NOTE(review): presumably this disconnects and releases the Colab VM, so any
# files not pushed or copied to Drive are lost - confirm before running.
from google.colab import runtime
runtime.unassign()