In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _subfolders, files in os.walk('/kaggle/input'):
    for file_name in files:
        file_path = os.path.join(folder, file_name)
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mentalhealth/counselchat.csv


In [2]:
%%capture
# Install Unsloth from PyPI; the %%capture magic on the first line keeps pip's output out of the notebook.
!pip install unsloth
# Then reinstall Unsloth itself from the GitHub repository, skipping dependency resolution and the pip cache.
# NOTE(review): neither install is version-pinned, so a later re-run may pull different code — consider pinning.
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [3]:
from unsloth import FastLanguageModel
import torch
# Settings forwarded to FastLanguageModel.from_pretrained further down the notebook.
load_in_4bit = True     # request 4-bit loading of the base weights
dtype = None            # no explicit dtype is chosen here; the value is passed through unchanged
max_seq_length = 2048   # sequence-length limit, also handed to the SFT trainer

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
# Fetch the Hugging Face access token from the Kaggle secret store and sign in with it.
# The token never appears in the notebook source; it is looked up by its secret label.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token=hf_token)

In [6]:
import wandb

# Authenticate with Weights & Biases using the API key kept in the Kaggle secret store.
wb_token = user_secrets.get_secret("wandb_api")
wandb.login(key=wb_token)

# Open the tracking run; the handle is kept in `run`.
run = wandb.init(project='Fine-tune-Llama-3.2-3B-Instruct on Counsel-Chat Dataset', job_type="training", anonymous="allow")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mgodwinadegbehingbe[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [7]:
# Load the base checkpoint together with its tokenizer through Unsloth,
# reusing the loading settings and the Hugging Face token defined in earlier cells.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    token=hf_token,
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

==((====))==  Unsloth 2025.2.12: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [8]:
# Wrap the loaded model with LoRA adapters.
# NOTE(review): `model` is replaced by its own wrapped version, so re-running this cell
# without reloading the base model would wrap it a second time — confirm before re-executing.
model = FastLanguageModel.get_peft_model(
    model,
    # Adapter hyper-parameters.
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    # Projection layers that receive adapters: the attention ones first, then the MLP ones.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Either True or "unsloth" is accepted here; "unsloth" is the choice for very long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)



Unsloth 2025.2.12 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [10]:
# End-of-sequence marker taken from the loaded tokenizer; formatting_prompts_func
# appends it to every training example.
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

# Prompt template used to render each (question, answer) pair for supervised fine-tuning.
# It is defined right next to the formatter so that this cell works on a fresh kernel
# (Restart & Run All) instead of depending on a cell located further down the notebook.
# The first "{}" receives the inquiry, the second one the counselor's answer.
train_prompt_style = (
    "Below is an instruction that describes a task, paired with an input that provides further context.\n"
    "Write a response that appropriately completes the request in a supportive and empathetic manner.\n"
    "\n"
    "### Instruction:\n"
    "You are a compassionate mental health counselor with expertise in emotional support and mental wellness. \n"
    "Please respond to the following inquiry with empathy, understanding, and practical guidance.\n"
    "\n"
    "### Inquiry:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)


def formatting_prompts_func(examples):
    """Render a batch of dataset rows into training texts.

    Args:
        examples: A batch mapping with the parallel sequences
            ``examples["questionText"]`` and ``examples["answerText"]``.

    Returns:
        ``{"text": [...]}`` with one string per row: the prompt template filled
        with the question and the answer, followed by ``EOS_TOKEN``.

    Note:
        Reads the module-level names ``train_prompt_style`` and ``EOS_TOKEN``;
        ``EOS_TOKEN`` must be assigned before this function is called.
    """
    texts = [
        train_prompt_style.format(question, answer) + EOS_TOKEN
        for question, answer in zip(examples["questionText"], examples["answerText"])
    ]
    return {"text": texts}

from datasets import Dataset

# Read the counseling CSV (decoded as ISO-8859-1) into a Hugging Face Dataset and,
# in the same expression, add the rendered "text" column batch by batch.
dataset = Dataset.from_csv(
    "/kaggle/input/mentalhealth/counselchat.csv",
    encoding="ISO-8859-1",
).map(formatting_prompts_func, batched=True)

# Show the first rendered training example as a quick sanity check.
print(dataset["text"][0])


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2129 [00:00<?, ? examples/s]

Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request in a supportive and empathetic manner.

### Instruction:
You are a compassionate mental health counselor with expertise in emotional support and mental wellness. 
Please respond to the following inquiry with empathy, understanding, and practical guidance.

### Inquiry:
I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.
   I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it.
   How can I change my feeling of being worthless to everyone?

### Response:
If everyone thinks you're worthless, then maybe you need to find new people to hang out with.Seriously, the social context in which a person lives is a big influence in self-esteem.Otherwise, you can go round and round trying to 

In [9]:
# Training prompt template. The first "{}" is filled with the inquiry and the second
# with the counselor's answer; formatting_prompts_func reads this name when it builds
# the training text.
# NOTE(review): this cell sits below the cell that calls formatting_prompts_func —
# confirm the template is defined before that cell runs on a fresh kernel.
train_prompt_style = (
    "Below is an instruction that describes a task, paired with an input that provides further context.\n"
    "Write a response that appropriately completes the request in a supportive and empathetic manner.\n"
    "\n"
    "### Instruction:\n"
    "You are a compassionate mental health counselor with expertise in emotional support and mental wellness. \n"
    "Please respond to the following inquiry with empathy, understanding, and practical guidance.\n"
    "\n"
    "### Inquiry:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)

In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer over the rendered "text" column.
trainer = SFTTrainer(
    # What to train and on which data.
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    dataset_num_proc=2,
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        output_dir="outputs",
        seed=3407,
        # Batching: 2 examples per device, accumulated over 4 steps.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Short run capped at 60 optimizer steps.
        # For a full training run, switch to num_train_epochs = 1 and warmup_ratio instead.
        max_steps=60,
        warmup_steps=5,
        # Optimizer and learning-rate schedule.
        optim="adamw_8bit",
        learning_rate=2e-4,
        weight_decay=0.01,
        lr_scheduler_type="linear",
        # Mixed precision: bf16 when the helper reports support, fp16 otherwise.
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
        logging_steps=10,
    ),
)



Applying chat template to train dataset (num_proc=2):   0%|          | 0/2129 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/2129 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/2129 [00:00<?, ? examples/s]

In [12]:
# Run the fine-tuning loop configured above and keep whatever train() returns for later inspection.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2,129 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 24,313,856


Step,Training Loss
10,2.5436
20,2.0665
30,2.0284
40,1.9933
50,1.916
60,1.9151


In [13]:
# Close the Weights & Biases run opened earlier in the notebook.
# Nothing is written to disk here; the model itself is saved in later cells.
wandb.finish()

0,1
train/epoch,▁▂▄▅▇██
train/global_step,▁▂▄▅▇██
train/grad_norm,█▁▅▇▂▂
train/learning_rate,█▇▅▄▂▁
train/loss,█▃▂▂▁▁

0,1
total_flos,3574615023845376.0
train/epoch,0.22535
train/global_step,60.0
train/grad_norm,0.26949
train/learning_rate,0.0
train/loss,1.9151
train_loss,2.07716
train_runtime,301.5842
train_samples_per_second,1.592
train_steps_per_second,0.199


In [15]:
# Inference prompt template. It uses the same wording as train_prompt_style, so prompts
# at generation time look like the texts the model was tuned on. The first "{}" takes
# the inquiry; the second is filled with an empty string so the model writes the response.
prompt_style = (
    "Below is an instruction that describes a task, paired with an input that provides further context.\n"
    "Write a response that appropriately completes the request in a supportive and empathetic manner.\n"
    "\n"
    "### Instruction:\n"
    "You are a compassionate mental health counselor with expertise in emotional support and mental wellness. \n"
    "Please respond to the following inquiry with empathy, understanding, and practical guidance.\n"
    "\n"
    "### Inquiry:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)

In [16]:
question = "I am depressed , what do i do?"

# Switch the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!

# Render the inquiry into the prompt and leave the response slot empty for the model to fill.
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

# The generated sequence starts with the prompt tokens. Slice them off instead of
# splitting the decoded text on "### Response:", which would cut at the wrong place if
# that marker ever appeared in the question or in the answer. Special tokens such as
# the end-of-turn marker are dropped so they do not leak into the printed reply.
prompt_length = inputs.input_ids.shape[1]
response = tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)
print(response[0].strip())



I am sorry to hear that you are depressed.  There are several things you can do to help yourself, and I will list them below.  Please talk to a mental health professional, such as a therapist or counselor, to discuss your feelings and develop a plan to help you get better.  It is very important to seek help from a mental health professional if you are depressed.  Depression can be a very serious illness that can cause you to become suicidal.  If you are feeling suicidal, please call a suicide hotline, such as the National Suicide Prevention Lifeline at 1-800-273-TALK (8255).<|eot_id|>


In [17]:
# NOTE(review): this is a clinical-diagnosis question rather than a counseling inquiry;
# presumably an out-of-domain probe of the tuned model — confirm it is intended.
question = "A 59-year-old man presents with a fever, chills, night sweats, and generalized fatigue, and is found to have a 12 mm vegetation on the aortic valve. Blood cultures indicate gram-positive, catalase-negative, gamma-hemolytic cocci in chains that do not grow in a 6.5% NaCl medium. What is the most likely predisposing factor for this patient's condition?"

# Render the question into the prompt and leave the response slot empty for the model to fill.
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

# The generated sequence starts with the prompt tokens. Slice them off instead of
# splitting the decoded text on "### Response:", and drop special tokens so the
# end-of-turn marker does not leak into the printed reply.
prompt_length = inputs.input_ids.shape[1]
response = tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)
print(response[0].strip())


The patient's symptoms of fever, chills, night sweats, and generalized fatigue are classic for endocarditis. The fact that the patient has a vegetation on the aortic valve suggests that the patient has a condition known as subacute bacterial endocarditis. The fact that the patient has a vegetation on the aortic valve is a predisposing factor for this condition. The most common predisposing factors for subacute bacterial endocarditis are a history of a heart defect, a history of intravenous drug use, and a history of a prosthetic heart valve. The fact that the patient is a 59-year-old man with a history of intravenous drug use is a predisposing factor for this patient's condition.<|eot_id|>


In [18]:
# Destination names: a local folder for this session and the Hugging Face Hub repo id.
new_model_local = "Mental-Health"
new_model_online = "GodwinSage/Mental-Health"

# Write the model files, then the tokenizer files, into the local folder.
# The tokenizer call stays last so that its return value is what the cell displays.
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)


('Mental-Health/tokenizer_config.json',
 'Mental-Health/special_tokens_map.json',
 'Mental-Health/tokenizer.json')

In [19]:
# Upload the model and then the tokenizer to the Hub repo named by new_model_online.
# NOTE(review): presumably relies on the Hugging Face login done earlier — confirm the token has write access.
model.push_to_hub(new_model_online) # Online saving
tokenizer.push_to_hub(new_model_online) # Online saving

README.md:   0%|          | 0.00/615 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

Saved model to https://huggingface.co/GodwinSage/Mental-Health


  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [20]:
# Save a merged 16-bit copy of the model locally, then push a merged 16-bit copy to the Hub.
# Each call performs its own merge, so the merge work is done twice.
# NOTE(review): both calls reuse new_model_local / new_model_online, i.e. the same folder and
# repo that the earlier save_pretrained / push_to_hub calls wrote to — confirm that keeping
# both sets of files side by side is intended.
model.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)
model.push_to_hub_merged(new_model_online, tokenizer, save_method = "merged_16bit")

Unsloth: You have 2 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 2.4G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 19.04 out of 31.35 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 28/28 [00:00<00:00, 30.52it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving Mental-Health/pytorch_model-00001-of-00002.bin...
Unsloth: Saving Mental-Health/pytorch_model-00002-of-00002.bin...
Done.


Unsloth: You are pushing to hub in Kaggle environment.
To save memory, we shall move GodwinSage/Mental-Health to /tmp/Mental-Health


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 18.99 out of 31.35 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 28/28 [00:00<00:00, 32.05it/s]


Unsloth: Saving tokenizer...

No files have been modified since last commit. Skipping to prevent empty commit.


 Done.
Unsloth: Saving /tmp/Mental-Health/pytorch_model-00001-of-00002.bin...
Unsloth: Saving /tmp/Mental-Health/pytorch_model-00002-of-00002.bin...


  0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Done.
Saved merged model to https://huggingface.co/GodwinSage/Mental-Health
