In [2]:
! pip install -r /content/requirements.txt

Collecting bitsandbytes (from -r /content/requirements.txt (line 3))
  Using cached bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting trl (from -r /content/requirements.txt (line 4))
  Downloading trl-0.16.1-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->-r /content/requirements.txt (line 1))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->-r /content/requirements.txt (line 1))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->-r /content/requirements.txt (line 1))
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->-r /content/requirements.txt (line 1))
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-no

In [50]:
import os
from dataclasses import dataclass, field
from typing import Optional

In [51]:
import torch as tf
from datasets import load_dataset, load_from_disk
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments
)

In [52]:
from tqdm.notebook import tqdm
from trl import SFTTrainer
from huggingface_hub import interpreter_login


In [54]:
# Authenticate with the Hugging Face Hub; this prompts interactively for an
# access token so the token never has to be written into the notebook.
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

Enter your token (input will not be visible): ··········
Add token as git credential? (Y/n) n


In [68]:
# Fetch the "train" split of the counseling-conversations dataset.
dataset = load_dataset(
    "Amod/mental_health_counseling_conversations",
    split="train",
)

In [69]:
import pandas as pd

# Convert the Hugging Face dataset to a pandas DataFrame using the library's
# own converter instead of iterating the dataset row by row in Python.
df = dataset.to_pandas()

In [70]:
# Preview the first rows to check the available columns.
df.head()

Unnamed: 0,Context,Response
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb..."
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see..."
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...


In [71]:
def format_row(row):
  """Build the instruction-style training string for a single row.

  Reads the "Context" and "Response" entries of ``row``, wraps the context
  in ``[INST] ...[/INST]`` markers and appends the response after a space.

  Args:
    row: Mapping-like object (e.g. a DataFrame row) with "Context" and
      "Response" keys.

  Returns:
    The formatted prompt/response string.
  """
  return "[INST] {}[/INST] {}".format(row["Context"], row["Response"])



In [72]:
# Apply format_row to every row (axis=1) and store the result in a new column.
df["formatted"] = df.apply(format_row, axis=1)

In [73]:
# Work on a renamed copy: the formatted column is exposed under the name "text".
new_df = df.rename(columns={"formatted": "text"})

In [74]:
# Display the frame to verify that the "text" column was added.
new_df


Unnamed: 0,Context,Response,text
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb...",[INST] I'm going through some things with my f...
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see...",[INST] I'm going through some things with my f...
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...,[INST] I'm going through some things with my f...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...,[INST] I'm going through some things with my f...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...,[INST] I'm going through some things with my f...
...,...,...,...
3507,My grandson's step-mother sends him to school ...,Absolutely not! It is never in a child's best ...,[INST] My grandson's step-mother sends him to ...
3508,My boyfriend is in recovery from drug addictio...,I'm sorry you have tension between you and you...,[INST] My boyfriend is in recovery from drug a...
3509,The birth mother attempted suicide several tim...,"The true answer is, ""no one can really say wit...",[INST] The birth mother attempted suicide seve...
3510,I think adult life is making him depressed and...,How do you help yourself to believe you requir...,[INST] I think adult life is making him depres...


In [75]:
# Keep only the formatted "text" column; it is the single column exported below.
text = new_df["text"]

In [76]:
# Write the formatted samples to disk without the pandas index column.
text.to_csv(
    "/content/formated_data.csv",
    index=False,
)

In [77]:
# Reload the exported CSV as a dataset; its single split is named "train".
train_dataset = load_dataset(
    "csv",
    data_files="/content/formated_data.csv",
    split="train",
)

Generating train split: 0 examples [00:00, ? examples/s]

In [78]:
# Show the dataset summary (feature names and row count).
train_dataset

Dataset({
    features: ['text'],
    num_rows: 3512
})

## Fine-tuning

In [84]:
import gc

base_model = "microsoft/phi-2"
new_model = "phi2-mental-health"

# Make this cell safe to re-run. Without this, a second execution loads a new
# copy of the model while the previous `model` / `trainer` objects still pin
# GPU memory, which ends in "CUDA out of memory". Drop the stale references,
# collect them, and hand PyTorch's cached blocks back to the driver first.
# (`tf` is this notebook's alias for torch.)
# NOTE(review): if memory is still held after a failed run, restart the runtime.
for _stale_name in ("trainer", "model"):
    globals().pop(_stale_name, None)
gc.collect()
tf.cuda.empty_cache()

# Tokenizer: reuse the EOS token for padding and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(base_model,use_fast = True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 4-bit quantization settings for loading the base model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4", # NF4 = 4-bit NormalFloat quantization data type
    bnb_4bit_compute_dtype = tf.float16,
    bnb_4bit_use_double_quant = False
)

# Load the quantized base model entirely onto GPU 0.
# NOTE(review): flash_attn / flash_rotary / fused_dense are presumably options
# of the remote-code model implementation at this revision -- confirm.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config = bnb_config,
    trust_remote_code = True,
    flash_attn = True,
    flash_rotary = True,
    fused_dense = True,
    low_cpu_mem_usage = True,
    device_map = {"":0},
    revision = "refs/pr/23"
)

# The KV cache is disabled for training (gradient checkpointing is enabled below).
model.config.use_cache = False
# NOTE(review): pretraining_tp looks carried over from other recipes; verify it
# has any effect for this model.
model.config.pretraining_tp = 1

model = prepare_model_for_kbit_training(model,use_gradient_checkpointing = True)

# Effective batch per optimizer step: 3 samples * 32 accumulation steps = 96.
# max_steps = -1 leaves the run length to num_train_epochs.
# eval_steps is only relevant if eval_strategy is switched away from "no".
training_arguments = TrainingArguments(
    output_dir = "/content/mhGPT",
    num_train_epochs = 2,
    per_device_train_batch_size = 3,
    gradient_accumulation_steps = 32,
    eval_strategy = "no",
    eval_steps = 1500,
    logging_steps = 15,
    optim = "paged_adamw_8bit",
    learning_rate = 2e-4,
    lr_scheduler_type = "cosine",
    save_steps = 1500,
    warmup_ratio = 0.05,
    weight_decay = 0.01,
    max_steps = -1
)

# LoRA adapter configuration.
# NOTE(review): target_modules must match layer names of the model revision
# loaded above -- confirm if the revision changes.
peft_config = LoraConfig(
    r = 32,
    lora_alpha = 64,
    lora_dropout = 0.05,
    bias = "none",
    task_type = "CAUSAL_LM",
    target_modules = ["Wqkv","fc1","fc2"]
)

# Pass the tokenizer configured above explicitly; otherwise the trainer builds
# its own and the pad-token / padding-side settings here are never used.
trainer = SFTTrainer(
    model = model,
    train_dataset = train_dataset,
    peft_config = peft_config,
    processing_class = tokenizer,
    args = training_arguments
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 250.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 158.12 MiB is free. Process 5370 has 14.58 GiB memory in use. Of the allocated memory 13.32 GiB is allocated by PyTorch, and 1.13 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [83]:
# Start fine-tuning; requires the `trainer` built in the setup cell above.
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 100.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 58.12 MiB is free. Process 5370 has 14.68 GiB memory in use. Of the allocated memory 13.37 GiB is allocated by PyTorch, and 1.18 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)