In [None]:
# Open the Hugging Face login widget in the cell output.
# NOTE(review): presumably the stored credentials are what allow the gated/base
# checkpoints to be downloaded later — confirm against the Hub account setup.
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
!pip install -qqq -U transformers datasets accelerate peft trl bitsandbytes wandb --progress-bar off

In [None]:
import gc
import os

import torch
import wandb
from datasets import load_dataset
from google.colab import userdata
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

# Model
base_model = "mistralai/Mistral-7B-v0.1"
new_model = "test2Mistral"

# Defined in the secrets tab in Google Colab
wb_token = userdata.get('wandb')
wandb.login(key=wb_token)

# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

[34m[1mwandb[0m: Currently logged in as: [33mnatha899[0m ([33mstudent899[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
# QLoRA config: 4-bit NF4 weights with double quantization; the compute dtype
# is the one selected in the setup cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# LoRA config: rank-16 adapters on the listed attention and MLP projections
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load model. torch_dtype is passed explicitly so the non-quantized parts of
# the model use the same dtype as bnb_4bit_compute_dtype and the selected
# attention implementation; without it the dtype chosen in the setup cell was
# never applied to the model itself.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch_dtype,
    device_map="auto",
    attn_implementation=attn_implementation
)
# setup_chat_format returns an updated (model, tokenizer) pair; both are
# rebound here so later cells use the chat-formatted versions.
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
dataset = load_dataset('json', data_files='/content/dataset4pm.json', split='all')
# Subsample up to 1000 shuffled rows. The cap is clamped to the dataset size so
# a file with fewer rows does not make select() fail with an out-of-range index.
dataset = dataset.shuffle(seed=42).select(range(min(1000, len(dataset))))

def format_chat_template(row):
    """Render one row's "chosen" and "rejected" conversations as plain text.

    Both fields are passed through the tokenizer's chat template with
    tokenize=False, so they are replaced by formatted strings rather than
    token ids. The row is modified in place and returned, as expected by
    ``Dataset.map``.
    """
    row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False)
    row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc=os.cpu_count(),
)
# Seed the split as well as the shuffle so the same rows end up in the test
# set on every run.
dataset = dataset.train_test_split(test_size=0.01, seed=42)

Map (num_proc=12):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# Show the train/test splits with their column names and row counts
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['source', 'chosen', 'rejected', 'prompt'],
        num_rows: 990
    })
    test: Dataset({
        features: ['source', 'chosen', 'rejected', 'prompt'],
        num_rows: 10
    })
})


In [None]:
# ORPO training configuration, grouped by concern (keyword order does not
# affect the resulting config).
orpo_args = ORPOConfig(
    # Output and logging
    output_dir="./results/",
    report_to="wandb",
    logging_steps=1,
    # Optimisation schedule
    num_train_epochs=1,
    learning_rate=8e-6,
    lr_scheduler_type="linear",
    warmup_steps=10,
    optim="paged_adamw_8bit",
    # Batching: 2 sequences per device, gradients accumulated over 4 steps
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    # Sequence length limits
    max_length=1024,
    max_prompt_length=512,
    # ORPO loss setting
    beta=0.1,
    # Evaluation cadence. eval_steps is a float below 1 — presumably a fraction
    # of the total training steps (the logged table shows evaluations at steps
    # 25/50/75/100); confirm against the installed transformers version.
    # NOTE(review): newer transformers releases rename evaluation_strategy to
    # eval_strategy — verify before upgrading.
    evaluation_strategy="steps",
    eval_steps=0.2,
)

# NOTE(review): newer TRL releases may expect processing_class instead of
# tokenizer — confirm against the installed version.
trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=orpo_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

# Train, then write the result under the name configured in new_model
trainer.train()
trainer.save_model(new_model)



Map:   0%|          | 0/990 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.float16.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
25,1.6517,1.833758,6.9519,1.438,0.719,-0.185139,-0.153782,0.2,-0.031358,-1.537817,-1.851393,-2.807753,-3.041391,1.741512,-0.922464,-0.395072


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
25,1.6517,1.833758,6.9519,1.438,0.719,-0.185139,-0.153782,0.2,-0.031358,-1.537817,-1.851393,-2.807753,-3.041391,1.741512,-0.922464,-0.395072
50,1.3209,1.643196,6.9545,1.438,0.719,-0.167437,-0.199176,0.9,0.031739,-1.991764,-1.67437,-2.843302,-3.081947,1.589025,-0.541711,0.370632
75,1.4863,1.572056,6.9578,1.437,0.719,-0.163718,-0.314759,1.0,0.151042,-3.147594,-1.637179,-2.85378,-3.089015,1.553152,-0.189035,1.681165
100,1.4722,1.546452,6.9494,1.439,0.719,-0.162615,-0.556699,1.0,0.394084,-5.566988,-1.626152,-2.847734,-3.080769,1.543779,-0.026731,4.163231




In [None]:
# Flush memory: drop the trainer and the training model, run the garbage
# collector twice and empty the CUDA cache before reloading the base model.
del trainer
del model
gc.collect()
gc.collect()
torch.cuda.empty_cache()

# Reload the tokenizer and the base model in fp16, then apply the same chat
# format that was applied before training.
tokenizer = AutoTokenizer.from_pretrained(base_model)
fp16_model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
    return_dict=True,
)
fp16_model, tokenizer = setup_chat_format(fp16_model, tokenizer)

# Attach the saved adapter to the fp16 base model and merge it in
model = PeftModel.from_pretrained(fp16_model, new_model).merge_and_unload()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Write the merged model and its tokenizer into the same directory. The
# tokenizer call stays last so its returned file list is the cell output.
merged_dir = "savedMistralmodel"
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)

('savedMistralmodel/tokenizer_config.json',
 'savedMistralmodel/special_tokens_map.json',
 'savedMistralmodel/tokenizer.model',
 'savedMistralmodel/added_tokens.json',
 'savedMistralmodel/tokenizer.json')

In [None]:
import transformers
import torch

model_id = "savedMistralmodel"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Build the text-generation pipeline from the merged checkpoint. The tokenizer
# loaded above is handed over explicitly so the pipeline does not load a second
# copy from disk (previously the tokenizer loaded here was never used).
# Note: this rebinds the name `pipeline`, which the import cell bound to
# transformers.pipeline; later cells rely on it being this pipeline object.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Conversation to send to the fine-tuned model: a system persona plus one user turn
messages = [
    {"role": "system", "content": "You are an empathetic therapist."},
    {"role": "user", "content": "A friend studying psychology suggested I see my doctor to check for major depressive disorder, but I'm worried about the implications. If diagnosed, it could affect my record and possibly prevent me from pursuing a career as a counselor among other things. I'm also hesitant about taking antidepressants. Earlier this year, I had a severe reaction and passed out after taking medication prescribed for a dislocated hip. Many people I know discourage using antidepressants, believing it’s better to seek treatment without them. I'm concerned they might send me to a clinic and insist on medication if they think I can’t make decisions for myself. While I'm not suicidal, I'm also not taking good care of myself. I've heard that electroshock therapy is still used, which I'm not comfortable with. Given all this, I'm considering self-help and trying to manage on my own. I've been struggling emotionally since elementary school, and despite trying to stay positive after graduation, I still feel unwell."},
]

# Render the conversation as a prompt string, ending with the marker that
# starts the assistant's turn.
prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
)

# Stop tokens. The model was formatted with setup_chat_format, whose
# end-of-turn token is "<|im_end|>"; the previous "<|eot_id|>" lookup is a
# Llama-3 token that this tokenizer does not contain, so it resolved to the
# unknown-token id and could end generation on the wrong token.
terminators = [pipeline.tokenizer.eos_token_id]
im_end_id = pipeline.tokenizer.convert_tokens_to_ids("<|im_end|>")
# Only add the id if it is a real, distinct token (not missing/unknown and not
# already the EOS id).
if im_end_id not in (None, pipeline.tokenizer.unk_token_id, *terminators):
    terminators.append(im_end_id)

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
# The pipeline returns prompt + completion; print only the completion
print(outputs[0]["generated_text"][len(prompt):])

Seeking a diagnosis for depression can lead to tailored treatment options; discuss any concerns about medication and career impacts with a therapist. Professional guidance can complement your self-help efforts and improve your overall mental health management.
