In [3]:
pip install -U transformers datasets accelerate peft trl bitsandbytes #Install Pre-Requisites 

Note: you may need to restart the kernel to use updated packages.


Import Essential Libs and Methods

In [4]:
import gc
import os

import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format


In [6]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16

Create QloRA and LoRA COnfiguration

In [7]:
base_model = "mistralai/Mistral-7B-v0.1"
new_model = "AINovice2005/ElEmperador"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','up_proj','lm_head']
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Import DataSet

In [6]:
dataset_name = "argilla/ultrafeedback-binarized-preferences-cleaned"
dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=42).select(range(100))

def format_chat_template(row):
    row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False)
    row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc= os.cpu_count(),
)
dataset = dataset.train_test_split(test_size=0.01)

Set Trainer Configuration and Train

In [7]:
orpo_args = ORPOConfig(
    learning_rate=8e-6,
    beta=0.1,
    lr_scheduler_type="linear",
    max_length=1024,
    max_prompt_length=512,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    num_train_epochs=5,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    output_dir="./results/",
    report_to="tensorboard",
)

trainer = ORPOTrainer(
    model=model,
    args=orpo_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    tokenizer=tokenizer,
)
trainer.train()
trainer.save_model(new_model)





Map:   0%|          | 0/99 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.float16.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
12,1.3902,0.997781,4.0974,0.244,0.244,-0.070662,-0.080236,1.0,0.009573,-0.802357,-0.706625,-2.976416,-3.325416,0.937102,-0.60679,0.180882
24,1.5199,0.958454,3.9151,0.255,0.255,-0.067919,-0.072639,1.0,0.00472,-0.726394,-0.679195,-3.045847,-3.352174,0.893707,-0.647478,0.093526
36,1.1397,0.93721,4.1385,0.242,0.242,-0.066121,-0.069226,1.0,0.003104,-0.692256,-0.661214,-3.069771,-3.364085,0.871003,-0.662077,0.063138
48,1.1042,0.925779,4.1545,0.241,0.241,-0.064925,-0.066685,1.0,0.00176,-0.666849,-0.649253,-3.08005,-3.362315,0.858272,-0.675065,0.036497
60,1.0224,0.921342,3.9606,0.252,0.252,-0.064503,-0.066122,1.0,0.001619,-0.661216,-0.645025,-3.081339,-3.361872,0.853701,-0.676409,0.033762




Merge the Config into the Base Model

In [10]:
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Specify the local directory path for saving the final results
local_directory = "./teamspace/studios/this_studio/Results_Final"
os.makedirs(local_directory, exist_ok=True)  # Ensure the directory exists

# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Ensure the model and tokenizer are set up for chat formatting
model, tokenizer = setup_chat_format(model, tokenizer)

# Merge the adapter with the base model
model = PeftModel.from_pretrained(model, new_model)
model = model.merge_and_unload()

# Save the merged model and tokenizer in the specified local directory
model.save_pretrained(local_directory)
tokenizer.save_pretrained(local_directory)

print(f"Model and tokenizer saved successfully to {local_directory}")




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model and tokenizer saved successfully to ./teamspace/studios/this_studio/Results_Final


Upload the Model and Tokenizer on the Hub

In [11]:
from huggingface_hub import HfApi, HfFolder

# Define your directory and model ID
local_directory = "./teamspace/studios/this_studio/Results_Final"
model_id = "AINovice2005/ElEmperador"

# Initialize the API
api = HfApi()

# Upload the directory to the Hub
api.upload_folder(
    folder_path=local_directory,
    repo_id=model_id,
    repo_type="model"
)

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AINovice2005/ElEmperador/commit/0b21c178759ce035baea65b03b136665fa1e5353', commit_message='Upload folder using huggingface_hub', commit_description='', oid='0b21c178759ce035baea65b03b136665fa1e5353', pr_url=None, pr_revision=None, pr_num=None)