# Training LLaMa 7B with LLoRa DPO
1. Install dependencies
2. Load model
3. Create instruction tuned dataset
4. Create LoRa adapter
5. Save adapter
6. Run prompt

In [1]:
!pip install transformers accelerate datasets bitsandbytes peft trl



In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer
import torch
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    device_map="auto",
    torch_dtype=torch.bfloat16
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

In [4]:
from datasets import load_dataset
train_ds, test_ds = load_dataset('imdb', split=['train[1%:2%]+train[-2%:-1%]', 'test[:2%]+test[-2%:]'])

sentiment_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Classify the following sentiment into a number.

### Input:
{}

### Response:"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    inputs       = examples["text"]
    outputs      = examples["label"]
    texts = []
    chosen = []
    rejected = []
    i = 0
    for input, output in zip(inputs, outputs):
        text = sentiment_prompt.format(input)
        texts.append(text)
        chosen.append(str(output))
        if i % 2 == 0:
          rejected_bad_format = "{}\n###Instructions".format(output)
          rejected.append(rejected_bad_format)
        else:
          other_answer ^= 1
          rejected_wrong_answer = "{}".format(other_answer)
          rejected.append(rejected_wrong_answer)

    return { "prompt" : texts,"chosen": chosen,"rejected": rejected }
pass

from datasets import load_dataset
original_columns = train_ds.column_names
train_ds = train_ds.shuffle(seed=42).map(formatting_prompts_func, batched = True,remove_columns=original_columns)
test_ds = test_ds.shuffle(seed=42).map(formatting_prompts_func, batched = True,remove_columns=original_columns)

In [5]:
print(train_ds)
print(test_ds)
print(train_ds["prompt"][5])
print("Chosen:","\n",train_ds["chosen"][5],"\n")
print("Rejected:","\n",train_ds["rejected"][5],"\n")


Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 500
})
Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 1000
})
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Classify the following sentiment into a number.

### Input:
This is the kind of movie that leaves you with one impression.. Story writing IS what movie making is about. <br /><br />Incredible visual effects.. Very good acting, especially from Shue. Everything is perfect.. Except.. The story is just poor and so, everything fails.<br /><br />Picture this, if you had the power to be invisible.. What would you do? Well, our mad scientist here (played by Kevin Bacon) could think of no other thing to do but fondle and rape women.. This is all his supposedly "genius" mind could think of. Does he try to gain extra power? No. He doesn't even bother research a way to 

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer,TrainingArguments
import torch
from peft import LoraConfig,PeftModel
from trl import DPOTrainer

args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=32,
    num_train_epochs=2,
    evaluation_strategy="epoch",
    learning_rate=1e-4,
    optim = "adamw_8bit",
    fp16 = not torch.cuda.is_bf16_supported(),
    bf16 = torch.cuda.is_bf16_supported(),
    logging_steps = 10,
    )

peft_config = LoraConfig(
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        target_modules=[
            "q_proj",
            "v_proj",
            "k_proj",
            "out_proj",
            "fc_in",
            "fc_out",
            "wte",
        ],
        bias="none",
        task_type="CAUSAL_LM",
    )

trainer = DPOTrainer(
    base_model,
    ref_model=None,
    peft_config=peft_config,
    tokenizer=tokenizer,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    max_length=512,
    max_prompt_length=256,
    beta=0.1,
    args=args
)



In [7]:
trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Epoch,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen
0,0.0003,5.2e-05,-0.56449,-11.276678,1.0,10.712187,-145.256348,-20.681046,-0.410039,-0.398436
1,0.0001,4.6e-05,-0.565661,-11.351208,1.0,10.785547,-146.001678,-20.692757,-0.427204,-0.416263


TrainOutput(global_step=124, training_loss=0.04138464244422193, metrics={'train_runtime': 575.7386, 'train_samples_per_second': 1.737, 'train_steps_per_second': 0.215, 'total_flos': 0.0, 'train_loss': 0.04138464244422193, 'epoch': 1.98})

In [8]:
trainer.save_model("./lora")

In [9]:
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Classify the following sentiment into a number.

### Input:
It was a great movie!

### Response:"""
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
with torch.cuda.amp.autocast():
  output = base_model.generate(**model_inputs,max_new_tokens=50)

In [10]:
decoded = tokenizer.decode(output[0], skip_special_tokens=True)
print(decoded)

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Classify the following sentiment into a number.

### Input:
It was a great movie!

### Response: 1.0


In [12]:
trainer.model.push_to_hub("denyslinkov/sentiment-lora-dpo")

adapter_model.safetensors:   0%|          | 0.00/101M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/denyslinkov/sentiment-lora-dpo/commit/b2bae6e573e331a5970016ee4f18f0c648b3566d', commit_message='Upload model', commit_description='', oid='b2bae6e573e331a5970016ee4f18f0c648b3566d', pr_url=None, pr_revision=None, pr_num=None)