In [1]:
import os
import gc
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer, DPOConfig
import bitsandbytes as bnb
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
new_model = "llama-3-8B-Instruct-chat-dpo"

In [2]:
def chatml_format(example):
    # format system
    if len(example["system"]) > 0:
        message = {"role": "system", "content": example["system"]}
        system = tokenizer.apply_chat_template([message], tokenize=False)
    else:
        system = ""

    # format instruction
    message = {"role": "user", "content": example["question"]}
    prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)

    # format chosen answer
    chosen = example["chosen"] + "<|im_end|>\n"
    # format rejected answer
    rejected = example["rejected"] + "<|im_end|>\n"

    return {
        "prompt": system+prompt,
        "chosen": chosen,
        "rejected": rejected,
    }

dataset = load_dataset("Intel/orca_dpo_pairs")['train']
original_columns = dataset.column_names
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# format dataset
dataset = dataset.map(chatml_format, remove_columns=original_columns)
print(dataset[1])

{'chosen': 'Midsummer House is a moderately priced Chinese restaurant with a 3/5 customer rating, located near All Bar One.<|im_end|>\n', 'rejected': ' Sure! Here\'s a sentence that describes all the data you provided:\n\n"Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes."<|im_end|>\n', 'prompt': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.<|eot_id|><|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nGenerate an approximately fifteen-word sentence that describes all this data: Midsummer House eatType restaurant; Midsummer House food Chinese; Midsummer House priceRange moderate; Midsummer House customer rating 3 out of 5; Midsummer House near All Bar One<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'}


In [3]:
model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto', torch_dtype=torch.float16) # new GPUs can use bfloat16
model.config.use_cache = True # use kv cache

message = [
    {"role": "system", "content": "You are a helpful assistant chatbot"},
    {"role": "user", "content": "What is large language model?"}
]
prompt = tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=True)

# inference pipeline
pipeline = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer)
print(prompt)
# text generation
sequences = pipeline(
    prompt,
    max_length=200,
    do_sample=True,
    truncation=True,
    temperature=0.7,
    top_p=0.95,
    num_return_sequences=1,
)

print(sequences[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

What is large language model?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

What is large language model?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

A large language model (LLM) is a type of artificial intelligence (AI) that is trained on a massive dataset of text to generate human-like language outputs. These models are designed to understand and generate natural language, including text, speech, and even dialogue.

LLMs are typically trained on large datasets of text, which can range from millions to billions of words. This training data is used to teach the model to recognize patterns, relationships, and contexts within language, allowing it to generate text that is co

In [5]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

training_args = DPOConfig(
    per_device_train_batch_size=1,
    max_length=1024,
    max_prompt_length=1024,
    gradient_accumulation_steps=32,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=200,
    save_strategy="no",
    logging_steps=1,
    output_dir=new_model,
    optim="adamw_torch",
    warmup_steps=100,
    bf16=False,
    fp16=True,
)

dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=.1,

)

dpo_trainer.train()



Map:   0%|          | 0/12859 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
max_steps is given, it will override any value given in num_train_epochs
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.6931
2,0.6931
3,0.6941
4,0.6932
5,0.6899
6,0.6909
7,0.6884
8,0.6859
9,0.6808
10,0.6727


KeyboardInterrupt: 

In [6]:
dpo_trainer.model.save_pretrained("trained_checkpoint")
tokenizer.save_pretrained("trained_checkpoint")

('trained_checkpoint/tokenizer_config.json',
 'trained_checkpoint/special_tokens_map.json',
 'trained_checkpoint/tokenizer.json')

In [8]:
message = [{"role": "system", "content": "You are a helpful assistant chatbot"}, {"role": "user", "content": "What is a large language model?"}]
prompt = tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=True)

# inference pipeline
pipeline = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer)
print(prompt)
# text generation
sequences = pipeline(
    prompt,
    max_length=1024,
    do_sample=True,
    truncation=True,
    temperature=0.7,
    top_p=0.95,
    num_return_sequences=1,
)

print(sequences[0]['generated_text'])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

What is a large language model?<|eot_id|><|start_header_id|>assistant<|end_header_id|>




  return fn(*args, **kwargs)


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant chatbot<|eot_id|><|start_header_id|>user<|end_header_id|>

What is a large language model?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

A large language model (LLM) is a type of artificial intelligence (AI) model designed to process and generate human-like language. These models are trained on vast amounts of text data, often from the internet, books, or other sources, to learn patterns, relationships, and nuances of language.

Large language models are typically characterized by their massive size, often measured in millions or billions of parameters, which enable them to capture complex linguistic structures, idioms, and context-dependent meanings. This enables them to generate coherent, fluent, and often surprisingly accurate text responses to user input, such as chatbot conversations, language translation, and text summarization.

Some key features of large language models include:

