In [1]:
!pip3 install datasets peft transformers accelerate trl torch bitsandbytes accelerate

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [51]:
import os
from dataclasses import dataclass, field
from typing import Dict, Optional

import torch
from datasets import Dataset, load_dataset
from peft import LoraConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, TrainingArguments
from transformers import BitsAndBytesConfig

from trl import DPOTrainer

In [3]:
!pip3 install accelerate

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [56]:
MODEL_NAME = "alignment-handbook/zephyr-7b-sft-full"
DATASET_NAME = "ultrachat200k"
LR = 5e-4

LORA_R = 32
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

DPO_BETA = 1
SPIN_ITER = 4

In [60]:
from jinja2 import Template

# preprocess the ultrachat200k dataset (or any sharegpt format ds)
dataset = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft[:40]")

tstr = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
template = Template(tstr)

def preprocess(item):
    output = template.render(messages=item["messages"], add_generation_prompt=True, eos_token='\n')
    return {"prompt": item["prompt"], "response": output}

dataset = dataset.map(preprocess, remove_columns=["messages", "prompt_id"])

In [61]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,        # "meta-llama/Llama-2-7b-hf"
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=True,
    #use_auth_token=True,
)
base_model.config.use_cache = False

# initialize peft config
peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [53]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [58]:
from transformers import pipeline
from tqdm import tqdm
import pandas as pd

# given a model, generate a synthetic 
def generate_data(llama_model, dataset, batch_size=8):
    #text_generator = pipeline("text-generation", model=llama_model, tokenizer=tokenizer)  # Use appropriate device index

    prompts = dataset["prompt"]
    responses = dataset["response"]

    gt_responses = []
    generated_prompts = []
    generated_responses = []

    for i in tqdm(range(0, len(prompts), batch_size)):
        batch_prompts = prompts[i:i+batch_size]
        batch_responses = prompts[i:i+batch_size]

        #print(batch_prompts)

        #batch_responses = text_generator(batch_prompts, max_length=512)

        encoding = tokenizer(batch_prompts, padding=True, return_tensors='pt').to('cuda:0')
        
        with torch.no_grad():
            generated_ids = model.generate(**encoding)
        generated_texts = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

        batch_generated_responses = generated_texts
        #batch_generated_responses = [response[0]['generated_text'] for response in batch_responses]

        #batch_responses.extend(batch_responses)
        #generated_prompts.extend(prompts)
        generated_responses.extend(batch_generated_responses)

    print(len(prompts))
    print(len(responses))
    print(len(generated_responses))
    generated_data = Dataset.from_pandas(pd.DataFrame({"question": prompts, "response_j": responses, "response_k": generated_responses}))

    return generated_data

In [59]:
model = base_model
orig_dataset = generate_data(model, dataset)

  0%|          | 0/50 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 0/50 [00:00<?, ?it/s]


RuntimeError: value cannot be converted to type at::Half without overflow

In [43]:
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    max_steps=2000,
    logging_steps=500,
    save_steps=2000,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    learning_rate=3e-4,
    evaluation_strategy="steps",
    eval_steps=100,
    output_dir="spin",
    report_to=None,
    #lr_scheduler_type=script_args.lr_scheduler_type,
    #warmup_steps=script_args.warmup_steps,
    #optim=script_args.optimizer_type,
    bf16=True,
    remove_unused_columns=False,
    run_name="spin",
)


In [45]:
from peft import get_peft_model

"""
model = AutoPeftModelForCausalLM.from_pretrained(
    MODEL_NAME, # location of saved SFT model
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
    is_trainable=True,
)
"""


model =  base_model
model_peft = get_peft_model(model, peft_config)

dataset = orig_dataset

for i in range(SPIN_ITER):

    model_peft = get_peft_model(model, peft_config)

    print("Training")
    dpo_trainer = DPOTrainer(
        model=model_peft,
        ref_model=None,
        args=training_args,
        beta=DPO_BETA,
        train_dataset=dataset,
        #eval_dataset=,
        tokenizer=tokenizer,
        peft_config=peft_config,
    )
    dpo_trainer.train()
    dpo_trainer.save_pretrained(f"peft_checkpoint_{i}")

    print("Generating Data")

    dataset = generate_data(peft_model, orig_dataset)

    model = peft_model.unload()
    

Training




TypeError: string indices must be integers