In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets

In [None]:
# Log in to the Hugging Face Hub through the CLI before any model download.
# NOTE(review): presumably needed because MODEL_ID refers to a gated repository — confirm.
! huggingface-cli login

In [None]:
import bitsandbytes as bnb
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

In [None]:
def get_lora_modules(
    model: AutoModelForCausalLM,
    module_type: type[torch.nn.Linear] = bnb.nn.Linear4bit,
) -> list[str]:
    """
    Return the names of the modules to be tuned using LoRA.

    The LoRA layers are typically the linear layers of the model. Every
    sub-module of ``model`` that is an instance of ``module_type`` contributes
    the last component of its dotted name (e.g. ``"a.b.q_proj"`` -> ``"q_proj"``).

    Args:
        model: Model whose ``named_modules()`` are inspected.
        module_type: Layer class to look for. Defaults to the bitsandbytes
            4-bit linear layer.

    Returns:
        The unique leaf names, excluding ``"lm_head"``, sorted alphabetically
        so the result is identical across runs (plain set iteration order
        depends on string hashing and can change between kernel sessions).
    """
    leaf_names = {
        # The last dotted component is the attribute name of the layer inside
        # its parent module; a name without dots is returned unchanged.
        name.rsplit(".", 1)[-1]
        for name, module in model.named_modules()
        if isinstance(module, module_type)
    }
    # The module named "lm_head" is never targeted.
    leaf_names.discard("lm_head")

    return sorted(leaf_names)

In [None]:
# Model to fine-tune, whether to use gradient checkpointing, and the layer class
# that LoRA adapters are attached to
MODEL_ID = "meta-llama/Llama-2-13b-hf"
GRADIENT_CHECKPOINTING = True
LORA_TARGET_LAYER = bnb.nn.Linear4bit

# Quantization: 4-bit NF4 weights with double quantization, bfloat16 compute dtype
QUANTIZATION_CONFIG = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Dataset: Hub dataset identifier and the text column that gets tokenized
DATASET = "Abirate/english_quotes"
TARGET_COLUMN = "quote"

# Training: batch size 1 with 4 accumulation steps, 1,000 optimizer steps in total,
# logging on every step, results written to "outputs"
TRAINING_CONFIG = TrainingArguments(
    output_dir="outputs",
    max_steps=1_000,
    warmup_steps=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=1,
)

In [None]:
# Fetch the tokenizer that belongs to MODEL_ID
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=MODEL_ID)
# Reuse the end-of-sequence token for padding
# (needed for trainer class, no value by default)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Download the quantized model (the whole model is mapped to device 0) and
# prepare it for k-bit fine-tuning
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, quantization_config=QUANTIZATION_CONFIG, device_map={"": 0}
)
if GRADIENT_CHECKPOINTING:
    model.gradient_checkpointing_enable()
# prepare_model_for_kbit_training enables gradient checkpointing by default, so the
# flag has to be forwarded explicitly; otherwise GRADIENT_CHECKPOINTING = False
# would be silently ignored.
model = prepare_model_for_kbit_training(
    model, use_gradient_checkpointing=GRADIENT_CHECKPOINTING
)

In [None]:
# Collect the names of the layers of the prepared model that receive LoRA adapters,
# then build the adapter configuration around them
lora_target_modules = get_lora_modules(model, LORA_TARGET_LAYER)
LORA_CONFIG = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Wrap the prepared model with the LoRA adapters described by LORA_CONFIG
model = get_peft_model(model=model, peft_config=LORA_CONFIG)

In [None]:
def _tokenize_target_column(samples):
    """Tokenize the TARGET_COLUMN entries of a batch of dataset rows."""
    return tokenizer(samples[TARGET_COLUMN])


# Download the dataset and tokenize it in batches
data = load_dataset(DATASET).map(_tokenize_target_column, batched=True)

In [None]:
# Collator for causal language modelling (mlm=False disables masked-LM batching)
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Build the trainer on the "train" split with the training hyperparameters
trainer = Trainer(
    model=model,
    args=TRAINING_CONFIG,
    train_dataset=data["train"],
    data_collator=causal_lm_collator,
)
# Turn off the generation cache on the model config before training
# NOTE(review): presumably because caching conflicts with gradient checkpointing — confirm.
model.config.use_cache = False

# Run the LoRA fine-tuning loop
trainer.train()