In [1]:
import sys, os
os.chdir("..")

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
id = "google/gemma-2-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [4]:
tokenizer = AutoTokenizer.from_pretrained(f"models/{id}")
model = AutoModelForCausalLM.from_pretrained(
    f"models/{id}",
    quantization_config=bnb_config,
    device_map={"": 0},
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.08s/it]


In [5]:
text = "Quote: Imagination is more"
device = "cuda"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Imagination is more important than knowledge. Knowledge is limited. Imagination encircles the world.

Albert Einstein

The world is a book, and those who do not travel read only


In [6]:
from datasets import load_dataset
dataset = "Abirate/english_quotes"
data = load_dataset(f"datasets/{dataset}")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

In [7]:
from peft import LoraConfig, PeftModel

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [8]:
import transformers
from trl import SFTTrainer

def formatting_func(example):
    text = f"Quote: {example['quote'][0]}\nAuthor: {example['author'][0]}\nTag: {example['tag'][0]}\n\n"
    return [text]

trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=8,
        learning_rate=1e-4,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
)
trainer.train()

max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
1,2.8787
2,1.8749
3,2.4913
4,2.7484
5,2.0117
6,2.2027
7,2.9355
8,1.9252


TrainOutput(global_step=8, training_loss=2.3835338950157166, metrics={'train_runtime': 56.2027, 'train_samples_per_second': 0.569, 'train_steps_per_second': 0.142, 'total_flos': 13772211646464.0, 'train_loss': 2.3835338950157166, 'epoch': 0.012759170653907496})

In [9]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, PeftModel

id = "google/gemma-2-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [10]:
tokenizer = AutoTokenizer.from_pretrained(
    f"models/{id}",
)
model = AutoModelForCausalLM.from_pretrained(
    f"models/{id}",
    quantization_config=bnb_config,
    # torch_dtype=torch.bfloat16,
    device_map={"":0},
)
model = PeftModel.from_pretrained(model, "outputs/checkpoint-8")

text = "Quote: Imagination is"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.36s/it]


Quote: Imagination is more important than knowledge. Knowledge is limited. Imagination encircles the world.

Albert Einstein

The world is a book, and those who do not travel read
