In [2]:
!pip install peft

Collecting peft
  Downloading peft-0.13.0-py3-none-any.whl.metadata (13 kB)
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-0.34.2-py3-none-any.whl.metadata (19 kB)
Collecting safetensors (from peft)
  Downloading safetensors-0.4.5-cp39-cp39-macosx_10_12_x86_64.whl.metadata (3.8 kB)
Downloading peft-0.13.0-py3-none-any.whl (322 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 322.5/322.5 kB 5.3 MB/s eta 0:00:00
Downloading accelerate-0.34.2-py3-none-any.whl (324 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 324.4/324.4 kB 7.8 MB/s eta 0:00:00
Downloading safetensors-0.4.5-cp39-cp39-macosx_10_12_x86_64.whl (393 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 393.1/393.1 kB 11.1 MB/s eta 0:00:00
Installing collected packages: safetensors, accelerate, peft
  Attempting uninstall: safetensors
    Found exist

In [1]:
import os
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging

from peft import LoraConfig, PeftModel
from trl import SFTTrainer

ModuleNotFoundError: No module named 'peft'

In [3]:
# Identifiers consumed by the loading cells further down.
model_name = "NousResearch/Llama-2-7b-chat-hf"  # base checkpoint given to from_pretrained
dataset_name = "mlabonne/guanaco-llama2-1k"  # dataset given to load_dataset

# Name passed to save_pretrained once training has finished.
new_model = "llama-2-7b-miniguanaco"

In [19]:
# ---------------------------------------------------------------------------
# LoRA adapter settings (forwarded to LoraConfig)
# ---------------------------------------------------------------------------
lora_r = 64  # passed as `r`
lora_alpha = 16
lora_dropout = 0.1

# ---------------------------------------------------------------------------
# 4-bit quantization settings (forwarded to BitsAndBytesConfig)
# ---------------------------------------------------------------------------
use_4bit = True
bnb_4bit_quant_type = "nf4"
bnb_4bit_compute_dtype = "float16"  # attribute name later looked up on `torch`
use_nested_quant = False  # passed as `bnb_4bit_use_double_quant`

# ---------------------------------------------------------------------------
# Training run settings (forwarded to TrainingArguments)
# ---------------------------------------------------------------------------
# Relative to the working directory. The previous value "/result" pointed at
# the filesystem root, which is not portable and is often not writable.
out_dir = "./result"
epochs = 1

# Mixed-precision switches; both disabled.
fp16 = False
bf16 = False

per_device_batch_train = 4
per_device_batch_eval = 4

gradient_accumulation = 1
gradient_checkpoint = True

max_grad_norm = 0.3
# NOTE(review): 0.002 is ten times the 2e-4 commonly used for LoRA/QLoRA
# fine-tuning - confirm this is intentional and not a typo.
learning_rate = 0.002
weight_decay = 0.001

optimizer = "paged_adamw_32bit"  # passed as `optim`
lr_scheduler = "cosine"  # passed as `lr_scheduler_type`

max_steps = -1
warmup_ratio = 0.03
group_by_length = True

save_steps = 0
logging_steps = 23

# ---------------------------------------------------------------------------
# SFTTrainer settings
# ---------------------------------------------------------------------------
max_seq_l = None  # passed as `max_seq_length`
packing = False

# Device placement handed to from_pretrained; maps the whole model ("") to device 0.
device_map = {"": 0}




In [9]:
# Fetch only the "train" split of the configured dataset.
dataset = load_dataset(dataset_name, split="train")

# Turn the configured dtype name into the matching attribute of the torch module.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Collect the quantization options first, then build the config from them.
quantization_options = {
    "load_in_4bit": use_4bit,
    "bnb_4bit_quant_type": bnb_4bit_quant_type,
    "bnb_4bit_compute_dtype": compute_dtype,
    "bnb_4bit_use_double_quant": use_nested_quant,
}
bnb_config = BitsAndBytesConfig(**quantization_options)

In [10]:
# Advisory only: when training a 4-bit model with float16 compute, tell the user
# if the GPU reports a compute capability major version >= 8, in which case
# bf16=True could be used instead.
# torch.cuda.get_device_capability() raises on hosts without a usable CUDA
# device, so the check is skipped there instead of crashing the cell.
if use_4bit and compute_dtype == torch.float16 and torch.cuda.is_available():
    # Index instead of unpacking into `_`, which would shadow IPython's
    # last-output variable for the rest of the session.
    major = torch.cuda.get_device_capability()[0]
    if major >= 8:
        print("=" * 80)
        print("GPU can use bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [13]:
# Load the base checkpoint with the quantization config and device placement
# prepared in the earlier cells.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,
)

# Post-load config overrides applied before training.
# NOTE(review): use_cache=False presumably disables the generation cache while
# training, and pretraining_tp=1 presumably selects the non-tensor-parallel
# code path - confirm against the model's config documentation.
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
# Load the tokenizer that belongs to the base checkpoint.
# trust_remote_code is intentionally not enabled: the run log shows the built-in
# LlamaTokenizerFast class is used, and the model itself is loaded without it,
# so allowing repository-hosted code to execute would add risk for no benefit.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the end-of-sequence token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [17]:
# LoRA adapter configuration built from the hyperparameter cell; handed to
# SFTTrainer as `peft_config`.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)

In [21]:
# Trainer configuration assembled from the values in the hyperparameter cell.
training_arg = TrainingArguments(
    output_dir=out_dir,
    num_train_epochs=epochs,
    max_steps=max_steps,
    # Batch sizing.
    per_device_train_batch_size=per_device_batch_train,
    # Configured in the hyperparameter cell but previously never forwarded,
    # so the setting silently had no effect.
    per_device_eval_batch_size=per_device_batch_eval,
    gradient_accumulation_steps=gradient_accumulation,
    # Also configured but previously never forwarded; pass it through so the
    # declared gradient-checkpointing choice is actually applied.
    gradient_checkpointing=gradient_checkpoint,
    group_by_length=group_by_length,
    # Optimizer and learning-rate schedule.
    optim=optimizer,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    max_grad_norm=max_grad_norm,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler,
    # Numeric precision.
    fp16=fp16,
    bf16=bf16,
    # Checkpointing and logging cadence.
    save_steps=save_steps,
    logging_steps=logging_steps,
    report_to="tensorboard",
)

In [22]:
# Supervised fine-tuning trainer: combines the loaded model and tokenizer, the
# training split, the LoRA config and the training arguments.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arg,
    peft_config=peft_config,
    # Data handling: read samples from the "text" column.
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_l,
    packing=packing,
)



Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [23]:
# Run the training loop with the trainer configured above.
trainer.train()

# Persist the trained model under the name chosen in the configuration cell.
# NOTE(review): with a PEFT-wrapped model this presumably writes only the
# adapter weights rather than the full model - verify before relying on it.
trained_model = trainer.model
trained_model.save_pretrained(new_model)

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
23,1.2502
46,1.4733
69,1.3882
92,1.3456
115,1.3524
138,1.22
161,1.2838
184,1.2902
207,1.4319
230,1.2171


In [25]:
# Silence everything below CRITICAL from the transformers logger for the demo.
logging.set_verbosity(logging.CRITICAL)

# Build a text-generation pipeline around the in-memory model and tokenizer.
# NOTE(review): max_length=200 presumably counts the prompt tokens as well,
# which would explain the recorded answer stopping mid-way - confirm.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)

# Wrap the question in the [INST] ... [/INST] markers before generating.
prompt = "Can you explain the three branches of government in the United States?"
formatted_prompt = f"[INST] {prompt} [/INST]"
result = pipe(formatted_prompt)

first_completion = result[0]
print(first_completion["generated_text"])

[INST] Can you explain the three branches of government in the United States? [/INST] The three branches of government in the United States are the legislative, executive, and judicial branches. These branches are established by the United States Constitution and are responsible for different aspects of the government.

The legislative branch, also known as Congress, is responsible for making laws. It is composed of two houses: the House of Representatives and the Senate. The House of Representatives has 435 members, each representing a district in one of the 50 states. The Senate has 100 members, with two senators representing each state.

The executive branch is responsible for enforcing laws. It is headed by the President, who serves as both the head of state and the head of government. The President is also the commander-in-chief of the armed forces and has the power to negotiate treaties and appoint federal judges.


