In [1]:
import os

import torch
from accelerate import Accelerator
from datasets import load_dataset
from peft import (
    LoraConfig,
    get_peft_model,
)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from trl import SFTTrainer, SFTConfig

TODO: Weights & Biases (wandb) experiment logging will be added later.

In [2]:
# Hub repository id of the base checkpoint. Kept under its own name so that
# `model` only ever refers to the loaded model object.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Read the Hugging Face access token from the environment instead of storing
# it in the notebook. When HF_TOKEN is unset this is None and the libraries
# fall back to any locally cached login.
# NOTE(review): a literal token used to be committed on this line; treat it
# as leaked and revoke it.
access_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)

# QLoRA config
# Compute in bfloat16 so the 4-bit matmuls use the same precision as the
# bf16=True mixed-precision setting in the training arguments.
torch_dtype = torch.bfloat16
attn_implementation = "eager"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model (4-bit quantized, placed automatically on the available devices)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation,
    token=access_token,
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
from peft import prepare_model_for_kbit_training

# Prepare the quantized model for training. With use_gradient_checkpointing=True
# (spelled out here, it is also the default) the helper turns gradient
# checkpointing on itself, so a separate model.gradient_checkpointing_enable()
# call beforehand is not needed.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

In [4]:
# LoRA config
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)
accelerator = Accelerator()
model = accelerator.prepare_model(model)

Dataset preparation

In [5]:
# Local JSON Lines files for the three splits (relative to the notebook).
train_dataset_url, test_dataset_url, validation_dataset_url = (
    "./small_dataset/train.jsonl",
    "./small_dataset/test.jsonl",
    "./small_dataset/validation.jsonl",
)

Dataset loading

In [6]:
# Split name -> file path, in the form expected by load_dataset.
data_files = dict(
    train=train_dataset_url,
    test=test_dataset_url,
    validation=validation_dataset_url,
)

# Read all splits with the JSON loader, then expose each one by name.
dataset = load_dataset('json', data_files=data_files)
train_dataset, test_dataset, validation_dataset = (
    dataset[split] for split in ('train', 'test', 'validation')
)

Dataset tokenization

In [7]:
# Reuse EOS as the padding token so sequences can be padded to a fixed length.
tokenizer.pad_token = tokenizer.eos_token


def tokenize_function(examples):
    """Build causal-LM training features from a batch of prompt/answer pairs.

    A decoder-only model learns from a single sequence, so each example is
    encoded as ``prompt tokens + answer tokens + EOS`` and padded to
    ``max_length``. ``labels`` mirrors ``input_ids`` on the answer tokens and
    is -100 on prompt and padding positions, which the cross-entropy loss
    skips; only the answer is supervised.

    Args:
        examples: batch dict with parallel lists under the keys 'input'
            (prompt text) and 'output' (target text).

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels'; every entry is a
        list of ``max_length`` integers per example.

    Note:
        When prompt plus answer exceed ``max_length`` the prompt is trimmed
        first so that answer tokens (and therefore a training signal) remain.
    """
    max_length = 2048
    ignore_index = -100  # label value ignored by the loss
    pad_id = tokenizer.pad_token_id
    eos_id = tokenizer.eos_token_id

    # Tokenize without padding or truncation; lengths are handled after the
    # prompt and the answer have been joined. The answer gets no extra
    # special tokens because it continues the prompt sequence.
    prompt_batch = tokenizer(examples['input'])['input_ids']
    answer_batch = tokenizer(examples['output'], add_special_tokens=False)['input_ids']

    model_input = {'input_ids': [], 'attention_mask': [], 'labels': []}
    for prompt_ids, answer_ids in zip(prompt_batch, answer_batch):
        # Close the answer with EOS so the model learns where to stop.
        answer_ids = (list(answer_ids) + [eos_id])[:max_length]
        # Give the answer priority inside the window; trim the prompt to fit.
        prompt_ids = list(prompt_ids)[:max_length - len(answer_ids)]

        input_ids = prompt_ids + answer_ids
        labels = [ignore_index] * len(prompt_ids) + answer_ids
        pad_len = max_length - len(input_ids)

        model_input['input_ids'].append(input_ids + [pad_id] * pad_len)
        model_input['attention_mask'].append([1] * len(input_ids) + [0] * pad_len)
        model_input['labels'].append(labels + [ignore_index] * pad_len)

    # NOTE(review): confirm the trainer's data collator keeps these labels
    # rather than rebuilding them from input_ids.
    return model_input

# Apply the same batched tokenization to the train, validation and test
# splits (processed in that order).
trained_data, validation_data, test_data = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, validation_dataset, test_dataset)
)

Map:   0%|          | 0/77 [00:00<?, ? examples/s]



Training arguments

In [8]:
# Run-level settings shared by the training configuration below.
batch_size = 1
epochs = 5
output_dir = 't5_datasets_class1/results'
logs_dir = 't5_datasets_class1/logs'

sft_config = SFTConfig(
    output_dir=output_dir,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Because compute_metrics is used, the trainer keeps every evaluation
    # batch's logits until the metric is computed. Moving them to the CPU
    # after each step stops them piling up on the GPU; the recorded run
    # ended in a CUDA out-of-memory error, presumably during evaluation.
    eval_accumulation_steps=1,
    optim="paged_adamw_32bit",
    num_train_epochs=epochs,
    # Evaluate and log every 50 optimizer steps.
    eval_strategy="steps",
    eval_steps=50,
    logging_strategy="steps",
    logging_steps=50,
    logging_dir=logs_dir,
    warmup_steps=10,
    learning_rate=2e-4,
    max_seq_length=2048,
    do_eval=True,
    bf16=True,
)

Evaluation metrics

In [9]:
from evaluate import load
import numpy as np

def compute_metrics(eval_pred):
    """Compute token-level perplexity from evaluation logits and labels.

    Perplexity is ``exp(mean negative log-likelihood)`` of the label tokens.
    It is computed directly from the logits the trainer collected, so no
    second model is loaded and no text is re-tokenized.

    Args:
        eval_pred: pair ``(logits, labels)``. ``logits`` is indexed as
            [example][position][vocab entry] (or a tuple whose first item
            is); ``labels`` is indexed as [example][position], with -100 on
            positions that must be ignored.

    Returns:
        ``{"perplexity": float}``; NaN when no label position is scored.
    """
    logits, labels = eval_pred
    # Some models return extra outputs next to the logits; keep the logits.
    if isinstance(logits, tuple):
        logits = logits[0]
    labels = np.asarray(labels)

    total_nll = 0.0
    total_tokens = 0
    # One sequence at a time keeps temporaries small: the logits array is
    # (positions x vocabulary) per example and can be very large.
    for seq_logits, seq_labels in zip(logits, labels):
        # Causal LM convention: the logits at position t predict token t + 1.
        next_labels = seq_labels[1:]
        keep = next_labels != -100
        if not keep.any():
            continue
        kept_logits = np.asarray(seq_logits[:-1])[keep].astype(np.float64)
        kept_labels = next_labels[keep].astype(np.int64)

        # Numerically stable log-sum-exp over the vocabulary axis.
        row_max = kept_logits.max(axis=-1, keepdims=True)
        log_norm = row_max[:, 0] + np.log(np.exp(kept_logits - row_max).sum(axis=-1))
        target_logits = kept_logits[np.arange(kept_labels.shape[0]), kept_labels]

        total_nll += float((log_norm - target_logits).sum())
        total_tokens += int(kept_labels.shape[0])

    if total_tokens == 0:
        return {"perplexity": float("nan")}
    return {"perplexity": float(np.exp(total_nll / total_tokens))}


Training

In [10]:
# `model` was already wrapped with the LoRA adapters by get_peft_model(), so
# no peft_config is passed here; supplying one for an existing PeftModel is
# at best ignored and may conflict with the adapters already attached.
trainer = SFTTrainer(
    model=model,
    train_dataset=trained_data,
    eval_dataset=validation_data,
    tokenizer=tokenizer,
    args=sft_config,
    compute_metrics=compute_metrics,
)

In [11]:
# Start fine-tuning with the trainer built in the previous cell.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss


Step,Training Loss,Validation Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 8.81 GiB. GPU 

Saving the model

In [None]:
# Write the model first and then the tokenizer into the same directory.
model_path = 't5_model_and_result/model'
for artifact in (model, tokenizer):
    artifact.save_pretrained(model_path)