In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,

)

from peft import (
    LoraConfig,
    get_peft_model,
)

from datasets import load_dataset
from trl import SFTTrainer, SFTConfig
import torch
from accelerate import Accelerator

Weights & Biases (wandb) experiment tracking will be added later.

In [2]:
import os

# Hub repository of the base checkpoint. Kept under its own name so that
# `model` only ever refers to the loaded model object and this cell can be
# re-run without feeding a model instance back in as a repository id.
model_id = "meta-llama/Llama-2-7b-hf"

# Never hard-code credentials in a notebook: read the Hugging Face token from
# the environment instead. When HF_TOKEN is unset this is None and the library
# falls back to any locally cached login.
# SECURITY: the token that used to be written here in plain text must be
# treated as compromised and revoked in the Hugging Face account settings.
access_token = os.environ.get("HF_TOKEN")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=access_token,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
# The tokenizer needs a padding token for batching; reuse EOS for that role.
tokenizer.pad_token = tokenizer.eos_token

# QLoRA config: 4-bit NF4 weights with double quantization, fp16 compute.
torch_dtype = torch.float16
attn_implementation = "eager"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load the quantized base model (gated repository, hence the token).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=access_token,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation,
)


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [3]:
# Helper from PEFT used to get the quantized model ready for training.
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the base model before adapters are added.
model.gradient_checkpointing_enable()
# NOTE(review): prepare_model_for_kbit_training presumably also enables
# gradient checkpointing by default, which would make the call above
# redundant — confirm against the installed PEFT version.
model = prepare_model_for_kbit_training(model)

In [4]:
# LoRA adapter definition: rank-16 adapters on every attention and MLP
# projection listed below, then wrap the model with them.
lora_target_modules = [
    'up_proj', 'down_proj', 'gate_proj',
    'k_proj', 'q_proj', 'v_proj', 'o_proj',
]
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.2,
    bias="none",
)
model = get_peft_model(model, peft_config)

Dataset preparation

In [5]:
# Local JSONL files backing each split.
train_dataset_url = "./llama_datasets/train.jsonl"
# NOTE(review): validation and test both read test.jsonl, so evaluation during
# training runs on the test data — confirm this is intended.
validation_dataset_url = test_dataset_url = "./llama_datasets/test.jsonl"

Dataset loading

In [6]:
# Map each split name to its JSONL file and load all three in one call.
data_files = dict(
    train=train_dataset_url,
    test=test_dataset_url,
    validation=validation_dataset_url,
)
dataset = load_dataset('json', data_files=data_files)

# Expose every split under its own name for the cells that follow.
train_dataset, test_dataset, validation_dataset = (
    dataset['train'],
    dataset['test'],
    dataset['validation'],
)

Dataset tokenization

In [7]:
tokenizer.pad_token = tokenizer.eos_token

IGNORE_INDEX = -100  # label value that the cross-entropy loss skips


def tokenize_function(examples, max_length=2048):
    """Turn a batch of prompt/response pairs into causal-LM training features.

    A decoder-only model is trained on a single sequence, so the prompt
    (``examples['input']``) and the response (``examples['output']``) are
    concatenated as ``<bos> prompt response <eos>``. Tokenizing them
    separately, as was done before, produces ``labels`` that are not
    position-aligned with ``input_ids``.

    ``labels`` mirrors ``input_ids`` but uses ``IGNORE_INDEX`` on prompt and
    padding positions so that only response tokens contribute to the loss.

    Args:
        examples: batch dict with list-valued ``'input'`` and ``'output'``
            text columns (as supplied by ``Dataset.map(batched=True)``).
        max_length: fixed sequence length; longer sequences are truncated on
            the right, shorter ones are padded on ``tokenizer.padding_side``.

    Returns:
        dict with ``'input_ids'``, ``'attention_mask'`` and ``'labels'``, each
        a list of ``max_length``-long integer lists.

    NOTE(review): if a prompt alone fills ``max_length`` the example ends up
    with no supervised tokens — consider filtering such rows. Also confirm
    that the trainer's data collator keeps this ``labels`` column rather than
    rebuilding labels from ``input_ids``.
    """
    pad_id = tokenizer.pad_token_id
    pad_on_left = tokenizer.padding_side == "left"
    features = {'input_ids': [], 'attention_mask': [], 'labels': []}

    for prompt, response in zip(examples['input'], examples['output']):
        # Special tokens are added by hand so BOS/EOS appear exactly once,
        # independent of the tokenizer's add_bos_token/add_eos_token flags.
        prompt_ids = [tokenizer.bos_token_id] + tokenizer(
            prompt, add_special_tokens=False
        )['input_ids']
        response_ids = tokenizer(
            response, add_special_tokens=False
        )['input_ids'] + [tokenizer.eos_token_id]

        input_ids = (prompt_ids + response_ids)[:max_length]
        labels = ([IGNORE_INDEX] * len(prompt_ids) + response_ids)[:max_length]
        attention_mask = [1] * len(input_ids)

        pad_len = max_length - len(input_ids)
        pad_ids = [pad_id] * pad_len
        pad_mask = [0] * pad_len
        pad_labels = [IGNORE_INDEX] * pad_len
        if pad_on_left:
            input_ids = pad_ids + input_ids
            attention_mask = pad_mask + attention_mask
            labels = pad_labels + labels
        else:
            input_ids = input_ids + pad_ids
            attention_mask = attention_mask + pad_mask
            labels = labels + pad_labels

        features['input_ids'].append(input_ids)
        features['attention_mask'].append(attention_mask)
        features['labels'].append(labels)

    return features


trained_data = train_dataset.map(tokenize_function, batched=True)
validation_data = validation_dataset.map(tokenize_function, batched=True)
test_data = test_dataset.map(tokenize_function, batched=True)

Training arguments

In [8]:
# Create an Accelerate handle and pass the PEFT-wrapped model through it.
# NOTE(review): the model is later handed to SFTTrainer, which presumably
# performs its own device/accelerator setup, so this explicit prepare step may
# be redundant — confirm before relying on it.
accelerator = Accelerator()
model = accelerator.prepare_model(model)

In [9]:
# Where checkpoints and logs are written.
output_dir = 't5_datasets_class1/results'
logs_dir = 't5_datasets_class1/logs'

# Core schedule knobs.
batch_size = 1
epochs = 5

sft_config = SFTConfig(
    # --- outputs ---
    output_dir=output_dir,
    logging_dir=logs_dir,
    # --- batching / sequence length ---
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    max_seq_length=2048,
    # gradient_accumulation_steps=2 is intentionally left disabled.
    # --- optimisation ---
    optim="paged_adamw_32bit",
    learning_rate=3e-4,
    warmup_steps=100,
    num_train_epochs=epochs,
    # --- precision: no mixed-precision autocast ---
    fp16=False,
    bf16=False,
    # --- evaluation and logging cadence ---
    eval_strategy="steps",
    eval_steps=200,
    logging_strategy="steps",
    logging_steps=200,
)

Evaluation metrics

In [10]:
from evaluate import load
import numpy as np

# The `evaluate` "perplexity" metric scores raw text strings by loading its own
# copy of a Hub model, so it cannot consume the logits produced during
# evaluation. Perplexity is computed directly from logits and labels instead,
# and the metric object is therefore no longer loaded here.
def compute_metrics(eval_pred):
    """Compute mean per-sequence perplexity from evaluation logits and labels.

    Args:
        eval_pred: pair ``(logits, labels)``. ``logits`` is indexed as
            (batch, sequence, vocab) and ``labels`` as (batch, sequence);
            label positions equal to -100 are ignored. If ``logits`` is a
            tuple, its first element is used.

    Returns:
        ``{"mean_perplexity": float}`` — the average over sequences of
        ``exp(mean negative log-likelihood of the scored tokens)``. ``nan``
        when no sequence has any scored token.

    NOTE(review): the full logits of the evaluation set must fit in host
    memory for this function to be called; for large vocabularies consider
    reducing them with ``preprocess_logits_for_metrics`` or deriving
    perplexity from the reported evaluation loss.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]
    logits = np.asarray(logits)
    labels = np.asarray(labels)

    # Causal LM: the logits at position t score the token at position t + 1.
    shifted_logits = logits[:, :-1, :]
    shifted_labels = labels[:, 1:]

    sequence_perplexities = []
    # One sequence at a time keeps the float64 working copy small.
    for seq_logits, seq_labels in zip(shifted_logits, shifted_labels):
        scored = seq_labels != -100
        if not scored.any():
            continue
        kept_logits = seq_logits[scored].astype(np.float64)
        kept_labels = seq_labels[scored].astype(np.int64)

        # Numerically stable log-softmax.
        kept_logits = kept_logits - kept_logits.max(axis=-1, keepdims=True)
        log_probs = kept_logits - np.log(
            np.exp(kept_logits).sum(axis=-1, keepdims=True)
        )
        token_nll = -log_probs[np.arange(kept_labels.shape[0]), kept_labels]
        sequence_perplexities.append(float(np.exp(token_nll.mean())))

    if not sequence_perplexities:
        return {"mean_perplexity": float("nan")}
    return {"mean_perplexity": float(np.mean(sequence_perplexities))}


In [11]:
# Sanity check: select the first tokenized training row and display the
# resulting dataset's columns and row count.
trained_data.select(range(1))

Dataset({
    features: ['input', 'output', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 1
})

Training

In [12]:
# Build the supervised fine-tuning trainer.
# `model` has already been wrapped with LoRA adapters via get_peft_model, so
# `peft_config` is deliberately NOT passed again: the trainer expects either a
# PEFT-wrapped model or a base model plus a peft_config, not both.
trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=trained_data,
    eval_dataset=validation_data,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the trainer built in the previous cell.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Saving the model

In [None]:
# Write the fine-tuned model and then its tokenizer into one directory so the
# pair can be reloaded together.
model_path = 't5_data/model'
for artifact in (model, tokenizer):
    artifact.save_pretrained(model_path)