In [None]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [2]:
import wandb
from datasets import load_dataset, get_dataset_config_names
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer

In [3]:
model_name = '/mnt/d/Models/LLaMA2' #'meta-llama/Llama-2-7b-hf'
dataset_name = 'openai/gsm8k'

In [4]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


[34m[1mwandb[0m: Currently logged in as: [33malexguha[0m ([33malexguha-wb[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

In [7]:
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [8]:
configs = get_dataset_config_names(dataset_name)
dataset = load_dataset(dataset_name, configs[0], split='train')
# print(dataset)
print(dataset[0])

{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?', 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}


In [9]:
def format(examples):
    output = []
    for i in range(len(examples['question'])):
        output.append('Question: ' + examples['question'][i] + '\n\nAnswer: ' + examples['answer'][i])
    return output

In [10]:
peft_params = LoraConfig(
    lora_alpha=32, # Run 10: 64
    r=16, # Run 10: 32
    bias="none",
    task_type="CAUSAL_LM",
    use_dora=True,
    target_modules="all-linear"
)

In [11]:
run_num = '12'
training_arguments = TrainingArguments(
    output_dir='/mnt/d/Models/LLaMA2/LLaMA2-7b-GSM8k-' + run_num, #"/home/aguha6/CSE_576/LLaMA2-7b-GSM8k-10",
    num_train_epochs=1,
    per_device_train_batch_size=1, # ~8
    gradient_accumulation_steps=8, # 1
    learning_rate=1e-4,
    weight_decay=0.001,
    fp16=True,
    optim='paged_adamw_8bit',
    report_to="wandb",
    logging_steps=4, # Ensure per_device_train_batch_size * gradient_accumulation_steps * logging_steps ~= 50
    run_name="llama-2-7b-GSM8k-" + run_num,
    torch_empty_cache_steps=8 # Run 9: 40
)

In [12]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=training_arguments,
    peft_config=peft_params,
    formatting_func=format,
    max_seq_length=2048,
)
print(trainer.train_dataset)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 7473
})


In [None]:
trainer.train()

In [14]:
trainer.save_model("/mnt/d/Models/LLaMA2/LLaMA2-7b-GSM8k-" + run_num)

In [15]:
wandb.finish()

0,1
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▃▆█▂▂▃▃▂▁▃▃▂▂▂▃▃▂▂▃▃▂▃▃▃▃▃▃▅▃▄▃▃▃▃▃▃▂▄▂▃
train/learning_rate,███▇▇▇▇▇▇▆▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁
train/loss,█▅▃▃▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▃▁▁▂▂▂▁▁▁▁▂

0,1
total_flos,1.0155934281867264e+17
train/epoch,1.0
train/global_step,623.0
train/grad_norm,0.42427
train/learning_rate,0.0
train/loss,0.8268
train_loss,0.83733
train_runtime,780.6346
train_samples_per_second,9.573
train_steps_per_second,0.798
