In [24]:
from datasets import load_dataset
from transformers import AutoTokenizer

model_name = "meta-llama/Meta-Llama-3-8B"  # or your path

# 1. Load the dataset (from JSONL)
dataset = load_dataset("json", data_files="data/finetune_champion_gen.jsonl", split="train")

# 2. Load the model's tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
# 3. Tokenize
def tokenize(example):
    prompt = example["prompt"] + "\n"
    response = example["response"]
    full_prompt = prompt + response

    tokens = tokenizer(
        full_prompt,
        truncation=True,
        padding="max_length",
        max_length=1024,
    )
    labels = tokens["input_ids"].copy()
    prompt_len = len(tokenizer(prompt)["input_ids"])  # includes bos if present

    # Mask out the prompt tokens from labels
    labels[:prompt_len] = [-100] * prompt_len
    tokens["labels"] = labels
    return tokens


tokenized_dataset = dataset.map(tokenize, batched=False)

tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])


Map:   0%|          | 0/340 [00:00<?, ? examples/s]

In [6]:
!pip install -U bitsandbytes




In [36]:
from transformers import AutoModelForCausalLM
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "meta-llama/Meta-Llama-3-8B"


bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,             # This enables QLoRA
    bnb_4bit_quant_type="nf4",     # This is the quant type you want (for Llama)
    bnb_4bit_use_double_quant=True,  # Optional, improves some quantization
    bnb_4bit_compute_dtype="bfloat16"  # Use "bfloat16" or "float16" for Ampere GPUs
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,   # Only this, no load_in_4bit
    torch_dtype="auto",              # optional, you can specify bfloat16/float16 if you want
)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
!pip install peft

Collecting peft
  Using cached peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Using cached peft-0.15.2-py3-none-any.whl (411 kB)
Installing collected packages: peft
Successfully installed peft-0.15.2


In [37]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,             # LoRA rank (memory/performance tradeoff, 8-16 is common)
    lora_alpha=16,   # Scaling (double r is default)
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
model.print_trainable_parameters()  # sanity check


trainable params: 3,407,872 || all params: 8,033,669,120 || trainable%: 0.0424


In [5]:
!pip install tf-keras

Collecting tf-keras
  Using cached tf_keras-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Using cached tf_keras-2.19.0-py3-none-any.whl (1.7 MB)
Installing collected packages: tf-keras
Successfully installed tf-keras-2.19.0


In [48]:
print(tokenized_dataset[0])


{'input_ids': [128000, 4110, 264, 9130, 315, 42986, 18824, 2728, 279, 2768, 3649, 512, 17046, 25, 5473, 12393, 1232, 364, 13655, 518, 364, 12393, 5796, 8438, 1232, 14226, 8011, 518, 364, 12393, 1311, 4469, 1232, 364, 18, 518, 364, 12393, 1311, 4469, 5796, 8438, 1232, 14226, 15, 13, 20, 518, 364, 43648, 1232, 364, 1987, 518, 364, 43648, 5796, 8438, 1232, 14226, 19, 13, 23, 518, 364, 21208, 41995, 1232, 364, 1399, 518, 364, 21208, 41995, 5796, 8438, 1232, 14226, 20, 518, 364, 38551, 5023, 380, 1232, 364, 843, 518, 364, 38551, 5023, 380, 5796, 8438, 1232, 14226, 17, 13, 2304, 518, 364, 3479, 17374, 1232, 364, 12901, 518, 364, 21208, 9897, 1232, 364, 10005, 518, 364, 21208, 17374, 5796, 8438, 1232, 14226, 17, 13, 20, 39303, 364, 9416, 1232, 364, 3561, 268, 518, 364, 9416, 1311, 4469, 1232, 364, 13655, 489, 8011, 518, 364, 9416, 1311, 4469, 5796, 8438, 1232, 76452, 35003, 5144, 25, 16290, 1347, 5248, 800, 685, 11, 578, 12538, 258, 37080, 11, 16290, 1347, 5248, 800, 685, 11, 24218, 42743, 37

In [None]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)


In [35]:
import torch
device_map = {"": torch.cuda.current_device()} 
print(device_map)

{'': 0}


In [49]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,  # Adjust based on VRAM
    num_train_epochs=1,
    save_steps=100,
    logging_steps=10,
    fp16=True,  # If on Ampere/NVIDIA RTX (which 4060 is)
    report_to="none",
    remove_unused_columns=False
)


In [53]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    
    data_collator=data_collator,
    
)


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [54]:
trainer.train()


Step,Training Loss
10,1.7103
20,1.6664
30,1.7553
40,1.7036
50,1.4661
60,1.5702
70,1.5029
80,1.505
90,1.2816
100,1.3844


TrainOutput(global_step=340, training_loss=1.2481605277341954, metrics={'train_runtime': 19841.2191, 'train_samples_per_second': 0.017, 'train_steps_per_second': 0.017, 'total_flos': 1.568460635111424e+16, 'train_loss': 1.2481605277341954, 'epoch': 1.0})

In [43]:
print(tokenized_dataset[0].keys())


dict_keys(['input_ids', 'attention_mask', 'labels'])


In [60]:

trainer.save_model("./results/checkpoint-final")
tokenizer.save_pretrained("./results/checkpoint-final")
trainer.save_state()  # if you want to resume optimizer/scheduler
