In [None]:
!pip install -q peft bitsandbytes transformers trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m544.8/544.8 kB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# Fetch only the training split of the WikiQA dataset.
dataset = load_dataset("wiki_qa", split="train")

# Text inserted into the system turn of every training prompt; empty here.
system_prompt = ""

# Final expression of the cell: render the dataset summary (features, row count).
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/594k [00:00<?, ?B/s]

data/validation-00000-of-00001.parquet:   0%|          | 0.00/264k [00:00<?, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/2.00M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/6165 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2733 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/20360 [00:00<?, ? examples/s]

Dataset({
    features: ['question_id', 'question', 'document_title', 'answer', 'label'],
    num_rows: 20360
})

In [None]:
# Convert the loaded dataset into a pandas DataFrame for the column/row edits below.
df=dataset.to_pandas()

In [None]:
# WikiQA pairs each question with several candidate sentences and marks the
# ones that actually answer it with label == 1 (see the dataset card). Keep
# only those rows; otherwise most training targets would be sentences that do
# not answer the question.
df = df[df['label'] == 1]

# Cap the working set at the first 1000 remaining rows.
df = df[0:1000]

# Only 'question' and 'answer' are needed from here on.
df = df.drop(columns=['question_id', 'document_title', 'label'])

In [None]:
# Llama 3 chat layout: each role header is followed by a blank line ("\n\n")
# before the message body, and each turn is closed with <|eot_id|>.
# <|begin_of_text|> is deliberately NOT written into the string: the tokenizer
# prepends it when the text is tokenized, so hard-coding it here would produce
# two BOS tokens at the start of every training sequence.
def _format_example(row):
    """Render one question/answer row as a single-turn Llama 3 chat transcript."""
    return (
        f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{row['question']}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n{row['answer']}<|eot_id|>"
    )


df['text'] = df.apply(_format_example, axis=1)

In [None]:
# The raw question/answer columns have been folded into 'text'; remove them.
df = df.drop(['question', 'answer'], axis=1)

In [None]:
# Hold out a reproducible (fixed random_state) 10% sample of the rows for evaluation.
eval_df = df.sample(frac=0.1, random_state=42)

# Every row whose index label was not sampled above stays in the training portion.
remaining_df = df.loc[~df.index.isin(eval_df.index)]

In [None]:
from datasets import Dataset

# preserve_index=False keeps the pandas index from being stored as an extra
# '__index_level_0__' column alongside 'text'.
train_dataset = Dataset.from_pandas(remaining_df, preserve_index=False)

In [None]:
# preserve_index=False keeps the pandas index from being stored as an extra
# '__index_level_0__' column alongside 'text'.
eval_dataset = Dataset.from_pandas(eval_df, preserve_index=False)

# A cell renders only its final expression, so both datasets are returned
# together as a tuple to make each summary visible.
train_dataset, eval_dataset

Dataset({
    features: ['text', '__index_level_0__'],
    num_rows: 100
})

In [None]:
MODEL_NAME="unsloth/Llama-3.2-3B-Instruct-bnb-4bit"


def create_model_and_tokenizer(model_name=MODEL_NAME):
    """Load a 4-bit quantized causal language model together with its tokenizer.

    Args:
        model_name: Checkpoint identifier passed to ``from_pretrained`` for
            both the model and the tokenizer. Defaults to ``MODEL_NAME``.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer pads on the right and is
        guaranteed to have a pad token.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    # trust_remote_code is intentionally not enabled: the checkpoint loads as
    # the built-in LlamaForCausalLM class, so there is no reason to allow
    # execution of code shipped inside the model repository.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        use_safetensors=True,
        quantization_config=bnb_config,
        device_map="auto",
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Fall back to EOS for padding only when the checkpoint defines no pad
    # token; a dedicated pad token supplied by the checkpoint is left intact
    # so that padding and end-of-sequence stay distinguishable.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    return model, tokenizer

In [None]:
# Instantiate the quantized model and its tokenizer via the helper defined above.
model, tokenizer = create_model_and_tokenizer()
# Switch off use_cache on the model config before training.
model.config.use_cache = False



In [None]:
# Print every submodule path next to its class name; useful for choosing
# which layer names to list as LoRA targets.
for module_path, submodule in model.named_modules():
    print(module_path, type(submodule).__name__)

 LlamaForCausalLM
model LlamaModel
model.embed_tokens Embedding
model.layers ModuleList
model.layers.0 LlamaDecoderLayer
model.layers.0.self_attn LlamaAttention
model.layers.0.self_attn.q_proj Linear4bit
model.layers.0.self_attn.k_proj Linear4bit
model.layers.0.self_attn.v_proj Linear4bit
model.layers.0.self_attn.o_proj Linear4bit
model.layers.0.mlp LlamaMLP
model.layers.0.mlp.gate_proj Linear4bit
model.layers.0.mlp.up_proj Linear4bit
model.layers.0.mlp.down_proj Linear4bit
model.layers.0.mlp.act_fn SiLU
model.layers.0.input_layernorm LlamaRMSNorm
model.layers.0.post_attention_layernorm LlamaRMSNorm
model.layers.1 LlamaDecoderLayer
model.layers.1.self_attn LlamaAttention
model.layers.1.self_attn.q_proj Linear4bit
model.layers.1.self_attn.k_proj Linear4bit
model.layers.1.self_attn.v_proj Linear4bit
model.layers.1.self_attn.o_proj Linear4bit
model.layers.1.mlp LlamaMLP
model.layers.1.mlp.gate_proj Linear4bit
model.layers.1.mlp.up_proj Linear4bit
model.layers.1.mlp.down_proj Linear4bit
mo

LoRA Settings

In [None]:
# LoRA hyperparameters, kept as named values so they can be tuned in one place.
lora_r, lora_alpha, lora_dropout = 16, 64, 0.1

# Adapters are attached to the query and key projections only.
# The wider alternative that was considered also includes "v_proj" and "o_proj".
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj"],
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    use_dora=True,
)

In [None]:

# Checkpoints and TensorBoard logs are written below this directory.
output_dir = './results'

training_arguments = TrainingArguments(
    per_device_train_batch_size=2,
    # Small eval batches keep the evaluation pass within the same memory
    # budget as training.
    per_device_eval_batch_size=2,
    # 2 samples x 4 accumulation steps = 8 samples per optimizer update per device.
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=2,
    warmup_ratio=0.05,
    # Evaluate on the same cadence as checkpointing. Without an evaluation
    # strategy the eval_dataset handed to the trainer is never scored.
    eval_strategy="epoch",
    save_strategy="epoch",
    group_by_length=True,
    output_dir=output_dir,
    report_to="tensorboard",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    # Hand over the tokenizer configured in create_model_and_tokenizer();
    # otherwise the trainer loads a fresh one and the pad-token / padding-side
    # settings made there are silently ignored.
    # NOTE(review): the keyword is `processing_class` in current TRL releases;
    # older releases call it `tokenizer`. Confirm against the installed version.
    processing_class=tokenizer,
    args=training_arguments,
)

# Release cached, currently unused GPU memory before training starts.
torch.cuda.empty_cache()



Adding EOS to train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning and keep the value returned by train() in train_result.
train_result = trainer.train()

Step,Training Loss
1,3.3585
2,4.4763
3,4.0994
4,4.3492
5,4.3632
6,4.2795
7,4.5514
8,4.7219
9,4.8372
10,4.5308
