In [None]:
import os
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, PrefixTuningConfig, TaskType

# Prefer the GPU when one is visible, otherwise fall back to CPU so the
# notebook still runs end-to-end on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Hugging Face hub id of the base checkpoint that every experiment below loads.
model = "Qwen/Qwen2.5-1.5B"

In [None]:
def count_active_params(model) -> int:
    """Return how many scalar weights in ``model`` have ``requires_grad=True``."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

def count_total_params(model) -> int:
    """Return the number of scalar weights in ``model``, trainable or frozen."""
    sizes = [param.numel() for param in model.parameters()]
    return sum(sizes)

def print_active(model, label: str):
    """
    Print one summary line for ``model``, prefixed with ``label``.

    The line reports the trainable parameter count, the total parameter
    count, and the trainable share as a percentage (0.0 when the model has
    no parameters at all).
    """
    total = count_total_params(model)
    active = count_active_params(model)
    if total > 0:
        pct = 100.0 * active / total
    else:
        # Avoid dividing by zero for a parameter-free model.
        pct = 0.0
    print(f"{label}: active={active:,}  (of total={total:,}, {pct:.4f}%)")


def load_base_model(model_name: str, trainable: bool, device: str):
    """
    Load the tokenizer and causal-LM weights for ``model_name``.

    Note: For 7B, full fine-tuning may not fit on small GPUs.

    Args:
        model_name: Hugging Face hub id or local path of the checkpoint.
        trainable: When False, every parameter is frozen
            (``requires_grad=False``), which is the usual starting point for
            PEFT methods. When True, parameters are left as loaded.
        device: Requested device string such as "cuda" or "cpu". The GPU
            path (fp16 weights, ``device_map="auto"``) is taken only when
            this starts with "cuda" AND CUDA is actually available;
            otherwise the model is loaded in fp32 without a device map.

    Returns:
        A ``(tokenizer, model)`` tuple. The tokenizer's pad token defaults to
        its EOS token when none is defined, and ``model.config.use_cache`` is
        set to False.
    """
    # `device` is deliberately not forwarded here: it is not a tokenizer
    # loading option, and passing it only stored a stray init kwarg.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Honor the caller's device request instead of silently ignoring it.
    use_gpu = str(device).startswith("cuda") and torch.cuda.is_available()

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if use_gpu else torch.float32,
        device_map="auto" if use_gpu else None,
    )

    # If not trainable, freeze everything (useful for PEFT base)
    if not trainable:
        for p in model.parameters():
            p.requires_grad = False

    model.config.use_cache = False
    return tokenizer, model

In [None]:
# Baseline: load the base model with every parameter left trainable.
# The tokenizer is not needed for counting, so it is discarded.
_, model_full = load_base_model(model_name=model, trainable=True, device=device)
print_active(model=model_full, label="FULL fine-tuning")

In [None]:
# Prefix tuning: start from a fully frozen base model and let PEFT wrap it
# with the prefix-tuning configuration below.
tokenizer_prefix, model_prefix_base = load_base_model(
    model_name=model,
    trainable=False,
    device=device,
)

prefix_cfg = PrefixTuningConfig(
    num_virtual_tokens=20,  # typical: 10-50
    task_type=TaskType.CAUSAL_LM,
)
model_prefix = get_peft_model(model_prefix_base, prefix_cfg)
print_active(model=model_prefix, label="PREFIX-tuning")


In [None]:
# LoRA: start from a fully frozen base model and let PEFT wrap it with the
# LoRA configuration below.
tokenizer_lora, model_lora_base = load_base_model(
    model_name=model,
    trainable=False,
    device=device,
)

# The target module names below are typical for LLaMA/Mistral-style architectures.
# If your model uses different module names, change target_modules accordingly.
lora_cfg = LoraConfig(
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM,
)

model_lora = get_peft_model(model_lora_base, lora_cfg)
print_active(model=model_lora, label="LoRA (r=8)")

In [None]:
def set_train_last_k_layers(model, k: int, train_lm_head: bool = True):
    """
    Makes trainable only parameters belonging to the last k transformer layers.

    All parameters are frozen first. The layer stack is then located in one
    of two ways:

    1. Directly, via ``model.model.layers`` when that attribute exists.
    2. Otherwise by scanning parameter names for a ``layers.<i>.`` or
       ``h.<i>.`` component and unfreezing those whose index falls in the
       last ``k``. If several sub-networks use such names, all of them are
       matched.

    Args:
        model: Module exposing ``parameters()`` / ``named_parameters()``.
        k: Number of trailing layers to unfreeze. Values <= 0 unfreeze no
            layer; values larger than the layer count unfreeze all layers.
        train_lm_head: If True and the model has an ``lm_head`` attribute,
            its parameters are unfrozen as well.

    Raises:
        NotImplementedError: If no layer stack can be found by either route.
            The model is left fully frozen in that case.
    """
    # Freeze everything first
    for p in model.parameters():
        p.requires_grad = False

    inner = getattr(model, "model", None)
    layers = getattr(inner, "layers", None) if inner is not None else None

    if layers is not None:
        n = len(layers)
        for i in range(max(0, n - k), n):
            for p in layers[i].parameters():
                p.requires_grad = True
    else:
        # Fallback: name-based matching on layer indices. The pattern is
        # anchored at a dotted-name boundary so that a component merely
        # ending in "h" (e.g. "mesh.0.") is not mistaken for a layer.
        pattern = re.compile(r"(?:^|\.)(?:layers|h)\.(\d+)\.")
        indexed = []
        for name, p in model.named_parameters():
            m = pattern.search(name)
            if m:
                indexed.append((int(m.group(1)), p))

        if not indexed:
            raise NotImplementedError("Model architecture not supported for last-k layer freezing.")

        max_i = max(idx for idx, _ in indexed)
        start_i = max(0, max_i - (k - 1))
        for idx, p in indexed:
            if idx >= start_i:
                p.requires_grad = True

    # Optionally train LM head
    if train_lm_head and hasattr(model, "lm_head"):
        for p in model.lm_head.parameters():
            p.requires_grad = True

# Layer freezing: load a fully frozen base model, then re-enable gradients
# for the last LAST_K layers (and optionally the LM head).
LAST_K = 1
TRAIN_LM_HEAD = True

tokenizer_freeze, model_freeze = load_base_model(
    model_name=model,
    trainable=False,
    device=device,
)
set_train_last_k_layers(model_freeze, k=LAST_K, train_lm_head=TRAIN_LM_HEAD)

print_active(
    model_freeze,
    "".join(
        [
            f"Layer freezing (last {LAST_K} layers",
            ", + lm_head" if TRAIN_LM_HEAD else "",
            ")",
        ]
    ),
)