# FineTune LLM - BitFit

In [1]:
"""
Self-contained BitFit (bias vectors) fine-tuning demo on TinyLlama-1.1B-Chat.
"""
import os
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    BitsAndBytesConfig
)
from peft import BiasConfig, get_peft_model, prepare_model_for_kbit_training

################################################################################
# 0. Environment & constants
################################################################################
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
USE_8BIT = True                 # flip to False for full-precision CPU run
HF_PUSH_REPO = "YOUR_USERNAME/tinyllama-lora-demo"   # change to your org/repo
MAX_SEQ_LEN = 256

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on {device}. 8-bit: {USE_8BIT}")

ImportError: cannot import name 'BiasConfig' from 'peft' (c:\Users\henri\Desktop\NTNU\5.Året\Annet\Prosjekter\ml-pipeline-sweeper\venv\Lib\site-packages\peft\__init__.py)

In [None]:
class BitFitFineTuner:
    """
    Wraps simple BitFit (bias vectors) fine-tuning behind an easy interface so it mirrors
    DistillationFineTuner.
    """
    def __init__(
        self,
        base_model_id: str,
        train_dataset,
        eval_dataset,
        rank: int = 16,
        alpha: int = 32,
        dropout: float = 0.05,
        **training_args_kwargs
    ):
        self.tokenizer = AutoTokenizer.from_pretrained(base_model_id)
        self.tokenizer.pad_token = self.tokenizer.eos_token

        bnb_cfg = BitsAndBytesConfig(
            load_in_8bit=True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            base_model_id, quantization_config=bnb_cfg, device_map="auto"
        )
        model = prepare_model_for_kbit_training(model)

        bias_cfg = BiasConfig(  # Different from LoRA
            bias_type="all",          # train every bias term
            task_type="CAUSAL_LM"
        )
        self.model = get_peft_model(model, bias_cfg)    # Different from LoRA

        args = TrainingArguments(**training_args_kwargs)
        self.trainer = Trainer(
            model=self.model,
            args=args,
            data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
        )

    def train(self):
        return self.trainer.train()

    @torch.no_grad()
    def generate(self, prompt: str, max_new_tokens: int = 40):
        device = next(self.model.parameters()).device
        ids = self.tokenizer(prompt, return_tensors="pt").to(device)
        out = self.model.generate(
            **ids,
            max_new_tokens=max_new_tokens,
            pad_token_id=self.tokenizer.eos_token_id
        )
        return self.tokenizer.decode(out[0], skip_special_tokens=True)

    def push_to_hub(self, repo_id: str):
        self.model.push_to_hub(repo_id, use_auth_token=True)
        self.tokenizer.push_to_hub(repo_id, use_auth_token=True)

In [None]:
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token

# --- Toy dataset --- #
examples = [
    {"text": "### User:\nTranslate 'Good morning' to Spanish.\n### Assistant:\n"},
    {"text": "### User:\nSummarise: 'The cat sat on the mat.'\n### Assistant:\n"},
    {"text": "### User:\nList three primary colours.\n### Assistant:\n"},
    {"text": "### User:\nWhat is 2 + 2?\n### Assistant:\n"},
    {"text": "### User:\nRewrite 'I like apples' in the past tense.\n### Assistant:\n"},
]
def tok_fn(e): return tokenizer(
    e["text"], max_length=256, truncation=True, padding="max_length"
)
ds = Dataset.from_list(examples).train_test_split(test_size=0.4, seed=0)
ds_tok = ds.map(tok_fn, remove_columns=["text"])

In [None]:
bitfit_ft = BitFitFineTuner(
    base_model_id=MODEL_ID,
    train_dataset=ds_tok["train"],
    eval_dataset=ds_tok["test"],
    output_dir="./bitfit_out",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    learning_rate=2e-4,
    logging_steps=1,
    optim="paged_adamw_8bit",
    report_to=[],
)
bitfit_ft.train()
print(bitfit_ft.generate("### User:\nWhat is 2 + 2?\n### Assistant:\n"))