<a href="https://colab.research.google.com/github/SCCSMARTCODE/Deep-Learning-03-LLM-FineTuning/blob/main/fine_tuning_llama2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-Tuning Llama 2 with QLoRA

### Installing and Importing Dependencies

In [9]:
# Remove any already-installed copies of the fine-tuning stack and empty pip's
# download cache. Presumably this guarantees that the install cell below
# resolves a fresh set of packages — TODO confirm the reset is still needed.
!pip uninstall transformers peft trl bitsandbytes accelerate -y
!pip cache purge

Found existing installation: transformers 4.51.3
Uninstalling transformers-4.51.3:
  Successfully uninstalled transformers-4.51.3
Found existing installation: peft 0.15.2
Uninstalling peft-0.15.2:
  Successfully uninstalled peft-0.15.2
Found existing installation: trl 0.17.0
Uninstalling trl-0.17.0:
  Successfully uninstalled trl-0.17.0
Found existing installation: bitsandbytes 0.45.5
Uninstalling bitsandbytes-0.45.5:
  Successfully uninstalled bitsandbytes-0.45.5
Found existing installation: accelerate 1.7.0
Uninstalling accelerate-1.7.0:
  Successfully uninstalled accelerate-1.7.0
Files removed: 94


In [10]:
!pip install --upgrade pip

!pip install transformers peft bitsandbytes accelerate trl safetensors



Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.1.1
Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting peft
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting accelerate
  Downloading accelerate-1.7.0-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Downloading trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [11]:
import os
import torch
from datasets import load_dataset
from dataclasses import dataclass, field
from typing import Optional

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# PEFT Settings

In [12]:

@dataclass
class ModelSettings:
    """Identifiers of the base model, the training dataset and the output model."""

    # Model id handed to `from_pretrained` for both the model and the tokenizer.
    model_name: str = "NousResearch/Llama-2-7b-chat-hf"
    # Dataset id handed to `load_dataset` (the "train" split is loaded).
    dataset_name: str = "mlabonne/guanaco-llama2-1k"
    # Not read anywhere in the visible cells; presumably the name under which
    # the fine-tuned model is meant to be saved — TODO confirm.
    new_model: str = "Llama-2-7b-chat-finetune"

@dataclass
class QLoRASettings:
    """Values forwarded to `peft.LoraConfig` when the adapter config is built."""

    # Passed as `r=` to LoraConfig.
    lora_r: int = 4
    # Passed as `lora_alpha=` to LoraConfig.
    lora_alpha: int = 16
    # Passed as `lora_dropout=` to LoraConfig.
    lora_dropout: float = 0.1

@dataclass
class BitsAndBytesSettings:
    """Values forwarded to `transformers.BitsAndBytesConfig` for model loading."""

    # Passed as `load_in_4bit=`; also gates the bf16 capability hint.
    load_in_4bit: bool = True
    # Passed as `bnb_4bit_quant_type=`.
    bnb_4bit_quant_type: str = "nf4"
    # Name of a torch dtype attribute; resolved with `getattr(torch, ...)`
    # before being passed as `bnb_4bit_compute_dtype=`.
    bnb_4bit_compute_dtype: str = "float16"
    # Passed as `bnb_4bit_use_double_quant=`.
    bnb_4bit_use_double_quant: bool = False

@dataclass
class TrainingSettings:
    """Hyperparameters of the training run.

    The notebook's training cell reads these values when it builds
    ``transformers.TrainingArguments``. Every attribute is a real dataclass
    field, so each one can be overridden through the constructor, e.g.
    ``TrainingSettings(optim="adamw_torch", max_steps=100)``.
    """

    output_dir: str = "./results"
    num_train_epochs: int = 1
    fp16: bool = False
    bf16: bool = False
    per_device_train_batch_size: int = 4
    per_device_eval_batch_size: int = 4
    gradient_accumulation_steps: int = 1
    gradient_checkpointing: bool = True
    max_grad_norm: float = 0.3
    learning_rate: float = 2e-4
    weight_decay: float = 0.001
    lr_scheduler_type: str = "cosine"
    warmup_ratio: float = 0.03
    group_by_length: bool = True
    save_steps: int = 0
    logging_steps: int = 25
    # `optim` and `max_steps` used to be un-annotated class attributes, which
    # the dataclass machinery ignores (no constructor argument, absent from
    # repr/eq). They are annotated now and placed last so that the positional
    # order of the pre-existing fields is unchanged.
    optim: str = "paged_adamw_32bit"
    max_steps: int = -1

@dataclass
class SFTParameters:
    """Supervised fine-tuning options plus the device placement for the model."""

    # Not read anywhere in the visible cells; presumably an upper bound on the
    # tokenized sequence length — TODO confirm and wire up if needed.
    max_seq_length: Optional[int] = None
    # Not read anywhere in the visible cells — TODO confirm intended use.
    packing: bool = False
    # Passed as `device_map=` when the model is loaded; presumably {"": 0}
    # places the whole model on device 0 — verify against the transformers docs.
    # A factory is used so instances do not share one mutable dict.
    device_map: dict = field(default_factory=lambda: {"": 0})

class FinetuningConfiguration:
    """Single container for every settings group used by the notebook.

    All constructor arguments are optional. Any group that is omitted (or is
    otherwise falsy) is replaced by a default-constructed instance of the
    corresponding settings class.
    """

    def __init__(self,
                 model_settings: ModelSettings = None,
                 qlora_settings: QLoRASettings = None,
                 bnb_settings: BitsAndBytesSettings = None,
                 training_settings: TrainingSettings = None,
                 sft_params: SFTParameters = None):
        # Keep the caller's object when one is given, otherwise fall back to
        # the defaults of the matching settings class.
        self.model_settings = model_settings if model_settings else ModelSettings()
        self.qlora_settings = qlora_settings if qlora_settings else QLoRASettings()
        self.bnb_settings = bnb_settings if bnb_settings else BitsAndBytesSettings()
        self.training_settings = (
            training_settings if training_settings else TrainingSettings()
        )
        self.sft_params = sft_params if sft_params else SFTParameters()

# Loading the Dataset, Model, Tokenizer, and LoRA Configuration

In [13]:
settings = FinetuningConfiguration()

# Training split of the configured dataset
dataset = load_dataset(settings.model_settings.dataset_name, split="train")

# The compute dtype is stored as a string (e.g. "float16"); resolve it to the
# corresponding torch dtype object.
compute_dtype = getattr(torch, settings.bnb_settings.bnb_4bit_compute_dtype)

# Quantization options used when loading the base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=settings.bnb_settings.load_in_4bit,
    bnb_4bit_quant_type=settings.bnb_settings.bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=settings.bnb_settings.bnb_4bit_use_double_quant,
)

# Check GPU compatibility with bfloat16.
# The message is a recommendation to switch bf16 on, so it is only useful when
# bf16 is currently off. The previous condition required bf16 to already be
# True, which meant the hint never appeared with the default settings.
if compute_dtype == torch.float16 and settings.bnb_settings.load_in_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8 and not settings.training_settings.bf16:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load the base model with the quantization config on the configured device map
model = AutoModelForCausalLM.from_pretrained(
    settings.model_settings.model_name,
    quantization_config=bnb_config,
    device_map=settings.sft_params.device_map,
)
# NOTE(review): cache disabled for training, presumably because it is only
# useful at generation time — confirm against the transformers docs.
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
# NOTE(review): trust_remote_code=True lets the Hub repository run its own code
# at load time; keep it only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(settings.model_settings.model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # reuse the EOS token for padding
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=settings.qlora_settings.lora_alpha,
    lora_dropout=settings.qlora_settings.lora_dropout,
    r=settings.qlora_settings.lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


(…)-00000-of-00001-9ad84bb9cf65a42f.parquet:   0%|          | 0.00/967k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

# Training

In [14]:
# Set training parameters.
# Every value comes from `settings.training_settings`, including
# `per_device_eval_batch_size` and `gradient_checkpointing`, which were
# declared in the settings but previously never forwarded.
training_arguments = TrainingArguments(
    output_dir=settings.training_settings.output_dir,
    num_train_epochs=settings.training_settings.num_train_epochs,
    per_device_train_batch_size=settings.training_settings.per_device_train_batch_size,
    per_device_eval_batch_size=settings.training_settings.per_device_eval_batch_size,
    gradient_accumulation_steps=settings.training_settings.gradient_accumulation_steps,
    gradient_checkpointing=settings.training_settings.gradient_checkpointing,
    optim=settings.training_settings.optim,
    save_steps=settings.training_settings.save_steps,
    logging_steps=settings.training_settings.logging_steps,
    learning_rate=settings.training_settings.learning_rate,
    weight_decay=settings.training_settings.weight_decay,
    fp16=settings.training_settings.fp16,
    bf16=settings.training_settings.bf16,
    max_grad_norm=settings.training_settings.max_grad_norm,
    max_steps=settings.training_settings.max_steps,
    warmup_ratio=settings.training_settings.warmup_ratio,
    group_by_length=settings.training_settings.group_by_length,
    lr_scheduler_type=settings.training_settings.lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters.
# The tokenizer configured in the loading cell (pad token = EOS, right padding)
# is passed explicitly; without it the trainer does not use that configuration.
# NOTE(review): `processing_class` is the keyword in the TRL version recorded
# in the install log (0.17.0); older TRL releases used `tokenizer=` instead.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    processing_class=tokenizer,
    args=training_arguments,
)

Converting train dataset to ChatML:   0%|          | 0/1000 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [16]:
# Train model
# Runs training on the `trainer` object built in the previous cell; as the last
# expression of the cell, the value returned by `train()` is displayed.
trainer.train()