# ▶️ Configure GPUs

In [1]:
import os

# Restrict this process to GPU 0. This is the notebook's first cell, so the
# variable is already set when the CUDA-using libraries are imported below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# ▶️ Load and Preprocess DBPedia dataset

In [2]:
# Import modules for LLM finetuning and evaluation
import finetune as ft
import evaluate as ev

In [3]:
from datasets import load_dataset

# Identifier of the DBPedia dataset handed to `load_dataset`.
DATASET_ID = "fancyzhx/dbpedia_14"

# Unmodified dataset; sampling and preprocessing happen in the following cells.
raw_dataset = load_dataset(DATASET_ID)

In [4]:
# Keep a 10% subsample of the raw dataset (ratio=0.1) with shuffling turned
# off; "label" is the column that holds the class labels.
dataset = ft.sample_dataset(
    raw_dataset,
    labels_column="label",
    ratio=0.1,
    shuffle=False,
)

In [5]:
# Prepare the sampled dataset for fine-tuning. The call also returns the label
# names, which the evaluation loop at the end of the notebook needs.
# NOTE(review): `dataset` is rebound to the preprocessed result, so re-running
# this cell on its own feeds already-preprocessed data back in -- re-run the
# sampling cell first. TODO confirm whether preprocess_dataset tolerates that.
dataset, label_names = ft.preprocess_dataset(
    dataset,
    text_column="content",
    labels_column="label",
)

# ▶️ Load Baseline LLM

**TODO:** Refactor the model-loading code below into a single function, `ft.load_model(name, device_map, quantized)`.

I think that would make this notebook a bit cleaner.

In [6]:
# Settings for the baseline model.
MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"  # identifier given to from_pretrained
MODEL_DEVICE = "cuda:0"  # used as the device_map when the model is loaded
QUANTIZED = True  # True -> load the model with 4-bit quantization

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Build the 4-bit quantization settings (the same ones QLoRA uses) only when
# quantization was requested; otherwise no quantization config is passed.
if QUANTIZED:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
else:
    bnb_config = None

# Load the baseline model onto the configured device.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map=MODEL_DEVICE,
    use_cache=False,  # use_cache is incompatible with gradient checkpointing
)

# Tokenizer that belongs to the same model ID.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# ▶️ Finetune LLM

In [8]:
# Name of the fine-tuned model; also used as the training output directory.
FINETUNED_LLM_NAME = "Qwen2.5-FT-DBPedia"

# --- LoRA adapter hyperparameters ---
# Rank of the adapter: the lower it is, the fewer parameters need training
# (smaller = more compression).
LORA_RANK_DIMENSION = 6
# Scaling factor for the LoRA layers (higher = stronger adaptation).
LORA_ALPHA = 8
# Dropout probability for the LoRA layers (helps prevent overfitting).
LORA_DROPOUT = 0.05

# --- Training hyperparameters ---
MAX_SEQ_LENGTH = 64
EPOCHS = 1
LEARNING_RATE = 2e-4

In [9]:
from peft import LoraConfig

# LoRA adapter definition for causal-LM fine-tuning, built from the
# hyperparameter constants defined in the previous cell.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=LORA_RANK_DIMENSION,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

In [10]:
from trl import SFTConfig, SFTTrainer

# Supervised fine-tuning configuration handed to ft.finetune.
sft_config = SFTConfig(
    # Memory / batching
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={'use_reentrant': False},
    gradient_accumulation_steps=1,
    per_device_train_batch_size=16,
    auto_find_batch_size=True,

    # Sequence handling
    max_seq_length=MAX_SEQ_LENGTH,
    packing=True,

    # Optimisation
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    optim='adamw_torch_fused',
    warmup_ratio=0.03,
    # The plain "constant" schedule takes no warmup steps, so warmup_ratio was
    # silently ignored. "constant_with_warmup" keeps the learning rate constant
    # after the warmup phase and makes the ratio above take effect.
    lr_scheduler_type="constant_with_warmup",

    # Logging / output
    logging_steps=10,
    logging_dir='./logs',
    output_dir=FINETUNED_LLM_NAME,
    report_to='none',
)

In [None]:
# Fine-tune the loaded model on the training split using the LoRA and SFT
# configurations. The model is saved to the directory: FINETUNED_LLM_NAME
ft.finetune(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset['train'],
    lora_config=lora_config,
    sft_config=sft_config,
)

# ▶️ Load Finetuned LLM

In [12]:
# Free the baseline model before loading the fine-tuned one; keeping both in
# memory would probably end in an out-of-memory error.
import gc
import torch

# Drop each notebook-level name if it exists. A cell runs at module level, so
# globals() is the namespace the names live in; missing names are skipped.
for stale_name in ("bnb_config", "tokenizer", "model"):
    globals().pop(stale_name, None)
del stale_name

# Collect the now-unreferenced objects, then release cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Settings for loading the fine-tuned model. They repeat values defined in
# earlier cells -- presumably so this section can be run on its own.
FINETUNED_LLM_NAME = "Qwen2.5-FT-DBPedia"
MODEL_DEVICE = "cuda:0"
QUANTIZED = True  # load the model with 4-bit quantization

# Rebinds `model` and `tokenizer` to the fine-tuned versions.
model, tokenizer = ft.load_finetuned_llm(
    FINETUNED_LLM_NAME,
    MODEL_DEVICE,
    QUANTIZED,
)

# ▶️ Evaluate LLM

In [14]:
import model_prompts as prompts
from evaluate import EvaluationConfig

In [15]:
# Evaluation configurations for the baseline LLM, built from a table of
# (name, prompt, max_tokens) rows -- one row per prompting strategy.
baseline_configurations = [
    EvaluationConfig(name=cfg_name, prompt=cfg_prompt, max_tokens=cfg_max_tokens)
    for cfg_name, cfg_prompt, cfg_max_tokens in (
        ("Zero-shot", prompts.PROMPT_ZEROSHOT, 10),
        ("Chain-of-Thought", prompts.PROMPT_COT, 100),
        ("Meta Prompt", prompts.PROMPT_META, 100),
        ("2-Shot CoT", prompts.PROMPT_COT_2SHOT, 100),
        ("4-Shot CoT", prompts.PROMPT_COT_4SHOT, 100),
    )
]

# Evaluation configuration for the fine-tuned LLM: a single entry with no
# prompt and max_tokens=1.
finetuned_configurations = [
    EvaluationConfig(name="Fine-tuned", prompt=None, max_tokens=1),
]

In [16]:
from peft import PeftModelForCausalLM

# Choose the evaluation configurations that match whichever model is loaded.
# isinstance (rather than an exact `type(...) is` comparison) also recognises
# subclasses of PeftModelForCausalLM as fine-tuned models.
is_finetuned = isinstance(model, PeftModelForCausalLM)
configurations = finetuned_configurations if is_finetuned else baseline_configurations

In [None]:
# Evaluate the loaded model on the test split once per selected configuration.
# Each result is collected in `results` and then written to disk.
results = []
for config in configurations:
    result = ev.evaluate(
        model=model,
        tokenizer=tokenizer,
        label_names=label_names,
        eval_dataset=dataset['test'],
        eval_config=config,
    )
    results.append(result)
    result.save()  # Saves to "output/<EvaluationConfig.name>"