In [None]:
# Environment setup: upgrade Triton to the newest available release, then
# install the published Unsloth package. `%pip` (rather than `!pip`) installs
# into the environment of the running kernel.
%pip install -U triton
%pip install unsloth

In [None]:
import torch

# CUDA compute capability of the current GPU as a (major, minor) pair.
# NOTE(review): this call needs a visible CUDA device; presumably the notebook
# is only ever run on a GPU runtime - confirm.
major_version, minor_version = torch.cuda.get_device_capability()

# Install Unsloth straight from the GitHub repository with the "colab-new" extras.
# NOTE(review): the previous cell already installs `unsloth` from the package
# index; confirm that both installs are intended.
%pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Compute capability 8.x and newer additionally gets flash-attn plus
# packaging/ninja/einops; older GPUs get the same list without those four.
# `--no-deps` stops pip from resolving the dependencies of the listed packages.
if major_version >= 8:
    %pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    %pip install --no-deps xformers trl peft accelerate bitsandbytes

# QLoRA: load the 4-bit quantized base model and attach LoRA adapters

In [None]:
from transformers import BitsAndBytesConfig
from unsloth import FastLanguageModel
import torch

# Local directory used as the download cache for the base checkpoint.
cache_dir = './cache'

# Hugging Face Hub id of the base model to fine-tune.
model_name = 'Saxo/Linkbricks-Horizon-AI-Korean-Advanced-8B'

# Choose the 4-bit compute dtype from what the GPU actually supports:
# bfloat16 where available, float16 otherwise. The notebook already branches
# on GPU generation for installs and for the fp16/bf16 training flags, so a
# hard-coded bfloat16 here would be inconsistent on GPUs without bf16 support.
compute_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

# QLoRA-style quantization: 4-bit NF4 weights with double quantization.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load the quantized base model together with its tokenizer.
# NOTE(review): Unsloth's `from_pretrained` also has its own `load_in_4bit`
# switch; confirm that passing `quantization_config` does not conflict with it
# in the installed Unsloth version.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    cache_dir=cache_dir,
    quantization_config=nf4_config,
)

In [None]:
# Wrap the loaded base model with LoRA adapters (rank 8, alpha 16, dropout 0.1,
# no bias terms) on the seven `*_proj` module names generated below.
# Note that `model` is rebound to the wrapped model, so this cell is meant to
# be run once per freshly loaded base model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        f"{prefix}_proj"
        for prefix in ("q", "k", "v", "o", "gate", "up", "down")
    ],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,  # fixed seed for reproducible runs
)

# Test run: dataset preparation, fine-tuning, and sample generation

In [None]:
from datasets import load_dataset

# Alpaca-style template with three positional slots, in order:
# instruction, input context, response.
alpaca_prompt = (
    "<|im_start|>Below is an instruction that describes a task, "
    "paired with an input that provides further context. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{}\n\n"
    "### Input:\n{}\n\n"
    "<|im_end|><|im_start|>### Response:{}\n<|im_end|>"
)


def preprocess_function(examples):
    """Render a batch of records into training prompts.

    Args:
        examples: Mapping holding the parallel sequences ``'instruction'``,
            ``'content'`` and ``'output'`` (one entry per record).

    Returns:
        A dict with the single key ``'text'``: one formatted prompt per
        record, in input order. If the sequences differ in length, the
        result is as long as the shortest one.
    """
    columns = (examples['instruction'], examples['content'], examples['output'])
    rendered = [alpaca_prompt.format(*fields) for fields in zip(*columns)]
    return {'text': rendered}

# Read the JSON-lines file as a single 'train' split, then carve off 5% of it
# as a held-out evaluation set (fixed seed so the split is reproducible).
data_files = {'train': './dataset.jsonl'}
raw_datasets = load_dataset('json', data_files=data_files)
split_datasets = raw_datasets['train'].train_test_split(test_size=0.05, seed=42)

# Render both splits into prompt text. The original columns are removed so
# that only the 'text' column produced by preprocess_function remains.
train_dataset, eval_dataset = [
    split_datasets[split_name].map(
        preprocess_function,
        batched=True,
        remove_columns=split_datasets[split_name].column_names,
    )
    for split_name in ('train', 'test')
]

# Quick size check of the raw data and the two derived splits.
print(f"Number of items in raw dataset: {len(raw_datasets['train'])}")
print(f"Train dataset size: {len(train_dataset)}")
print(f"Eval dataset size: {len(eval_dataset)}")

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Mixed precision follows the hardware: bf16 when the GPU supports it,
# fp16 otherwise (exactly one of the two flags is enabled).
bf16_available = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="outputs",
    report_to="none",
    seed=42,
    # Batching: 16 samples per device, accumulated over 2 steps.
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,
    num_train_epochs=5,
    # Evaluate and log once per epoch.
    # NOTE(review): `logging_steps` is documented for the "steps" logging
    # strategy; with "epoch" it is presumably unused - confirm.
    eval_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=5,
    # Optimisation: 8-bit AdamW, cosine schedule, 100 warmup steps.
    # NOTE(review): 1e-6 is a very small learning rate for adapter
    # fine-tuning - confirm it is intentional.
    optim="adamw_8bit",
    learning_rate=1e-6,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    weight_decay=0.01,
    bf16=bf16_available,
    fp16=not bf16_available,
)

# Supervised fine-tuning on the 'text' column of the prepared datasets.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    args=training_args,
)

In [None]:
# Ask PyTorch to release cached GPU memory it is not currently using, so the
# training run starts with as much free memory as possible.
torch.cuda.empty_cache()
# NOTE(review): gradient checkpointing was already requested with
# use_gradient_checkpointing="unsloth" when the LoRA adapters were attached;
# confirm this extra call is needed and does not replace that setting.
model.gradient_checkpointing_enable()

# Run fine-tuning; whatever train() returns is kept in `trainer_stats`.
trainer_stats = trainer.train()

In [None]:
from transformers import TextStreamer

# Put the fine-tuned model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Same template as used for training (three slots: instruction, input, response).
alpaca_prompt = "<|im_start|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{}\n\n### Input:\n{}\n\n<|im_end|><|im_start|>### Response:{}\n<|im_end|>"

# For generation the prompt must stop right after "### Response:".
# Formatting the full training template with an empty response would end the
# prompt with "\n<|im_end|>", i.e. the model would be asked to continue after
# an already closed turn. Cutting at the last "{}" keeps everything up to and
# including "### Response:" and leaves two slots (instruction, input).
generation_prompt = alpaca_prompt.rsplit("{}", 1)[0]

inputs = tokenizer(
    [
        generation_prompt.format(
            "아래의 질문에 적절한 답을 해줘",  # instruction
            "",  # input (no additional context)
        )
    ],
    return_tensors="pt",
).to("cuda")

# Stream decoded tokens to the cell output while they are generated.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=750,
    # `temperature` only applies to sampling-based decoding, so sampling is
    # enabled explicitly instead of relying on the checkpoint's generation
    # defaults.
    do_sample=True,
    temperature=0.8,
    eos_token_id=tokenizer.eos_token_id,
)
