# Part 1: DeBERTa + LoRA Fine-Tuning and Saving

This notebook fine-tunes a DeBERTa-small model using LoRA and saves the resulting adapter model weights.

## 1. Install Libraries

In [8]:
#quantization용 라이브러리인데 quantization 적용을 실패해서 사용하지 않습니다..
#!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


## 2. Load and Prepare Data

In [1]:
import pandas as pd
import numpy as np
import torch
from datasets import load_dataset, ClassLabel, Value
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig
from transformers import BitsAndBytesConfig

# 1. [변경] 'datasets'로 CSV 로드 (RAM에 올리지 않음)
path = './dataset/'
raw_dataset = load_dataset('csv', data_files=path + 'train.csv')

# 2. [변경] 전처리 함수 정의 (데이터 생성과 토큰화를 한 번에)
model_name = 'microsoft/deberta-v3-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_function(examples):
    # 레이블 생성 (handle batched input)
    conditions = [(np.array(examples['winner_model_a']) == 1), (np.array(examples['winner_model_b']) == 1), (np.array(examples['winner_tie']) == 1)]
    choices = [0, 1, 2]
    examples['labels'] = np.select(conditions, choices, default=-1).tolist()

    # 텍스트 생성 (handle batched input)
    examples['text'] = [("prompt: " + prompt +
                        "\n\nresponse_a: " + response_a +
                        "\n\nresponse_b: " + response_b) for prompt, response_a, response_b in zip(examples['prompt'], examples['response_a'], examples['response_b'])]

    # 토큰화 (여기서 max_length가 중요)
    tokenized_inputs = tokenizer(examples['text'], truncation=True, padding="max_length", max_length=512)

    # 토okenization results are already batched, just add the 'labels'
    tokenized_inputs['labels'] = examples['labels']

    # Return only necessary columns
    return tokenized_inputs


# 3. [변경] .map()으로 전처리 적용 (배치로 처리되어 빠르고 RAM 절약)
# batched=True가 핵심입니다.
tokenized_dataset = raw_dataset.map(preprocess_function, batched=True)

# 4. [변경] 불필요한 원본 컬럼 제거 (필수!)
# 'label' -> 'labels'로 이름이 바뀌었으므로 원본 'label'도 제거
tokenized_dataset = tokenized_dataset['train'].remove_columns([
    'prompt', 'response_a', 'response_b', 'text',
    'winner_model_a', 'winner_model_b', 'winner_tie' # 'label' is not in the original csv columns
])

# 5. [변경] 유효하지 않은 레이블(-1) 필터링
tokenized_dataset = tokenized_dataset.filter(lambda example: example['labels'] != -1)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [2]:
from datasets import load_dataset, ClassLabel, Value

# Cast 'labels' column to ClassLabel for stratification
tokenized_dataset = tokenized_dataset.cast_column("labels", ClassLabel(num_classes=3))

# 6. [변경] 데이터셋 분할
final_datasets = tokenized_dataset.train_test_split(test_size=0.2, stratify_by_column="labels")

# (참고) 이제 더 이상 수동으로 PreferenceDataset을 만들 필요가 없습니다!
train_dataset = final_datasets["train"]
val_dataset = final_datasets["test"]

## 4. LoRA Fine-Tuning

In [15]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig
import torch

model_name = 'microsoft/deberta-v3-small'

# Remove 8-bit quantization configuration for testing
# bnb_config = BitsAndBytesConfig(
#     load_in_8bit=True,
#     bnb_8bit_quant_type="nf4",  # You can experiment with "nf4" or "fp4"
#     bnb_8bit_compute_dtype=torch.bfloat16, # Or torch.float16, depending on your GPU
#     bnb_8bit_use_double_quant=True, # Optional: use double quantization
# )


model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    # Remove quantization_config argument
    # quantization_config=bnb_config, # Use the quantization_config argument
    device_map='auto'
    )

lora_config = LoraConfig(
    r=4, lora_alpha=32, target_modules=['query_proj', 'value_proj'],
    lora_dropout=0.05, bias='none', task_type="SEQ_CLS"
    )

lora_model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir='./results_lora', num_train_epochs=1, per_device_train_batch_size=16, # Reduced batch size
    fp16=True,gradient_accumulation_steps=1, # Add gradient accumulation
    per_device_eval_batch_size=8, warmup_steps=500, weight_decay=0.01,
    logging_dir='./logs_lora', eval_strategy="steps", eval_steps=500, # Changed evaluation_strategy to eval_strategy
    save_steps=500, load_best_model_at_end=True,report_to="none"
    )

trainer = Trainer(model=lora_model, args=training_args, train_dataset=train_dataset, eval_dataset=val_dataset)

trainer.train()

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model is already on multiple devices. Skipping the move to device specified in `args`.


Step,Training Loss,Validation Loss
500,1.1028,1.097234
1000,1.0991,1.091948
1500,1.0912,1.087873


Step,Training Loss,Validation Loss
500,1.1028,1.097234
1000,1.0991,1.091948
1500,1.0912,1.087873
2000,1.0906,1.087077
2500,1.0917,1.085878


TrainOutput(global_step=2874, training_loss=1.0943977711676887, metrics={'train_runtime': 1885.2869, 'train_samples_per_second': 24.389, 'train_steps_per_second': 1.524, 'total_flos': 6102049267132416.0, 'train_loss': 1.0943977711676887, 'epoch': 1.0})

## 5. Save LoRA Model Adapters

In [16]:
model_save_path = 'deberta_lora_model'
lora_model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path) # Save tokenizer with the adapters

print(f"LoRA model adapters saved to {model_save_path}")

LoRA model adapters saved to deberta_lora_model
