# Part 1: DeBERTa + LoRA Fine-Tuning and Saving

This notebook fine-tunes a DeBERTa-v3-small sequence classifier (three classes: model A wins, model B wins, tie) using LoRA and saves the resulting adapter weights together with the tokenizer.

## 2. Load and Prepare Data

In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import pandas as pd
import numpy as np
import torch
from datasets import load_dataset, ClassLabel, Value
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig
from transformers import BitsAndBytesConfig

# 1. [Changed] Load the CSV through `datasets` instead of pandas
#    (original author's note: this avoids holding the whole table in RAM).
path = '../dataset/'
raw_dataset = load_dataset('csv', data_files=path + 'train.csv')

# 2. Load the tokenizer.
#    The checkpoint must match the model that is fine-tuned and evaluated later
#    in this notebook ('microsoft/deberta-v3-small'); it previously pointed at
#    'microsoft/deberta-v3-base', so the tokenizer saved next to the adapters
#    belonged to a different checkpoint than the model.
#    Original author's note: execution appeared to stall at this step.
model_name = 'microsoft/deberta-v3-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_function(examples):
    """Turn one batch of raw rows into tokenized model inputs with labels.

    Args:
        examples: Batch mapping of column name -> list of values. Reads the
            'winner_model_a', 'winner_model_b', 'winner_tie', 'prompt',
            'response_a' and 'response_b' columns.

    Returns:
        The tokenizer output for the batch with an extra 'labels' entry.

    Side effects:
        Adds 'labels' and 'text' entries to ``examples`` in place.
    """
    # One boolean mask per class; np.select takes the first mask that is True
    # for a row, and rows where no winner flag equals 1 fall back to -1.
    winner_masks = [
        np.array(examples[column]) == 1
        for column in ('winner_model_a', 'winner_model_b', 'winner_tie')
    ]
    examples['labels'] = np.select(winner_masks, [0, 1, 2], default=-1).tolist()

    # Concatenate prompt and both responses into a single input string per row.
    merged_texts = []
    for prompt, response_a, response_b in zip(
        examples['prompt'], examples['response_a'], examples['response_b']
    ):
        merged_texts.append(
            "prompt: " + prompt
            + "\n\nresponse_a: " + response_a
            + "\n\nresponse_b: " + response_b
        )
    examples['text'] = merged_texts

    # Tokenize; every row is truncated/padded to exactly 512 tokens.
    encoded = tokenizer(
        examples['text'],
        truncation=True,
        padding="max_length",
        max_length=512,
    )

    # The tokenizer output is already batched, so the label list lines up as-is.
    encoded['labels'] = examples['labels']
    return encoded


# 3-5. [Changed] Preprocess, drop source columns, and discard unlabeled rows.
#   - map(..., batched=True) runs preprocess_function on batches of rows.
#   - The raw text / winner-flag columns (and the intermediate 'text' column)
#     are removed from the 'train' split once tokenized features exist.
#   - Rows whose label is -1 (no winner flag set) are filtered out.
tokenized_dataset = (
    raw_dataset
    .map(preprocess_function, batched=True)['train']
    .remove_columns([
        'prompt', 'response_a', 'response_b', 'text',
        'winner_model_a', 'winner_model_b', 'winner_tie',
    ])
    .filter(lambda row: row['labels'] != -1)
)




!@#%$%^&!@#$


Map: 100%|██████████| 57477/57477 [01:59<00:00, 479.66 examples/s]
Filter: 100%|██████████| 57477/57477 [01:16<00:00, 755.35 examples/s]


In [7]:
from datasets import load_dataset, ClassLabel, Value

# Cast 'labels' to ClassLabel; stratify_by_column requires a ClassLabel column.
tokenized_dataset = tokenized_dataset.cast_column("labels", ClassLabel(num_classes=3))

# 6. [Changed] Stratified 80/20 train/validation split.
# A fixed seed keeps the split identical across kernel restarts, so the
# validation rows scored in the evaluation cell are never rows that an earlier
# run of this notebook trained on.
final_datasets = tokenized_dataset.train_test_split(
    test_size=0.2,
    stratify_by_column="labels",
    seed=42,
)

# (Note) A hand-written PreferenceDataset class is no longer needed.
train_dataset = final_datasets["train"]
val_dataset = final_datasets["test"]

Casting the dataset: 100%|██████████| 57477/57477 [00:00<00:00, 85326.55 examples/s]


## 3. Tokenization

## 4. LoRA Fine-Tuning

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig
import torch

model_name = 'microsoft/deberta-v3-small'

# NOTE: 8-bit quantization (BitsAndBytesConfig / quantization_config) was
# removed for testing; the model is loaded without quantization.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    device_map='auto',
)

# LoRA adapters go on the attention query/value projections.
# modules_to_save: the checkpoint ships without 'pooler.dense' and 'classifier'
# weights (see the "newly initialized" warning when loading), so both are
# randomly initialised. They must be trained AND stored with the adapter;
# otherwise reloading the base model later creates a different random pooler
# and the saved classifier no longer matches it.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=['query_proj', 'value_proj'],
    lora_dropout=0.05,
    bias='none',
    task_type="SEQ_CLS",
    modules_to_save=['classifier', 'pooler'],
)

lora_model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir='./results_lora',
    num_train_epochs=1,
    per_device_train_batch_size=4,   # reduced batch size
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,   # 1 = no accumulation
    fp16=True,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs_lora',
    eval_strategy="steps",           # named `evaluation_strategy` in older transformers releases
    eval_steps=5000,
    save_steps=5000,                 # kept equal to eval_steps for load_best_model_at_end
    load_best_model_at_end=True,
    report_to="none",
)

trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

trainer.train()

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model is already on multiple devices. Skipping the move to device specified in `args`.


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

## 5. Save LoRA Model Adapters

In [None]:
# Write the adapter weights and the tokenizer files into the same directory so
# that one path is enough to reload both later.
model_save_path = 'deberta_lora_model'
for artifact in (lora_model, tokenizer):
    artifact.save_pretrained(model_save_path)

print(f"LoRA model adapters saved to {model_save_path}")

LoRA model adapters saved to deberta_lora_model


In [8]:
import torch
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay,
)
import matplotlib.pyplot as plt
import numpy as np

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel
from datasets import Dataset  # Import Dataset

# Directory holding the saved adapter weights and tokenizer files.
model_path = "deberta_lora_model"

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer is read from the adapter directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Rebuild the 3-class base model, then attach the LoRA adapters on top of it.
base_model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-v3-small", num_labels=3
)
model = PeftModel.from_pretrained(base_model, model_path)

# Evaluation mode first, then move the weights to the selected device.
model.eval()
model.to(device)


def get_predictions(model, dataset: "Dataset", batch_size: int = 16):
    """
    Generates predictions for a given model on a dataset.

    The model is switched to evaluation mode for the duration of the call and
    restored to its previous train/eval mode afterwards. Inputs are placed on
    the device of the model's first parameter, so the function does not depend
    on a module-level ``device`` variable.

    Args:
        model: The trained model (including LoRA adapters). Called as
            ``model(input_ids=..., attention_mask=...)``; the result must
            expose a ``logits`` tensor.
        dataset: The dataset to generate predictions for (Hugging Face Dataset).
            Must support ``len()`` and slice indexing that returns a mapping
            with 'input_ids', 'attention_mask' and 'labels'.
        batch_size: Number of rows per forward pass. Defaults to 16.

    Returns:
        A tuple containing:
            - y_true: numpy array of true labels.
            - y_pred: numpy array of predicted labels.
        Both arrays are empty when the dataset has no rows.

    Raises:
        ValueError: If ``batch_size`` is not a positive integer.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Send inputs wherever the model's weights live; a parameter-free model is
    # treated as living on the CPU.
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = torch.device("cpu")

    label_chunks = []
    pred_chunks = []

    # Dropout must be off for deterministic predictions; remember the old mode
    # so the caller's model is left as it was found.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Slice the dataset manually instead of using a DataLoader.
            for start in range(0, len(dataset), batch_size):
                batch = dataset[start : start + batch_size]

                inputs = {
                    "input_ids": torch.tensor(batch["input_ids"]).to(model_device),
                    "attention_mask": torch.tensor(batch["attention_mask"]).to(model_device),
                }

                logits = model(**inputs).logits
                pred_chunks.append(torch.argmax(logits, dim=-1).cpu().numpy())
                label_chunks.append(np.asarray(batch["labels"]))
    finally:
        model.train(was_training)

    if not label_chunks:
        # Empty dataset: return two independent empty integer arrays.
        return np.array([], dtype=np.int64), np.array([], dtype=np.int64)

    return np.concatenate(label_chunks), np.concatenate(pred_chunks)


# Collect true and predicted labels for the validation split.
y_true, y_pred = get_predictions(model, val_dataset)

# Display names for classes 0, 1, 2 (assumed to match the label encoding).
labels = ["A_win", "B_win", "Tie"]

# Confusion matrix rendered as a heat map.
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - DeBERTa + LoRA (Validation)")
plt.show()

# Per-class precision / recall / F1 for more detailed metrics.
report = classification_report(y_true, y_pred, target_names=labels)
print(report)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


KeyboardInterrupt: 