# Fine-tuning with Bias-Aware Techniques

This notebook integrates bias-aware techniques into the fine-tuning process. It includes:
1. Analysis of verbosity and position bias.
2. Data augmentation to mitigate position bias.
3. Fine-tuning a DeBERTa-v3-small model (`microsoft/deberta-v3-small`) with LoRA on the augmented dataset.

## 1. Install Libraries

In [None]:
!pip install -q transformers peft accelerate bitsandbytes torch

## 2. Load Data and Analyze Biases

In [None]:
import numpy as np
import pandas as pd
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from sklearn.model_selection import train_test_split
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

# Read the competition files (paths are relative to the notebook's working directory).
train_df = pd.read_csv('dataset/train.csv')
test_df = pd.read_csv('dataset/test.csv')
submission_df = pd.read_csv('dataset/sample_submission.csv')

# --- Verbosity Bias Analysis ---
def _count_words(text):
    """Number of whitespace-separated tokens in `text` (coerced to str first)."""
    return len(str(text).split())

# Character lengths first, then word counts, for each response slot.
for side in ('a', 'b'):
    train_df[f'len_{side}'] = train_df[f'response_{side}'].str.len()
for side in ('a', 'b'):
    train_df[f'word_count_{side}'] = train_df[f'response_{side}'].apply(_count_words)

print('--- Verbosity Analysis ---')
print(f"Avg word count for A: {train_df['word_count_a'].mean():.2f}")
print(f"Avg word count for B: {train_df['word_count_b'].mean():.2f}")

# --- Position Bias Analysis ---
# Count how often each outcome occurs; a large A/B gap hints at position bias.
outcome_counts = {
    column: train_df[column].sum()
    for column in ('winner_model_a', 'winner_model_b', 'winner_tie')
}
model_a_wins = outcome_counts['winner_model_a']
model_b_wins = outcome_counts['winner_model_b']
ties = outcome_counts['winner_tie']
total = len(train_df)

print('--- Position Bias Analysis ---')
print(f'Total samples: {total}')
print(f'Model A wins: {model_a_wins} ({model_a_wins/total:.2%})')
print(f'Model B wins: {model_b_wins} ({model_b_wins/total:.2%})')
print(f'Ties: {ties} ({ties/total:.2%})')


## 3. Data Augmentation and Preparation

In [None]:
# Collapse the three one-hot winner columns into a single integer class label.
# The position in `winner_columns` is the class id: 0: model_a, 1: model_b, 2: tie
winner_columns = ['winner_model_a', 'winner_model_b', 'winner_tie']
conditions = [train_df[column] == 1 for column in winner_columns]
choices = list(range(len(winner_columns)))
train_df['label'] = np.select(conditions, choices, default=-1)

# Rows where none of the winner flags is set received -1 above; drop them.
has_label = train_df['label'] != -1
train_df = train_df.loc[has_label].copy()

# --- Hold out validation rows BEFORE augmenting ---
# If the split happened after augmentation, a pair and its position-swapped twin
# could land on opposite sides of the split and leak validation data into training.
train_split_df, val_split_df = train_test_split(
    train_df, test_size=0.1, random_state=42, stratify=train_df['label']
)

# --- Augmentation to mitigate position bias ---
def add_position_swaps(df):
    """Return `df` followed by a position-swapped copy of every row.

    In the swapped copy every `<name>_a` column trades places with its
    `<name>_b` partner (responses as well as any per-side statistics or
    one-hot winner flags), so each row stays internally consistent.
    Labels follow the swap: 0 <-> 1, while a tie (2) remains a tie.
    Swapping ties as well keeps the class proportions of the input frame.

    Args:
        df: DataFrame with an integer `label` column (0: model_a, 1: model_b, 2: tie).

    Returns:
        A new DataFrame with twice as many rows as `df` and a fresh 0..n-1 index.
    """
    swap_map = {}
    for col in df.columns:
        if isinstance(col, str) and col.endswith('_a'):
            partner = col[:-2] + '_b'
            if partner in df.columns:
                swap_map[col] = partner
                swap_map[partner] = col

    # rename() applies the mapping to each column independently, so a<->b is a true swap;
    # re-selecting df.columns restores the original column order.
    swapped = df.rename(columns=swap_map)[list(df.columns)]
    swapped = swapped.assign(label=df['label'].map({0: 1, 1: 0, 2: 2}))
    return pd.concat([df, swapped], ignore_index=True)

augmented_train_df = add_position_swaps(train_split_df)
print(f'Original training size: {len(train_split_df)}')
print(f'Augmented training size: {len(augmented_train_df)}')
print(f'Validation size (not augmented): {len(val_split_df)}')

# Create combined text field
def create_text(row):
    """Build the single model input string from a row's prompt and both responses."""
    return f"""prompt: {row['prompt']}

response_a: {row['response_a']}

response_b: {row['response_b']}"""

augmented_train_df['text'] = augmented_train_df.apply(create_text, axis=1)
# assign() returns a new frame, avoiding a write into the slice produced by the split.
val_split_df = val_split_df.assign(text=val_split_df.apply(create_text, axis=1))
test_df['text'] = test_df.apply(create_text, axis=1)

# Validation keeps the original (unswapped) ordering so it mirrors the test data.
train_texts, train_labels = augmented_train_df['text'], augmented_train_df['label']
val_texts, val_labels = val_split_df['text'], val_split_df['label']

## 4. Tokenization

In [None]:
model_name = 'microsoft/deberta-v3-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)

def encode_texts(texts):
    """Tokenize a list of strings, padded to the longest item and capped at 512 tokens."""
    # NOTE(review): each input is one concatenated string, so truncation presumably
    # removes tokens from the end (response_b first) - confirm this does not
    # reintroduce a position effect for long conversations.
    return tokenizer(texts, truncation=True, padding=True, max_length=512)

train_encodings = encode_texts(train_texts.tolist())
val_encodings = encode_texts(val_texts.tolist())
test_encodings = encode_texts(test_df['text'].tolist())

class PreferenceDataset(torch.utils.data.Dataset):
    """Map-style dataset over tokenizer output, with optional class labels.

    Args:
        encodings: Mapping from feature name (must include 'input_ids') to a
            per-example sequence, e.g. the object returned by a tokenizer call.
        labels: Optional sequence of integer class ids, one per example.
            When omitted, items carry no 'labels' entry (inference use).

    Raises:
        ValueError: If `labels` is given but its length differs from the
            number of encoded examples.
    """

    def __init__(self, encodings, labels=None):
        num_examples = len(encodings['input_ids'])
        # Fail fast on misaligned inputs instead of raising IndexError mid-epoch.
        if labels is not None and len(labels) != num_examples:
            raise ValueError(
                f'Got {len(labels)} labels for {num_examples} encoded examples'
            )
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return the features (and label, if available) of example `idx` as tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        """Number of encoded examples."""
        return len(self.encodings['input_ids'])

# Pair each tokenized split with its labels as plain Python lists.
train_dataset = PreferenceDataset(encodings=train_encodings, labels=train_labels.tolist())
val_dataset = PreferenceDataset(encodings=val_encodings, labels=val_labels.tolist())

## 5. Model Setup and LoRA Configuration

In [None]:
# The pooler and classifier head are not part of the pretrained checkpoint, so they
# must be trained. Keep them out of 8-bit quantization; only the frozen backbone
# is quantized.
trainable_head_modules = ['pooler', 'classifier']

# Passing `load_in_8bit` directly to from_pretrained is deprecated; use a quantization config.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_skip_modules=trainable_head_modules,
)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    quantization_config=quantization_config,
    device_map='auto'
)

# Freeze the base weights and upcast the remaining half-precision parameters
# so training on top of the quantized backbone is numerically stable.
# Gradient checkpointing is left off; the model is small enough not to need it.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

lora_config = LoraConfig(
    r=16, # Rank
    lora_alpha=32,
    target_modules=['query_proj', 'value_proj'],
    lora_dropout=0.05,
    bias='none',
    # Train (and save) the freshly initialised head modules in full, not via LoRA.
    modules_to_save=trainable_head_modules,
    task_type="SEQ_CLS"
)

model = get_peft_model(model, lora_config)
# Sanity check: LoRA adapters plus pooler/classifier should be the only trainable weights.
model.print_trainable_parameters()

## 6. Fine-tuning

In [None]:
# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers releases
# and the old name is rejected there; pick whichever the installed version defines.
eval_strategy_kwarg = (
    'eval_strategy'
    if 'eval_strategy' in TrainingArguments.__dataclass_fields__
    else 'evaluation_strategy'
)

training_args = TrainingArguments(
    output_dir='./results_bias_aware',
    num_train_epochs=1, # A single epoch for a quick baseline
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs_bias_aware',
    eval_steps=500,
    # load_best_model_at_end needs checkpoints saved on the same schedule as evaluation.
    save_strategy="steps",
    save_steps=500,
    # Keep disk usage bounded; the best checkpoint is still retained.
    save_total_limit=2,
    load_best_model_at_end=True,
    **{eval_strategy_kwarg: "steps"},
)

# No compute_metrics is supplied, so evaluation reports the loss only;
# that loss is also what selects the best checkpoint.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

trainer.train()

## 7. Prediction and Submission

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """Label-free dataset over tokenizer output, used for inference.

    `encodings` maps feature names (including 'input_ids') to per-example sequences.
    """

    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        """Return every feature of example `idx` converted to a tensor."""
        sample = {}
        for name, values in self.encodings.items():
            sample[name] = torch.tensor(values[idx])
        return sample

    def __len__(self):
        """Number of examples, taken from the 'input_ids' feature."""
        input_ids = self.encodings['input_ids']
        return len(input_ids)

test_dataset = TestDataset(test_encodings)

# Run inference and turn the raw logits into per-class probabilities.
predictions = trainer.predict(test_dataset)
test_logits = torch.from_numpy(predictions.predictions)
probs = torch.softmax(test_logits, dim=-1).numpy()

# Column order follows the label encoding: 0: model_a, 1: model_b, 2: tie.
for class_idx, column in enumerate(['winner_model_a', 'winner_model_b', 'winner_tie']):
    submission_df[column] = probs[:, class_idx]

submission_df.to_csv('submission_finetuned_bias_aware.csv', index=False)

submission_df.head()