# LLM Classification Finetuning
## Predict Human Preference using Chatbot Arena Conversations

**Goal:** Predict which model's response a judge would prefer (model_a, model_b, or tie)

In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ast import literal_eval
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

# Display settings: show up to 200 characters per cell so long text columns stay readable
pd.set_option('display.max_colwidth', 200)
print("Libraries imported successfully!")

In [None]:
# Read the three competition CSV files from the dataset directory
DATA_PATH = "llm-classification-finetuning/"

train_df = pd.read_csv(DATA_PATH + "train.csv")
test_df = pd.read_csv(DATA_PATH + "test.csv")
sample_sub = pd.read_csv(DATA_PATH + "sample_submission.csv")

# Report the (rows, columns) shape of each frame
for caption, frame in (
    ("Training data", train_df),
    ("Test data", test_df),
    ("Sample submission", sample_sub),
):
    print(f"{caption} shape: {frame.shape}")

In [None]:
# Explore training data: list the column names, then preview the first two rows
print("Training Data Columns:")
print(train_df.columns.tolist())
print("\n" + "="*50)
# Left as the final expression so the notebook renders it as the cell output
train_df.head(2)

In [None]:
# Data info: dtypes and non-null counts, followed by the number of nulls per column
print("Training Data Info:")
# DataFrame.info() prints its report itself, so it is not wrapped in print()
train_df.info()
print("\n" + "="*50)
print("\nMissing Values:")
print(train_df.isnull().sum())

In [None]:
# Target distribution: count and share of each outcome column
targets = ['winner_model_a', 'winner_model_b', 'winner_tie']
counts = [train_df[t].sum() for t in targets]

print("Target Distribution:")
print("="*50)
# Prefixes are padded to equal width so the printed numbers line up
line_prefixes = ("Winner Model A:", "Winner Model B:", "Winner Tie:    ")
for prefix, column, total in zip(line_prefixes, targets, counts):
    print(f"{prefix} {total} ({train_df[column].mean()*100:.2f}%)")

# Bar chart of the same counts; `colors` is reused by later plotting cells
colors = ['#FF6B6B', '#4ECDC4', '#95E1D3']
fig, ax = plt.subplots(figsize=(8, 5))
bars = ax.bar(['Model A Wins', 'Model B Wins', 'Tie'], counts, color=colors, edgecolor='black')
ax.set_title('Distribution of Winners in Training Data')
ax.set_ylabel('Count')

# Write each count just above its bar
for rect, value in zip(bars, counts):
    x_center = rect.get_x() + rect.get_width()/2
    ax.text(x_center, rect.get_height() + 100, f'{value}',
            ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Parse the prompt and response columns (they are stored as string representations of lists)
def safe_literal_eval(x):
    """Parse the string form of a list into a Python object, best-effort.

    The value is first parsed as a Python literal. If that fails and ``x`` is
    a string, it is parsed as JSON, which covers list encodings that contain
    ``null``/``true``/``false`` (not valid Python literals). The JSON result
    is used only when it is a list or dict, so plain strings and scalars keep
    falling through unchanged.

    Args:
        x: Raw cell value, normally a string; any other type is tolerated.

    Returns:
        The parsed object, or ``x`` itself when it cannot be parsed.
    """
    try:
        return literal_eval(x)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only parsing failures are swallowed; KeyboardInterrupt and friends propagate.
        pass

    if isinstance(x, str):
        import json  # local import keeps this cell self-contained

        try:
            parsed = json.loads(x)
        except (ValueError, RecursionError):
            return x
        if isinstance(parsed, (list, dict)):
            return parsed
    return x

# Parse each raw text column into a `<name>_parsed` column
for raw_col in ('prompt', 'response_a', 'response_b'):
    train_df[f'{raw_col}_parsed'] = train_df[raw_col].apply(safe_literal_eval)

# Preview the first parsed prompt and the first parsed response
first_prompt = train_df['prompt_parsed'].iloc[0]
first_response = train_df['response_a_parsed'].iloc[0]

print("Sample parsed prompt:")
print(first_prompt)
print("\n" + "="*50)
print("\nSample parsed response_a:")
if isinstance(first_response, list):
    # A list is sliced by element, not by character
    print(first_response[:500])
else:
    print(str(first_response)[:500])

In [None]:
# Feature Engineering - Text lengths
def get_text_length(x):
    """Return the number of characters in ``x``.

    For a list the lengths of the string forms of all items are summed;
    any other value is measured through its own string form.
    """
    items = x if isinstance(x, list) else [x]
    return sum(map(len, map(str, items)))

def get_num_turns(x):
    """Return the number of turns: ``len(x)`` for a list, otherwise 1."""
    return len(x) if isinstance(x, list) else 1

# Character-count feature for the prompt and for each response
for source_col in ('prompt', 'response_a', 'response_b'):
    train_df[f'{source_col}_length'] = train_df[f'{source_col}_parsed'].apply(get_text_length)

# Number of prompt turns in the conversation
train_df['num_turns'] = train_df['prompt_parsed'].apply(get_num_turns)

# A-versus-B comparisons; the +1 in the denominator avoids division by zero
len_a = train_df['response_a_length']
len_b = train_df['response_b_length']
train_df['length_diff'] = len_a - len_b
train_df['length_ratio'] = len_a / (len_b + 1)

summary_cols = ['prompt_length', 'response_a_length', 'response_b_length', 'num_turns', 'length_diff']
print("Feature Statistics:")
print(train_df[summary_cols].describe())

In [None]:
# Visualize response lengths by winner
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Create winner label from the one-hot winner columns
train_df['winner'] = train_df.apply(
    lambda x: 'Model A' if x['winner_model_a'] == 1 else ('Model B' if x['winner_model_b'] == 1 else 'Tie'),
    axis=1
)

# Fix the category order explicitly. Without it seaborn orders the boxes by
# first appearance in the data, so the palette entries (listed as A, B, Tie)
# could be paired with the wrong category.
winner_order = ['Model A', 'Model B', 'Tie']

# Response A length by winner (y-axis clipped at the 95th percentile to hide outliers)
sns.boxplot(data=train_df, x='winner', y='response_a_length', ax=axes[0],
            order=winner_order, palette=colors)
axes[0].set_title('Response A Length by Winner')
axes[0].set_ylim(0, train_df['response_a_length'].quantile(0.95))

# Response B length by winner (same clipping)
sns.boxplot(data=train_df, x='winner', y='response_b_length', ax=axes[1],
            order=winner_order, palette=colors)
axes[1].set_title('Response B Length by Winner')
axes[1].set_ylim(0, train_df['response_b_length'].quantile(0.95))

# Length difference by winner (clipped to the 5th-95th percentile range)
sns.boxplot(data=train_df, x='winner', y='length_diff', ax=axes[2],
            order=winner_order, palette=colors)
axes[2].set_title('Length Difference (A-B) by Winner')
axes[2].set_ylim(train_df['length_diff'].quantile(0.05), train_df['length_diff'].quantile(0.95))

plt.tight_layout()
plt.show()

## Text Preprocessing for Model Input

We'll create a combined text representation for each sample that includes:
- The prompt
- Response A
- Response B

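This will be used as input to our transformer model. Note that the tokenizer truncates the combined text to `max_length` tokens, so for long conversations the end of the text (usually Response B) is cut off.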

In [None]:
def _flatten_turns(value):
    """Join a list of turns with ' [TURN] '; stringify any other value."""
    if not isinstance(value, list):
        return str(value)
    return " [TURN] ".join(map(str, value))


def create_input_text(row):
    """
    Build the single text sequence fed to the model for one sample.

    Reads 'prompt_parsed', 'response_a_parsed' and 'response_b_parsed' from
    `row`. Multi-turn (list) values are joined with ' [TURN] '.
    Format: [PROMPT] prompt_text [RESPONSE_A] response_a_text [RESPONSE_B] response_b_text
    """
    prompt_text, response_a_text, response_b_text = (
        _flatten_turns(row[field])
        for field in ('prompt_parsed', 'response_a_parsed', 'response_b_parsed')
    )
    return f"[PROMPT] {prompt_text} [RESPONSE_A] {response_a_text} [RESPONSE_B] {response_b_text}"

# Create combined text: one formatted string per row, built from the parsed columns
train_df['input_text'] = train_df.apply(create_input_text, axis=1)

# Check sample: print only the first 1000 characters of the first row
print("Sample input text (truncated):")
print(train_df['input_text'].iloc[0][:1000] + "...")

In [None]:
# Create target labels (0: model_a wins, 1: model_b wins, 2: tie)
def _encode_winner(row):
    """Collapse one row's winner flags into a single class id."""
    if row['winner_model_a'] == 1:
        return 0
    if row['winner_model_b'] == 1:
        return 1
    # Neither flag set: treated as a tie
    return 2


train_df['label'] = train_df.apply(_encode_winner, axis=1)

label_counts = train_df['label'].value_counts().sort_index()
print("Label distribution:")
print(label_counts)
print("\nLabel mapping: 0=Model A wins, 1=Model B wins, 2=Tie")

## Model Training with Transformers

We'll use a pre-trained transformer model (DeBERTa) for this classification task. DeBERTa has shown strong performance on NLU tasks.

**Approach:**
1. Tokenize the combined text
2. Fine-tune DeBERTa for 3-class classification
3. Evaluate on a stratified 15% hold-out validation split (k-fold cross-validation is a possible extension; see the tips at the end)

In [None]:
# Install required packages (uncomment if needed)
# !pip install transformers datasets accelerate -q

import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import log_loss, accuracy_score
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import Dataset as HFDataset

# Check device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    # Report the name of the first visible GPU (index 0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Configuration
class CFG:
    """Hyperparameters and settings shared by the tokenization, training and inference cells."""

    # --- model and inputs ---
    model_name = "microsoft/deberta-v3-small"  # swap for deberta-v3-base for better performance
    num_labels = 3
    max_length = 512  # token limit per sample; adjust to fit GPU memory

    # --- optimization ---
    epochs = 3
    batch_size = 8
    learning_rate = 2e-5
    weight_decay = 0.01

    # --- reproducibility ---
    seed = 42

# Set seed for reproducibility
def set_seed(seed):
    """Seed every random number generator this notebook can draw from.

    Covers Python's ``random`` module, NumPy's global generator, and
    PyTorch on the CPU and, when CUDA is available, on all GPUs.

    Args:
        seed: Integer seed applied to every generator.
    """
    import random  # local import keeps this cell self-contained

    # Python's own generator was previously left unseeded, so any library
    # code drawing from `random` stayed non-deterministic.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Seed the random number generators before any model or split is created
set_seed(CFG.seed)
# Echo the main settings for the run log
print(f"Model: {CFG.model_name}")
print(f"Max Length: {CFG.max_length}")
print(f"Batch Size: {CFG.batch_size}")

In [None]:
# Load the tokenizer that matches the checkpoint named in CFG.model_name
# NOTE(review): presumably downloads from the Hugging Face Hub when not cached — confirm for offline runs
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)

# Tokenize function
def tokenize_function(examples):
    """Tokenize the 'input_text' field of a batch, truncating to CFG.max_length.

    Padding is deliberately skipped here; batches are padded dynamically
    later by the data collator.
    """
    tokenizer_kwargs = {
        "truncation": True,
        "max_length": CFG.max_length,
        "padding": False,
    }
    return tokenizer(examples['input_text'], **tokenizer_kwargs)

# Hold out 15% of the rows for validation, stratified on the label
model_frame = train_df[['input_text', 'label']]
train_data, val_data = train_test_split(
    model_frame,
    stratify=model_frame['label'],
    test_size=0.15,
    random_state=CFG.seed,
)

for split_name, split_frame in (("Training", train_data), ("Validation", val_data)):
    print(f"{split_name} samples: {len(split_frame)}")

In [None]:
# Turn each pandas split into a tokenized HuggingFace dataset
def _to_tokenized_dataset(frame):
    """Convert a split to an HF dataset, tokenize it, and rename 'label' to 'labels'."""
    dataset = HFDataset.from_pandas(frame.reset_index(drop=True))
    # The raw text column is dropped once it has been tokenized
    dataset = dataset.map(tokenize_function, batched=True, remove_columns=['input_text'])
    return dataset.rename_column('label', 'labels')


train_dataset = _to_tokenized_dataset(train_data)
val_dataset = _to_tokenized_dataset(val_data)

print("Dataset columns:", train_dataset.column_names)
print("Sample tokenized length:", len(train_dataset[0]['input_ids']))

In [None]:
# Load the pre-trained checkpoint with a classification head sized for CFG.num_labels classes
# NOTE(review): the classification head is presumably newly initialised for this task — confirm in the load log
model = AutoModelForSequenceClassification.from_pretrained(
    CFG.model_name,
    num_labels=CFG.num_labels,
    problem_type="single_label_classification"
)

# Data collator for dynamic padding (the tokenize step above uses padding=False)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Compute metrics
def compute_metrics(eval_pred):
    """Compute accuracy and multi-class log loss from evaluation output.

    Args:
        eval_pred: Pair ``(predictions, labels)`` where ``predictions`` holds
            the raw logits and ``labels`` the integer class ids.

    Returns:
        Dict with keys ``"accuracy"`` and ``"log_loss"``.
    """
    predictions, labels = eval_pred

    # With fp16 enabled the logits can arrive as float16. Softmax on half
    # precision is unsupported on CPU in older torch releases and loses
    # precision otherwise, so upcast to float32 first.
    logits = np.asarray(predictions, dtype=np.float32)

    probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()
    preds = logits.argmax(axis=-1)

    acc = accuracy_score(labels, preds)
    # The class list is passed explicitly so a batch missing a class still scores
    logloss = log_loss(labels, probs, labels=[0, 1, 2])
    return {"accuracy": acc, "log_loss": logloss}

# Confirm the load and report the model size
param_count = model.num_parameters()
print("Model loaded successfully!")
print(f"Model parameters: {param_count:,}")

In [None]:
# Training arguments: evaluate and checkpoint once per epoch, then reload the
# checkpoint with the lowest validation log loss when training finishes.
# NOTE(review): `eval_strategy` is the newer keyword name; older transformers
# releases call it `evaluation_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=CFG.learning_rate,
    per_device_train_batch_size=CFG.batch_size,
    # Evaluation uses twice the training batch size
    per_device_eval_batch_size=CFG.batch_size * 2,
    num_train_epochs=CFG.epochs,
    weight_decay=CFG.weight_decay,
    warmup_ratio=0.1,
    load_best_model_at_end=True,
    # "log_loss" is the key returned by compute_metrics; lower is better
    metric_for_best_model="log_loss",
    greater_is_better=False,
    logging_steps=100,
    # Mixed precision is enabled only when a CUDA device is available
    fp16=torch.cuda.is_available(),
    report_to="none",
    seed=CFG.seed,
)

# Initialize Trainer with the model, both dataset splits, the padding collator and the metric function.
# NOTE(review): newer transformers releases prefer `processing_class=` over
# `tokenizer=` — confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

print("Trainer initialized successfully!")

In [None]:
# Train the model (uncomment to train)
# trainer.train()

# Training is disabled by default, so this cell only prints a reminder.
# Until trainer.train() has run, `model` has not been fine-tuned by this notebook.
print("Note: Uncomment trainer.train() above to start training")
print("Training will take several hours depending on your GPU")

## Inference and Submission

Generate predictions on the test set and create submission file.

In [None]:
# Apply the same parsing and formatting to the test set as to the training set
for raw_col in ('prompt', 'response_a', 'response_b'):
    test_df[f'{raw_col}_parsed'] = test_df[raw_col].apply(safe_literal_eval)
test_df['input_text'] = test_df.apply(create_input_text, axis=1)

# Show the size and a 500-character preview of the first formatted sample
first_test_input = test_df['input_text'].iloc[0]
print(f"Test samples: {len(test_df)}")
print("\nSample test input (truncated):")
print(first_test_input[:500] + "...")

In [None]:
def generate_predictions(model, tokenizer, texts, batch_size=16):
    """Generate class probabilities for a collection of input texts.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer matching ``model``.
        texts: Iterable of input strings (list, pandas Series, array, ...).
        batch_size: Number of texts per forward pass; must be at least 1.

    Returns:
        Array of shape ``(len(texts), num_labels)`` with softmax
        probabilities; an empty ``(0, num_labels)`` array for empty input.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # The tokenizer expects a plain list of strings, not a Series or ndarray
    texts = list(texts)
    total = len(texts)

    if total == 0:
        # np.vstack([]) would raise, so return a correctly shaped empty array
        num_labels = getattr(getattr(model, "config", None), "num_labels", None)
        if num_labels is None:
            num_labels = CFG.num_labels
        return np.empty((0, num_labels), dtype=np.float32)

    model.eval()
    model.to(device)

    all_probs = []
    log_every = 25  # batches between progress messages

    for batch_idx, start in enumerate(range(0, total, batch_size), start=1):
        batch_texts = texts[start:start + batch_size]

        # Tokenize and pad to the longest sequence in this batch
        inputs = tokenizer(
            batch_texts,
            truncation=True,
            max_length=CFG.max_length,
            padding=True,
            return_tensors="pt"
        ).to(device)

        # Predict; logits are upcast so softmax stays accurate for half-precision models
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.softmax(outputs.logits.float(), dim=-1).cpu().numpy()
        all_probs.append(probs)

        # Report at a fixed batch cadence and always on completion
        done = min(start + batch_size, total)
        if batch_idx % log_every == 0 or done == total:
            print(f"Processed {done}/{total} samples")

    return np.vstack(all_probs)

# Generate predictions (after training)
# Left commented out because the model is not trained by default in this notebook.
# test_predictions = generate_predictions(model, tokenizer, test_df['input_text'].tolist())

print("Prediction function defined. Run after training completes.")

In [None]:
# Create submission file
def create_submission(test_ids, predictions, output_path="submission.csv"):
    """
    Build the submission table, write it to CSV, and return it.

    test_ids: identifiers, one per row of `predictions`
    predictions: numpy array of shape (n_samples, 3) with probabilities,
        column order model_a, model_b, tie
    output_path: destination CSV file (written without the index)
    """
    class_columns = ('winner_model_a', 'winner_model_b', 'winner_tie')

    # 'id' comes first, followed by one probability column per class
    table = {'id': test_ids}
    for position, column in enumerate(class_columns):
        table[column] = predictions[:, position]

    submission = pd.DataFrame(table)
    submission.to_csv(output_path, index=False)
    print(f"Submission saved to {output_path}")
    return submission

# Example with dummy predictions (replace with actual predictions after training)
# Every row gets probability 1/3 per class, i.e. a uniform baseline.
dummy_preds = np.full((len(test_df), 3), 1/3)  # Equal probabilities as baseline
# This call writes submission.csv (the default output_path) as a side effect
submission = create_submission(test_df['id'].values, dummy_preds)
# Left as the final expression so the notebook renders the preview
submission.head()

## Tips for Better Performance

1. **Larger Models**: Use `deberta-v3-base` or `deberta-v3-large` for better performance
2. **Longer Sequences**: Increase `max_length` to 1024 or 2048 if GPU memory allows
3. **Cross-Validation**: Use 5-fold stratified CV and ensemble predictions
4. **Data Augmentation**: Swap response_a and response_b with flipped labels
5. **Gradient Accumulation**: Use to simulate larger batch sizes
6. **Learning Rate Scheduling**: The `Trainer` default is linear decay after warmup; try cosine annealing as an alternative
7. **Model Ensembling**: Combine predictions from multiple models (DeBERTa, Longformer, etc.)