Finetune LLMs to Predict Human Preference using Chatbot Arena conversations
This notebook contains a solution for the LLM Classification Finetuning competition on Kaggle.
Main objective: Predict which response users will prefer in a head-to-head battle between chatbots powered by large language models (LLMs).
Data
train.csv
•	id - A unique identifier for the row.
•	model_a/b - The identity of model_a/b. Included in train.csv but not test.csv.
•	prompt - The prompt that was given as an input (to both models).
•	response_a/b - The response from model_a/b to the given prompt.
•	winner_model_a/b/tie - Binary columns marking the judge's selection. The ground truth target column.
test.csv
•	id
•	prompt
•	response_a/b
sample_submission.csv A submission file in the correct format.
•	id
•	winner_model_a/b/tie - This is what is predicted from the test set.


In [None]:


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))



In [None]:
# Import the files 

import numpy as np
import pandas as pd
import os

# Directory holding the competition CSVs; change this one constant to point the
# notebook at a different copy of the data.
DATA_DIR = '/kaggle/input/c/llm-classification-finetuning'

# Training data, test data and the submission template.
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))
sample_submission = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))


In [None]:
# Preview the first rows of the training data

train.head()

In [None]:
# Preview the first rows of the test data

test.head()

Check for missing values in the data

In [None]:
# Count missing values per column in the training data

train.isnull().sum()

In [None]:
# Count missing values per column in the test data
test.isnull().sum()

In [None]:
# Column dtypes and non-null counts of the training data (still data exploration)

train.info()

In [None]:
# Column dtypes and non-null counts of the test data
test.info()

**converting data (especially categorical or textual data)**

In [None]:
#Encoding

from sklearn.preprocessing import LabelEncoder

# Gather every model name that appears in either slot of a training battle.
names_a = set(train['model_a'].unique())
names_b = set(train['model_b'].unique())
all_models = list(names_a.union(names_b))

# One shared encoder for both slots, so a given model gets the same integer
# whether it appears as model_a or model_b.
model_encoder = LabelEncoder().fit(all_models)

# Add the integer-encoded model names to the training frame.
for slot in ('model_a', 'model_b'):
    train[f'{slot}_encoded'] = model_encoder.transform(train[slot])

# Show the integer assigned to each model name.
print(f"Encoded {len(all_models)} unique models from training data:")
for code, name in enumerate(model_encoder.classes_):
    print(f"  {code}: {name}")

**Text-based numeric features in the dataframe**

In [None]:
#Create text-based numeric features

import string

def create_text_features(df):
    """Add simple numeric text statistics for the prompt and both responses.

    The new columns are written onto ``df`` itself (the input frame is
    modified in place) and the same frame is returned, so both
    ``create_text_features(df)`` and ``df = create_text_features(df)`` work.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the string columns ``prompt``, ``response_a`` and
        ``response_b``.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df`` with, for each text column, its length,
        word count, space-free character count, average word length,
        punctuation / question-mark / exclamation-mark / digit counts and
        uppercase ratio, plus response-to-prompt length ratios and the
        A-minus-B differences in response length and word count.
    """
    import re  # local import: used only to escape the punctuation character class

    text_cols = ('prompt', 'response_a', 'response_b')
    response_cols = ('response_a', 'response_b')
    eps = 1e-8  # added to denominators so empty texts do not divide by zero

    # string.punctuation contains '\', ']', '^' and '-', which have special
    # meaning inside a [...] class. Unescaped, the '\]' pair is read as an
    # escaped ']' and backslashes are never counted; re.escape makes every
    # punctuation character literal.
    punctuation_pattern = f'[{re.escape(string.punctuation)}]'

    # Text length features (all characters, whitespace included)
    for col in text_cols:
        df[f'{col}_length'] = df[col].str.len()

    # Word count features (whitespace-separated tokens)
    for col in text_cols:
        df[f'{col}_word_count'] = df[col].str.split().str.len()

    # Character count features (length after removing plain spaces)
    for col in text_cols:
        df[f'{col}_char_count'] = df[col].str.replace(' ', '', regex=False).str.len()

    # Average word length
    for col in text_cols:
        df[f'{col}_avg_word_length'] = df[f'{col}_char_count'] / (df[f'{col}_word_count'] + eps)

    # Response length ratio (response length / prompt length)
    for col in response_cols:
        df[f'{col}_length_ratio'] = df[f'{col}_length'] / (df['prompt_length'] + eps)

    # Difference in response lengths (A minus B)
    df['response_length_diff'] = df['response_a_length'] - df['response_b_length']
    df['response_word_count_diff'] = df['response_a_word_count'] - df['response_b_word_count']

    # Punctuation counts
    for col in text_cols:
        df[f'{col}_punctuation_count'] = df[col].str.count(punctuation_pattern)

    # Question marks and exclamation marks (raw string: '\?' is a regex escape,
    # not a Python string escape)
    for col in text_cols:
        df[f'{col}_question_marks'] = df[col].str.count(r'\?')

    for col in text_cols:
        df[f'{col}_exclamation_marks'] = df[col].str.count('!')

    # Uppercase ratio (ASCII capitals / total length)
    for col in text_cols:
        df[f'{col}_uppercase_ratio'] = df[col].str.count(r'[A-Z]') / (df[f'{col}_length'] + eps)

    # Number count (individual digit characters)
    for col in text_cols:
        df[f'{col}_number_count'] = df[col].str.count(r'\d')

    return df
# Apply the same feature engineering to both datasets so they end up with
# identical text-feature columns.
print("Creating text-based features for training data...")
train = create_text_features(train)

print("Creating text-based features for test data...")
test = create_text_features(test)

# Report what the banner announces: the frame sizes after feature creation.
print("\n" + "*" * 60)
print(" SUMMARY")
print("*" * 60)
print(f"Training data shape after feature engineering: {train.shape}")
print(f"Test data shape after feature engineering: {test.shape}")


**Converting one-hot to numeric labels in one line**

In [None]:
# Training target: collapse the three one-hot winner columns into one class
# label per row. argmax over this column order gives
# 0 = model A wins, 1 = model B wins, 2 = tie.
y = train[['winner_model_a', 'winner_model_b', 'winner_tie']].values.argmax(axis=1)

# Columns that must not be used as features:
#   - the row id and the raw text columns (not numeric features),
#   - the winner columns (they are the target),
#   - the model names AND their integer encodings: model identity is present in
#     train.csv but not in test.csv, so a classifier trained on it would see a
#     made-up constant for every test row at prediction time.
drop_cols = [
    'id',
    'model_a', 'model_b',
    'model_a_encoded', 'model_b_encoded',
    'prompt', 'response_a', 'response_b',
    'winner_model_a', 'winner_model_b', 'winner_tie'
]


In [None]:
# Feature matrix: every training column that is not listed in drop_cols.
X = train.drop(drop_cols, axis=1)

# Report the sizes of the feature matrix and target, and the feature names.
for summary_line in (
    f'Feature matrix shape:{X.shape}',
    f'Target vector shape:{y.shape}',
    f'Features:{list(X.columns)}',
):
    print(summary_line)

**Building the model**

In [None]:
#for model building

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, log_loss

# Hold out 20% of the rows for validation. Stratifying on y keeps the class
# distribution the same in both parts, and the fixed seed makes the split
# reproducible.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

# Sizes and per-class counts of the two parts.
print(f"Training set shape: {X_train.shape}")
print(f"Validation set shape: {X_val.shape}")
print(f"Training target distribution: {np.bincount(y_train)}")
print(f"Validation target distribution: {np.bincount(y_val)}")



In [None]:

# Scale the features, then set up a logistic regression classifier.

from sklearn.linear_model import LogisticRegression

# Standardize the features with statistics learned from the training split
# only, then apply that same transform to the validation split.
scaler = StandardScaler()
X_train_done = scaler.fit_transform(X_train)
X_val_done = scaler.transform(X_val)

# Multinomial (softmax) logistic regression.
# The explicit multi_class='ovr' + liblinear setup is avoided on purpose:
# the multi_class argument is deprecated in recent scikit-learn releases, and
# one-vs-rest fits three separate binary problems instead of optimising the
# multiclass log loss this notebook is evaluated on. With the lbfgs solver the
# default behaviour for a 3-class target is a single multinomial model.
model = LogisticRegression(
    random_state=42,   # fixed seed for reproducibility
    max_iter=1000,     # upper bound on solver iterations
    solver='lbfgs',
    C=1.0              # inverse regularisation strength
)

print(f"Model parameters: {model.get_params()}")

In [None]:
# Fit the classifier on the scaled training split

print("Training model wait...")
model.fit(X_train_done, y_train)
print("Training done!")

**Now make predictions**

In [None]:
# Hard class predictions for the training and validation splits
y_train_pred, y_val_pred = (
    model.predict(part) for part in (X_train_done, X_val_done)
)

# Class probabilities for both splits; log loss is computed from these
y_train_proba, y_val_proba = (
    model.predict_proba(part) for part in (X_train_done, X_val_done)
)

# Log loss (the evaluation metric) on each split
train_log_loss = log_loss(y_train, y_train_proba)
val_log_loss = log_loss(y_val, y_val_proba)

print(f"Training Log Loss: {train_log_loss:.4f}")
print(f"Validation Log Loss: {val_log_loss:.4f}")


In [None]:
# Per-class precision / recall / F1 on the validation split.
# Display names are listed in label order 0, 1, 2.
class_names = ['Model A Wins', 'Model B Wins', 'Tie']

print("\nValidation Classification Report:")
print(classification_report(y_val, y_val_pred, target_names=class_names))


**Prepare the test features**

In [None]:

# Remove the id and raw text columns from the test set, leaving only the
# engineered feature columns.
test_cols_drops = ['id', 'prompt', 'response_a', 'response_b']
X_test = test.drop(test_cols_drops, axis=1)

print(f"Test feature matrix shape: {X_test.shape}")
print(f"Test features: {list(X_test.columns)}")


In [None]:
# Compare the training and test feature names in both directions.
train_features = set(X.columns)
test_features = set(X_test.columns)

# In training but absent from test / in test but absent from training.
missing_features = train_features.difference(test_features)
extra_features = test_features.difference(train_features)

print(f"\nMissing features in test data: {missing_features}")
print(f"Extra features in test data: {extra_features}")


In [None]:
# Give the test frame every training-only column, filled with the default
# value 0 (the same default applies to encoded and non-encoded features).
for feature in missing_features:
    X_test[feature] = 0

# Select the columns in exactly the training order.
X_test = X_test[X.columns]

print(f"\nTest data after manupulating missing features:")
print(f"Features same as training: {list(X_test.columns) == list(X.columns)}")
print(f"Shape: {X_test.shape}")


In [None]:
# Scale the test features with the already-fitted scaler (transform only, no refit)

X_test_done = scaler.transform(X_test)

# Predict class probabilities for every test row (one column per class)
test_predictions = model.predict_proba(X_test_done)

print(f"\nTest predictions shape: {test_predictions.shape}")



In [None]:
# Build the submission frame: the test ids followed by one probability column
# per class, taken from prediction columns 0, 1 and 2 in that order.
submission = pd.DataFrame({'id': test['id']})
for class_idx, column_name in enumerate(['winner_model_a', 'winner_model_b', 'winner_tie']):
    submission[column_name] = test_predictions[:, class_idx]


In [None]:
# Display the first rows of the submission frame

print("\nSubmission file:")
print(submission.head())


In [None]:
# 5-fold cross-validation on the training split.
# Scaler and classifier are wrapped in one pipeline and given the UNSCALED
# features, so the scaling statistics are re-learned inside each fold from that
# fold's training part only. Cross-validating on pre-scaled data would let each
# fold's held-out rows influence the scaling.
from sklearn.pipeline import make_pipeline

cv_pipeline = make_pipeline(StandardScaler(), model)
Cross_Val_S = cross_val_score(cv_pipeline, X_train, y_train,
                              cv=5, scoring='neg_log_loss')
# The scorer returns negative log loss; flip the sign to report log loss.
Cross_Val_log_loss = -Cross_Val_S.mean()

print(f"Cross-validation Log Loss: {Cross_Val_log_loss:.4f} (+/- {Cross_Val_S.std() * 2:.4f})")


In [None]:
# Predict class probabilities for the scaled test features

test_predictions = model.predict_proba(X_test_done)

print("Ready for submission perfect!")
print(f"Test predictions shape: {test_predictions.shape}")



In [None]:
# Write the submission frame to submission.csv without the index column
submission.to_csv('submission.csv', index=False)
print(f"\nSubmission file saved as 'submission.csv'")
print(f"File contains {len(submission)} predictions")


In [None]:
# Verify that the submission matches the required competition format instead
# of only printing a success message.
probability_columns = ['winner_model_a', 'winner_model_b', 'winner_tie']

# Same columns, in the same order, as the provided submission template.
assert list(submission.columns) == list(sample_submission.columns), (
    f"Submission columns {list(submission.columns)} do not match "
    f"the template {list(sample_submission.columns)}"
)
# One prediction row per test row.
assert len(submission) == len(test), "Submission and test set differ in length"
# Probabilities must be present and sum to 1 for every row.
assert not submission[probability_columns].isnull().any().any(), "Submission contains missing probabilities"
assert np.allclose(submission[probability_columns].sum(axis=1), 1.0), "Class probabilities do not sum to 1"

print(submission.head(3).to_string(index=False))
print('Perfect prediction and sbmission matche !')