### Assignment 2

#### Step 0

In [None]:
import numpy as np
import pandas as pd 
import tensorflow as tf
import torch
import keras
import keras_hub
import re
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import os
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Walk the Kaggle input directory and load the competition CSVs.
# When several files share a name, the one visited last is the one kept.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        csv_path = os.path.join(root, name)
        if name == "train.csv":
            train = pd.read_csv(csv_path)
        elif name == "test.csv":
            test = pd.read_csv(csv_path)

print("Train shape:", train.shape)
print("Test shape:", test.shape)
train.head()

In [None]:
# List the available columns, then count how many rows fall into each outcome.
print('Train columns: ', train.columns.tolist())
print('\nTarget distribution: ')
target_columns = ['winner_model_a', 'winner_model_b', 'winner_tie']
print(train[target_columns].sum())

In [None]:
# Quick data-integrity check: missing values, duplicated rows, duplicated ids.

# Missing values per column. A bare `frame.isnull()` statement only builds a
# boolean frame and throws it away, so the per-column counts are printed.
print("Missing values per column (train):")
print(train.isnull().sum())
print("Missing values per column (test):")
print(test.isnull().sum())

# Number of fully duplicated rows (test first, then train).
print(test.duplicated().sum())
print(train.duplicated().sum())

# Rows whose 'id' repeats an id that already occurred.
total_id = len(train['id'])
total_unique_id = len(train['id'].unique())

print("Total number of 'id' duplicates: ",(total_id - total_unique_id))

#### Step 1

In [None]:
#extracting features
def get_sentence_count(series):
    """Count sentence-ending marks in every string of ``series``.

    Each '.', '!' and '?' character counts once, so "..." contributes three.
    Returns the result of ``series.map`` with one count per element.
    """
    def count_terminators(text):
        return sum(text.count(mark) for mark in ('.', '!', '?'))

    return series.map(count_terminators)

def get_avg_word_len(series):
    """Return the mean length of the whitespace-separated words of each string.

    Strings without any word (empty or whitespace only) map to 0.
    """
    def mean_word_length(text):
        words = text.split()
        if len(words) > 0:
            return np.mean([len(word) for word in words])
        return 0

    return series.map(mean_word_length)

def get_upper_ratio(series):
    """Return, per string, the share of characters that are uppercase.

    The denominator is the full string length; empty strings map to 0.
    """
    def upper_fraction(text):
        if len(text) > 0:
            return sum(1 for ch in text if ch.isupper()) / len(text)
        return 0

    return series.map(upper_fraction)

def get_num_digits(series):
    """Count the digit characters (regex ``\\d``) in every string of ``series``."""
    def count_digits(text):
        return sum(1 for _ in re.finditer(r'\d', text))

    return series.map(count_digits)

def get_punct_count(series):
    """Count the characters '!', '?', ';', ':' and ',' in every string.

    Full stops are not part of this set.
    """
    def count_marks(text):
        return sum(text.count(mark) for mark in "!?;:,")

    return series.map(count_marks)

def extract_features(data):
    """Build the hand-crafted feature table for a frame of response pairs.

    ``data`` needs the columns 'prompt', 'response_a' and 'response_b'; their
    values must support ``len`` and ``str`` methods such as ``split``.
    For every per-response statistic the feature is "value for A minus value
    for B"; the prompt contributes its character length.

    Side effect: the eight feature columns are also written onto ``data``.
    Returns the selection of those eight columns from ``data``.
    """
    answers_a = data['response_a']
    answers_b = data['response_b']

    def char_count(series):
        return series.map(len)

    def word_count(series):
        return series.map(lambda text: len(text.split()))

    # (output column, per-response statistic), listed in output order.
    paired_stats = [
        ('diff_response_len', char_count),
        ('diff_word_count', word_count),
        ('diff_sentence', get_sentence_count),
        ('diff_avg_word_len', get_avg_word_len),
        ('diff_upper_ratio', get_upper_ratio),
        ('diff_num_digits', get_num_digits),
        ('diff_punct_count', get_punct_count),
    ]

    # Compute every difference before touching ``data``.
    differences = {
        column: stat(answers_a) - stat(answers_b)
        for column, stat in paired_stats
    }

    for column, values in differences.items():
        data[column] = values
    data['prompt_len'] = data['prompt'].map(len)

    feature_columns = [column for column, _ in paired_stats] + ['prompt_len']
    return data[feature_columns]

In [None]:
def make_class_label(row):
    """Collapse the one-hot winner columns of ``row`` into a single class id.

    Returns 0 for a tie, 1 when model A wins and 2 when model B wins. The
    columns are checked in that order and the first one equal to 1 decides.
    Returns None when none of the three columns equals 1.
    """
    label_by_column = (
        ('winner_tie', 0),        # tie
        ('winner_model_a', 1),    # model A wins
        ('winner_model_b', 2),    # model B wins
    )
    for column, label in label_by_column:
        if row[column] == 1:
            return label
    return None

# Collapse the one-hot winner columns into one label per row
# (0 = tie, 1 = model A wins, 2 = model B wins).
train['y_class'] = train.apply(make_class_label, axis=1)

y = train['y_class']

# Hand-crafted length / punctuation features (also written onto `train`).
X = extract_features(train)

import seaborn as sns
import matplotlib.pyplot as plt

# One box plot per feature, grouped by class, to eyeball how well each
# feature separates the three outcomes.
for col in X.columns:
    sns.boxplot(x=y, y=X[col])
    plt.title(col)
    plt.show()

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# `multi_class` is deliberately not passed: with the lbfgs solver a target
# with three classes is already fitted as a multinomial model, and the
# parameter is deprecated since scikit-learn 1.5.
model = LogisticRegression(
    solver='lbfgs',
    C=2.0,
    max_iter=1000
)


In [None]:
# Fit the baseline on the training split, then report hold-out accuracy.
model.fit(X_train, y_train)

valid_accuracy = model.score(X_valid, y_valid)
print("Validation Accuracy:", valid_accuracy)

In [None]:
# Score the test set with the baseline model and write the submission file.
X_test = extract_features(test)
proba = model.predict_proba(X_test)

# predict_proba orders its columns like model.classes_. The labels are
# 0 = tie, 1 = model A wins, 2 = model B wins, so each probability column is
# looked up by label instead of assuming that position 0 means "model A".
class_column = {label: position for position, label in enumerate(model.classes_)}

# Column names follow the winner_* naming used by the later submissions.
submission = pd.DataFrame({'id': test['id'],
                           'winner_model_a': proba[:, class_column[1]],
                           'winner_model_b': proba[:, class_column[2]],
                           'winner_tie': proba[:, class_column[0]],})

submission.to_csv('/kaggle/working/submission.csv', index=False)

#### Step 2

In [None]:
# Use the GPU when PyTorch can see one (faster training), otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics import log_loss, accuracy_score

# Baseline log loss quoted in the comparison line printed after evaluation.
BASELINE_LOG_LOSS = 1.0871

emb_model = SentenceTransformer('/kaggle/input/all-minilm-l6-v2/all-MiniLM-L6-v2', device=device) #lightweight model


def build_embedding_features(frame):
    """Embed the prompt and both responses of ``frame`` into one feature matrix.

    ``frame`` needs the columns 'prompt', 'response_a' and 'response_b'.
    Returns a 2-D array laid out as
    [prompt embedding | resp_a - resp_b | abs(resp_a - resp_b)].
    The signed difference keeps the direction of the comparison, the absolute
    difference its magnitude.
    """
    prompt_emb = emb_model.encode(frame['prompt'].tolist(), batch_size=64, convert_to_numpy=True)
    resp_a_emb = emb_model.encode(frame['response_a'].tolist(), batch_size=32, convert_to_numpy=True)
    resp_b_emb = emb_model.encode(frame['response_b'].tolist(), batch_size=32, convert_to_numpy=True)

    diff_emb = resp_a_emb - resp_b_emb
    return np.hstack([prompt_emb, diff_emb, np.abs(diff_emb)])


#for train data
X_features = build_embedding_features(train)

y_train = train['y_class']
X_tr, X_val, y_tr, y_val = train_test_split(X_features, y_train,
                                            test_size=0.2, random_state=42, stratify=y_train)

# The scaler is fitted on the training split only and reused for validation
# and test data.
scaler = StandardScaler()
X_trsc = scaler.fit_transform(X_tr)
X_valsc = scaler.transform(X_val)

#choosing which is better
# `multi_class` is not passed: lbfgs already fits a multinomial model for a
# multi-class target and the parameter is deprecated since scikit-learn 1.5.
classifier = LogisticRegression(C=0.01, max_iter=2000, solver='lbfgs', random_state=42)

# Alternative that was tried in place of the logistic regression:
#classifier = RandomForestClassifier(n_estimators=100, max_depth=20, random_state=42, n_jobs=-1)

classifier.fit(X_trsc, y_tr)

#evaluation
y_val_pred = classifier.predict_proba(X_valsc)
val_lloss = log_loss(y_val, y_val_pred)
emb_acc = accuracy_score(y_val, classifier.predict(X_valsc))


print(f"Embedding Model Validation Accuracy: {emb_acc:.4f}")
print(f"Validation Log Loss: {val_lloss:.4f}")

print(f"   Improvement over baseline ({BASELINE_LOG_LOSS}): {BASELINE_LOG_LOSS - val_lloss:.4f}")

#repeating everything for test data to submit
X_test = build_embedding_features(test)
X_testsc = scaler.transform(X_test)

#submission
# Labels are 0 = tie, 1 = model A wins, 2 = model B wins, hence the column order.
test_predp = classifier.predict_proba(X_testsc)
submission = pd.DataFrame({'id': test['id'],
                           'winner_model_a': test_predp[:, 1],
                           'winner_model_b': test_predp[:, 2],
                           'winner_tie': test_predp[:, 0]})

submission.to_csv('/kaggle/working/submission.csv', index=False)
print('Embedding-based submission saved insted of step1')

#### Step 3

In [None]:
from keras import mixed_precision

In [None]:
# Hold out 20% of the training rows for validation, keeping the class
# proportions of `y_class` equal in both parts.
y = train['y_class']
train_df, val_df = train_test_split(
    train,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

def fmt(row, prompt_chars=200, response_chars=150):
    """Join a row's prompt and both responses into one model input string.

    Each field is converted with ``str`` and cut to its character budget so a
    long prompt cannot crowd the responses out of the model's context.

    Args:
        row: mapping-like object with 'prompt', 'response_a' and 'response_b'.
        prompt_chars: maximum number of prompt characters kept.
        response_chars: maximum number of characters kept per response.

    Returns:
        "<prompt> [SEP] <response_a> [SEP] <response_b>".
    """
    # Every field is truncated from its first character, whatever its type;
    # non-string values are simply stringified first.
    prompt = str(row['prompt'])[:prompt_chars]
    resp_a = str(row['response_a'])[:response_chars]
    resp_b = str(row['response_b'])[:response_chars]
    return f"{prompt} [SEP] {resp_a} [SEP] {resp_b}"

# One formatted input string per row, for the train / validation / test frames.
train_texts, val_texts, test_texts = (
    [fmt(record) for _, record in frame.iterrows()]
    for frame in (train_df, val_df, test)
)

# Integer class labels aligned with train_texts and val_texts.
y_tr = train_df['y_class'].values
y_val = val_df['y_class'].values

In [None]:
# Switch the global Keras dtype policy to 'mixed_float16'.
mixed_precision.set_global_policy('mixed_float16') 

# Preset name and training hyper-parameters for the DeBERTa fine-tuning run.
MODEL_NAME = 'deberta_v3_extra_small_en' 
SEQ_LEN = 128     # shorter sequences to speed training up (from an estimated 40 min)
BATCH_SIZE = 16   # small batch size
EPOCHS = 3    # epoch count (original note: early stopping)

In [None]:
#loading model
# The sequence length is a preprocessor setting, so the preprocessor is built
# once and shared by the ready-made classifier and the fallback model.
preprocessor = keras_hub.models.DebertaV3Preprocessor.from_preset(MODEL_NAME, sequence_length=SEQ_LEN)

try:
    # activation='softmax' makes the model emit probabilities, which is what
    # the loss below, log_loss and the submission file all expect.
    classifier = keras_hub.models.DebertaV3Classifier.from_preset(
        MODEL_NAME,
        num_classes=3,
        preprocessor=preprocessor,
        activation='softmax',
    )
    print("Loaded DebertaV3Classifier directly")

    # This classifier tokenizes raw strings itself through its preprocessor.
    train_inputs, val_inputs, test_inputs = train_texts, val_texts, test_texts
except Exception as e:
    print(f"Could not load classifier directly: {e}")
    print("Building custom model...")

    backbone = keras_hub.models.DebertaV3Backbone.from_preset(MODEL_NAME)

    # A plain keras.Model cannot tokenize, so the texts are converted up front
    # and the preprocessed inputs are what fit/predict receive below.
    train_inputs = preprocessor(train_texts)
    val_inputs = preprocessor(val_texts)
    test_inputs = preprocessor(test_texts)

    inputs = backbone.input  #tokens
    x = backbone(inputs)
    x = x[:, 0, :]  # embedding of the first token
    x = keras.layers.Dropout(0.2)(x)
    # float32 output layer so the softmax is not computed in half precision.
    outputs = keras.layers.Dense(3, activation='softmax', dtype='float32')(x)
    classifier = keras.Model(inputs, outputs)

print(f"Model parameters: {classifier.count_params():,}")

#freezing backbone for most epochs
if hasattr(classifier, "backbone"):
    for layer in classifier.backbone.layers[:-2]:  # keep top 2 blocks trainable
        layer.trainable = False
else:
    for layer in classifier.layers[:-2]:
        layer.trainable = False

# training
classifier.compile(optimizer=keras.optimizers.AdamW(learning_rate=3e-5),
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

#callbacks
callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=1, restore_best_weights=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, min_lr=1e-7, verbose=1),
]

hist = classifier.fit(x=train_inputs, y=y_tr, validation_data=(val_inputs, y_val), batch_size=BATCH_SIZE,
                      epochs=EPOCHS, callbacks=callbacks, verbose=1,)

# Predictions may come back in reduced precision; promote them and
# renormalise so every row sums to 1 before scoring.
val_probs = np.asarray(classifier.predict(val_inputs, batch_size=BATCH_SIZE, verbose=1), dtype=np.float64)
val_probs /= val_probs.sum(axis=1, keepdims=True)
val_preds = np.argmax(val_probs, axis=1)
val_acc = accuracy_score(y_val, val_preds)
val_loss = log_loss(y_val, val_probs)

print(f"\nValidation Accuracy: {val_acc:.4f} | Log Loss: {val_loss:.4f}")
print(classification_report(y_val, val_preds, target_names=['Tie', 'Model_A', 'Model_B']))

#submission
# Labels are 0 = tie, 1 = model A wins, 2 = model B wins, hence the column order.
test_probs = np.asarray(classifier.predict(test_inputs, batch_size=BATCH_SIZE, verbose=1), dtype=np.float64)
test_probs /= test_probs.sum(axis=1, keepdims=True)
submission = pd.DataFrame({
    'id': test['id'],
    'winner_model_a': test_probs[:, 1],
    'winner_model_b': test_probs[:, 2],
    'winner_tie': test_probs[:, 0]
})
submission.to_csv('/kaggle/working/submission.csv', index=False)
print('DeBERTa-based fine-tuning submission saved insted of step1')

print(f"DeBERTa Fine-tuning after Acc: {val_acc:.3f}, LogLoss: {val_loss:.3f}")

In [None]:
#loading model
try:
    # activation='softmax' makes the model emit probabilities, which is what
    # the loss below, log_loss and the submission file all expect.
    classifier = keras_hub.models.DebertaV3Classifier.from_preset(
        'deberta_v3_extra_small_en',
        num_classes=3,
        activation='softmax',
    )
    print("Loaded DebertaV3Classifier directly")

    # This classifier tokenizes raw strings itself, so the text lists are
    # passed through unchanged.
    train_inputs, val_inputs, test_inputs = train_texts, val_texts, test_texts
except Exception as e:
    print(f"Could not load classifier directly: {e}")
    print("Building custom model...")

    #custom model as planB
    # Only the sequence length is configured on the preprocessor.
    preprocessor = keras_hub.models.DebertaV3Preprocessor.from_preset(
        'deberta_v3_extra_small_en',
        sequence_length=256
    )
    backbone = keras_hub.models.DebertaV3Backbone.from_preset('deberta_v3_extra_small_en')

    #text processing
    # Each list is preprocessed in one call so the result is a single batch of
    # model inputs. The raw text lists are left untouched, which keeps this
    # cell re-runnable.
    train_inputs = preprocessor(train_texts)
    val_inputs = preprocessor(val_texts)
    test_inputs = preprocessor(test_texts)

    #model itself
    inputs = backbone.input
    x = backbone(inputs)
    x = keras.layers.GlobalAveragePooling1D()(x)
    x = keras.layers.Dropout(0.1)(x)
    # float32 output layer so the softmax is not computed in half precision.
    outputs = keras.layers.Dense(3, activation='softmax', dtype='float32')(x)
    classifier = keras.Model(inputs, outputs)

print(f"Model parameters: {classifier.count_params():,}")

#training
classifier.compile(
    optimizer=keras.optimizers.AdamW(learning_rate=2e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

#callback
callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=1,
                                           restore_best_weights=True, verbose=1),
             keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1,
                                               min_lr=1e-7, verbose=1)]


hist = classifier.fit(train_inputs, y_tr, validation_data=(val_inputs, y_val),
                      batch_size=8, epochs=3, callbacks=callbacks, verbose=1)

#evaluation
# Predictions may come back in reduced precision; promote them and
# renormalise so every row sums to 1 before scoring.
val_probs = np.asarray(classifier.predict(val_inputs, batch_size=8, verbose=1), dtype=np.float64)
val_probs /= val_probs.sum(axis=1, keepdims=True)
val_preds = np.argmax(val_probs, axis=1)
val_acc = accuracy_score(y_val, val_preds)
val_loss = log_loss(y_val, val_probs)

print(f"Validation Accuracy: {val_acc:.4f} | Log Loss: {val_loss:.4f}")
print(classification_report(y_val, val_preds, target_names=['Tie', 'Model_A', 'Model_B']))

#submission
# Labels are 0 = tie, 1 = model A wins, 2 = model B wins, hence the column order.
test_probs = np.asarray(classifier.predict(test_inputs, batch_size=8, verbose=1), dtype=np.float64)
test_probs /= test_probs.sum(axis=1, keepdims=True)
submission = pd.DataFrame({
    'id': test['id'],
    'winner_model_a': test_probs[:, 1],
    'winner_model_b': test_probs[:, 2],
    'winner_tie': test_probs[:, 0]
})
submission.to_csv('/kaggle/working/submission.csv', index=False)
print('DeBERTa-based fine-tuning submission saved insted of step1')

print(f"DeBERTa Fine-tuning after Acc: {val_acc:.3f}, LogLoss: {val_loss:.3f}")