In [None]:
from datasets import load_dataset

# Load the "test" split of the `yitingxie/rlhf-reward-datasets` dataset.
# NOTE(review): `split` is passed as a one-element list and the following cell
# reads index 0 of the result — presumably `load_dataset` returns one dataset
# per listed split; confirm against the `datasets` documentation.
raw_dataset = load_dataset("yitingxie/rlhf-reward-datasets", split=["test"])


In [None]:
# Materialize the first (and only) requested split into a plain list of rows.
# The guard keeps this cell idempotent: `raw_dataset` is rebound in place, so
# re-running the cell once it already holds row dicts would otherwise replace
# it with `list(first_row)`, i.e. a list of the first row's keys.
# NOTE(review): assumes rows produced by iterating the split are plain dicts.
if raw_dataset and not isinstance(raw_dataset[0], dict):
    raw_dataset = list(raw_dataset[0])
raw_dataset[:3]


In [None]:
import random
from typing import Mapping
from llama_likes import Completion, PreferenceInput

# Keys of the two completions in each dataset entry. The same strings are
# reused below as the `player_id` of the corresponding `Completion`, which is
# how the human preference is recovered after the completions are shuffled.
CHOSEN = "chosen"
REJECTED = "rejected"


def to_preference_input(entry: Mapping[str, str]) -> PreferenceInput:
    """
    Convert one raw dataset entry into a PreferenceInput.

    The chosen and rejected completions are wrapped in `Completion` objects
    (tagged with CHOSEN / REJECTED as their player id) and placed into the
    A/B slots in random order, using the global `random` module state.

    Args:
        entry (Mapping[str, str]): Entry providing the "prompt", CHOSEN and
            REJECTED keys.

    Returns:
        PreferenceInput: The prompt together with the two shuffled completions.
    """
    candidates = [
        Completion(player_id=label, completion=entry[label])
        for label in (CHOSEN, REJECTED)
    ]
    # Randomize which completion ends up in slot A and which in slot B.
    random.shuffle(candidates)
    first, second = candidates

    return PreferenceInput(
        instruction=entry["prompt"],
        completion_a=first,
        completion_b=second,
    )


# `to_preference_input` shuffles the A/B position of each completion pair with
# the global `random` module. Seed it first so a fresh "Restart & Run All"
# yields the same positions, and therefore comparable labels and agreement
# metrics further down.
SHUFFLE_SEED = 42
random.seed(SHUFFLE_SEED)

data: list[PreferenceInput] = [to_preference_input(d) for d in raw_dataset]
data[:5]


Next, we briefly transform these inputs into a format that our comparison mechanism can digest later on...

In [None]:
from llama_likes import Payoff, PreferenceResult

def get_payoff(pref_input: PreferenceInput) -> Payoff:
    """
    Derive the human-preferred outcome for a shuffled preference input.

    Args:
        pref_input (PreferenceInput): Input whose completions carry CHOSEN or
            REJECTED as their player id.

    Returns:
        Payoff: PLAYER_A_WINS when completion A is the chosen one, otherwise
        PLAYER_B_WINS.
    """
    chosen_is_a = pref_input.completion_a.player_id == CHOSEN
    return Payoff.PLAYER_A_WINS if chosen_is_a else Payoff.PLAYER_B_WINS

# Pair every preference input with the payoff implied by the human choice, so
# the human labels have the same shape as the results collected from the model.
human_labeled = [
    PreferenceResult(preference_input=item, payoff=get_payoff(item))
    for item in data
]

human_labeled[:5]

Labeling a few examples with GPT...

In [None]:
from typing import Union
from llama_likes import OpenaiModel, OpenaiRanker, PreferenceError

# Only the first TEST_SAMPLE_SIZE preference inputs are sent to the ranker.
TEST_SAMPLE_SIZE = 10

openai_model = OpenaiModel.GPT_4
openai_ranker = OpenaiRanker(openai_model)

# One entry per input, in input order; per the annotation an entry is either a
# PreferenceResult or a PreferenceError. A comprehension is used instead of a
# `for input in ...` loop so the builtin `input` is not shadowed (and left
# shadowed in the kernel namespace) for the rest of the session.
labels: list[Union[PreferenceResult, PreferenceError]] = [
    openai_ranker.rank(pref_input) for pref_input in data[:TEST_SAMPLE_SIZE]
]

labels


In [None]:
# Keep only the entries that are PreferenceResult instances; anything else
# (e.g. PreferenceError) is dropped.
# NOTE(review): if anything is dropped here, `labels` no longer lines up
# index-by-index with `human_labeled[:TEST_SAMPLE_SIZE]`, which the final cell
# compares it against — confirm this is handled before trusting the metrics.
labels = [candidate for candidate in labels if isinstance(candidate, PreferenceResult)]
labels

In [None]:
from typing import Mapping, Sequence
from sklearn.isotonic import spearmanr
from sklearn.metrics import cohen_kappa_score


def calculate_cohens_kappa(data1: Sequence[int], data2: Sequence[int]) -> float:
    """
    Compute Cohen's Kappa between two sequences of categorical labels.

    Thin wrapper that delegates to `cohen_kappa_score`.

    Args:
        data1 (Sequence[int]): Category assigned to each item by the first rater.
        data2 (Sequence[int]): Category assigned to each item by the second rater.

    Returns:
        float: The score returned by `cohen_kappa_score(data1, data2)`.
    """
    kappa = cohen_kappa_score(data1, data2)
    return kappa


def calculate_spearmans_correlation(data1: Sequence[int], data2: Sequence[int]) -> float:
    """
    Calculate Spearman's rank correlation coefficient for two sets of ordinal data.

    Args:
        data1 (Sequence[int]): Ratings from the first rater.
        data2 (Sequence[int]): Ratings from the second rater.

    Returns:
        float: Spearman's rank correlation coefficient. The p-value computed
        alongside it is discarded.
    """
    # Import from SciPy, where `spearmanr` is actually defined, rather than
    # relying on it being reachable through `sklearn.isotonic`, which only
    # exposes it as an implementation detail.
    from scipy.stats import spearmanr

    correlation, _p_value = spearmanr(data1, data2)
    # Normalize the NumPy scalar to a plain Python float, matching the
    # annotated return type.
    return float(correlation)


def transform_payoff(payoff: tuple[float, float]) -> int:
    """
    Encode a payoff tuple as an integer category.

    The encoding is (1, 0) -> 0, (0.5, 0.5) -> 1 and (0, 1) -> 2.

    Args:
        payoff (tuple[float, float]): One of the three payoff tuples above.

    Returns:
        int: The category assigned to the payoff.

    Raises:
        KeyError: If `payoff` is not one of the three known tuples.
    """
    known_payoffs = ((1, 0), (0.5, 0.5), (0, 1))
    # The category of each payoff is simply its position in `known_payoffs`.
    category_of = {outcome: index for index, outcome in enumerate(known_payoffs)}
    return category_of[payoff]


def compare_ai_and_human(labels1: Sequence[PreferenceResult], labels2: Sequence[PreferenceResult]) -> Mapping[str, float]:
    """
    Measure agreement between two index-aligned sequences of preference results.

    Each result's payoff is encoded as an integer category via
    `transform_payoff`, and the two encoded sequences are then compared.

    Args:
        labels1 (Sequence[PreferenceResult]): Results from the first rater.
        labels2 (Sequence[PreferenceResult]): Results from the second rater;
            entry i must refer to the same item as entry i of `labels1`.

    Returns:
        Mapping[str, float]: "ck" holds Cohen's Kappa and "sr" holds Spearman's
        rank correlation of the encoded payoffs.

    Raises:
        ValueError: If the two sequences differ in length.
        KeyError: If a payoff value is not known to `transform_payoff`.
    """
    # Both metrics compare the sequences position by position, so a length
    # mismatch (e.g. after failed rankings were filtered out of one side only)
    # means the pairs are misaligned. Fail early with an actionable message.
    if len(labels1) != len(labels2):
        raise ValueError(
            "labels1 and labels2 must have the same length and be aligned "
            f"index-by-index, got {len(labels1)} and {len(labels2)} entries"
        )

    # The encoded label vectors are printed so they can be inspected next to
    # the resulting scores.
    data1 = [transform_payoff(label.payoff.value) for label in labels1]
    print(data1)
    data2 = [transform_payoff(label.payoff.value) for label in labels2]
    print(data2)

    # Go through the local wrappers so both metrics are computed the same way
    # everywhere in this notebook.
    ck = calculate_cohens_kappa(data1, data2)
    sr = calculate_spearmans_correlation(data1, data2)
    return {
        "ck": ck,
        "sr": sr
    }


In [None]:
# Compare the human labels of the sampled inputs with the model's labels.
# NOTE(review): `labels` was filtered down to PreferenceResult entries in an
# earlier cell, while the human side is a plain prefix slice — if any ranking
# was dropped, the two sequences differ in length and are no longer aligned
# item-by-item; confirm before interpreting the scores.
result = compare_ai_and_human(human_labeled[:TEST_SAMPLE_SIZE], labels)
result