# Synthetic Preference Generation


In [None]:
import numpy as np

In [None]:
n = 15
scores = np.random.rand(n)

In [None]:
m = 5*n

comparisons = []

for _ in range(m):
  a = np.random.randint(n)
  b = np.random.randint(n)
  while b == a:
    b = np.random.randint(n)

  comparisons.append((a, b, scores[a] > scores[b]))

In [None]:
fruits = {
    1: "Apple",
    2: "Banana",
    3: "Cherry",
    4: "Date",
    5: "Elderberry",
    6: "Fig",
    7: "Grape",
    8: "Honeydew",
    9: "Indian Fig",
    10: "Jackfruit",
    11: "Kiwi",
    12: "Lemon",
    13: "Mango",
    14: "Nectarine",
    15: "Orange",
    16: "Papaya",
    17: "Quince",
    18: "Raspberry",
    19: "Strawberry",
    20: "Tangerine",
    21: "Ugli Fruit",
    22: "Vanilla Bean",
    23: "Watermelon",
    24: "Xigua",
    25: "Yellow Passion Fruit",
    26: "Zucchini (Yes, it's a fruit!)",
    27: "Avocado",
    28: "Blackberry",
    29: "Blueberry",
    30: "Cantaloupe",
    31: "Coconut",
    32: "Cranberry",
    33: "Dragon Fruit",
    34: "Durian",
    35: "Gooseberry",
    36: "Guava",
    37: "Jujube",
    38: "Kumquat",
    39: "Lychee",
    40: "Mulberry",
    41: "Olive",
    42: "Passion Fruit",
    43: "Peach",
    44: "Pear",
    45: "Pineapple",
    46: "Plum",
    47: "Pomegranate",
    48: "Starfruit",
    49: "Sapodilla",
    50: "Tamarillo",
}


In [None]:
comparisons


Rank: all entities
Pairwise comparisons: A > B, B > C, ....

Perform rank aggregation and provide the overall ranking

In [None]:
named_comparisons = ""

for a, b, a_preferred in comparisons:
  named_comparisons += f"{fruits[a+1]} {'>' if a_preferred else '<'} {fruits[b+1]}; "

In [None]:
named_comparisons

In [None]:
", ".join([fruits[i+1] for i in range(n)])

In [None]:
[fruits[i+1] for i in np.argsort(-scores)]

### Caloric Density Based Ranking

Without calorie information: Strawberry, Watermelon, Cantaloupe, Orange, Apple, Peach, Pineapple, Avocado, Mango, Banana, Coconut Meat, Dates, Grapes, Dried Figs, Dried Apricots

With prompt A: Strawberry Cantaloupe Watermelon Orange Apple Peach Pineapple Mango Banana Avocado Coconut Meat Dates Grapes Dried Figs Dried Apricots



In [None]:
caloric_density_ranking = {
    1: "Watermelon",       # ~30 kcal
    2: "Strawberry",       # ~32 kcal
    3: "Cantaloupe",       # ~34 kcal
    4: "Peach",            # ~39 kcal
    5: "Orange",           # ~47 kcal
    6: "Apple",            # ~52 kcal
    7: "Pineapple",        # ~50 kcal
    8: "Mango",            # ~60 kcal
    9: "Grapes",           # ~69 kcal
    10: "Banana",          # ~89 kcal
    11: "Avocado",         # ~160 kcal
    12: "Coconut Meat",    # ~354 kcal
    13: "Dates",           # ~282 kcal
    14: "Dried Figs",      # ~249 kcal
    15: "Dried Apricots",  # ~241 kcalconda
}

In [None]:
named_comparisons = ""
for _ in range(m):
  a, b = np.random.choice(a=15, size=2, replace=False)
  named_comparisons += f"{caloric_density_ranking[a+1]} {'>' if a < b else '<'} {caloric_density_ranking[b+1]}; "

In [None]:
named_comparisons

# Preference Data and Standard Aggregation Algorithms

In [5]:
from huggingface_hub import login
login("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") # confidential

In [6]:
# !pip install -q datasets trueskill # -q : quiet mode
from datasets import load_dataset
import pandas as pd
import numpy as np
import trueskill
from collections import defaultdict
from scipy.stats import kendalltau
import random

random.seed(42)

In [29]:
dataset = load_dataset("konan-kun/icc-test-championship-rankings-2023-2025-cycle")

In [30]:
df = dataset['train'].to_pandas()

In [43]:
df

Unnamed: 0,Rank,Team,M,W,L,T,D,N/R,PT,PCT,Series Form,Next,Matches
0,1,South Africa,12,8,3,0,1,0,100,69.44,W W W W W,-,"[W January 03, 2025, 2nd Test, South Africa vs..."
1,2,Australia,19,13,4,0,2,0,154,67.54,D W W W W,-,"[W February 06, 2025, 2nd Test, Australia vs S..."
2,3,India,19,9,8,0,2,0,114,50.0,W L D L L,-,"[L January 03, 2025, 5th Test, India vs Austra..."
3,4,New Zealand,14,7,7,0,0,0,81,48.21,W W L L W,-,"[W December 14, 2024, 3rd Test, New Zealand vs..."
4,5,England,22,11,10,0,1,0,114,43.18,L L W W L,-,"[L December 14, 2024, 3rd Test, England vs New..."
5,6,Sri Lanka,13,5,8,0,0,0,60,38.46,W L L L L,-,"[L February 06, 2025, 2nd Test, Sri Lanka vs A..."
6,7,Bangladesh,12,4,8,0,0,0,45,31.25,L L L L W,-,"[W November 30, 2024, 2nd Test, Bangladesh vs ..."
7,8,West Indies,13,3,8,0,2,0,44,28.21,L W L L W,-,"[W January 25, 2025, 2nd Test, West Indies vs ..."
8,9,Pakistan,14,5,9,0,0,0,47,27.98,W L L W L,-,"[L January 25, 2025, 2nd Test, Pakistan vs Wes..."


In [33]:
preference_data = []
matchday_dict = {}
unique_matches = []
team_dict = {0: 'South Africa', 1: 'Australia', 2: 'India', 3: 'New Zealand', 4: 'England', 5: 'Sri Lanka', 6: 'Bangladesh', 7: 'West Indies', 8: 'Pakistan'}
for idx, team_matches in enumerate(df['Matches']):
    for mat in team_matches:
        matchday = ' '.join(mat.split(',')[0].split(' ')[1:]) + mat.split(',')[1]
        if matchday in matchday_dict.keys():
            continue
        unique_matches.append(mat)
        match_status = mat.split(' ')[0]
        opponent = None
        if match_status == 'D':
            opponent = mat.split('vs')[-1].split('Match drawn')[0].strip()
        elif mat.replace(team_dict[idx], '_').count('_') == 2:
            opponent = mat.replace(team_dict[idx], '_').split('vs')[-1].split('_')[0].strip()
        else:
            if len(mat.split('vs')[-1].split('won')[0].strip().split(' ')) == 2:
                opponent = mat.split('vs')[-1].split('won')[0].strip().split(' ')[0].strip()
            else:
                opponent = ' '.join(mat.split('vs')[-1].split('won')[0].strip().split(' ')[:2])
        matchday_dict[matchday] = (team_dict[idx], opponent, match_status)

In [36]:
preference_data

[('South Africa', 'Pakistan', 'W'),
 ('South Africa', 'Pakistan', 'W'),
 ('South Africa', 'Sri Lanka', 'W'),
 ('South Africa', 'Sri Lanka', 'W'),
 ('South Africa', 'Bangladesh', 'W'),
 ('South Africa', 'Bangladesh', 'W'),
 ('South Africa', 'West Indies', 'W'),
 ('South Africa', 'West Indies', 'D'),
 ('South Africa', 'New Zealand', 'L'),
 ('South Africa', 'New Zealand', 'L'),
 ('South Africa', 'India', 'L'),
 ('South Africa', 'India', 'W'),
 ('Australia', 'Sri Lanka', 'W'),
 ('Australia', 'Sri Lanka', 'W'),
 ('Australia', 'India', 'D'),
 ('Australia', 'India', 'W'),
 ('Australia', 'India', 'L'),
 ('Australia', 'New Zealand', 'W'),
 ('Australia', 'New Zealand', 'W'),
 ('Australia', 'West Indies', 'L'),
 ('Australia', 'West Indies', 'W'),
 ('Australia', 'Pakistan', 'W'),
 ('Australia', 'England', 'L'),
 ('Australia', 'England', 'D'),
 ('Australia', 'England', 'L'),
 ('Australia', 'England', 'W'),
 ('Australia', 'England', 'W'),
 ('India', 'New Zealand', 'L'),
 ('India', 'New Zealand', 'L'

In [35]:
for key, value in matchday_dict.items():
    preference_data.append(value)

In [17]:
'''
Trueskill Rank Aggregation
'''

ratings = defaultdict(trueskill.Rating)
print(ratings['India']) # unranked as of now
for p1, p2, outcome in preference_data:
    if outcome == 'W':
        ratings[p1], ratings[p2] = trueskill.rate_1vs1(ratings[p1], ratings[p2])
    elif outcome == 'L':
        ratings[p2], ratings[p1] = trueskill.rate_1vs1(ratings[p2], ratings[p1])
    else:
        ratings[p1], ratings[p2] = trueskill.rate_1vs1(ratings[p1], ratings[p2], drawn=True)

trueskill.Rating(mu=25.000, sigma=8.333)


In [18]:
trueskill_rankings = []
gold_rankings = df['Team'].tolist()
team_identifier = {}

for idx, team in enumerate(gold_rankings):
    team_identifier[team] = idx

for team, rating in sorted(ratings.items(), key=lambda x: x[1].mu, reverse=True):
    trueskill_rankings.append(team)

In [23]:
'''
Kendall’s tau is a measure of the correspondence between two rankings.
statistic tells us about the correlation between the two rankings
pvalue tells us whether the result is statistically significant or not (requires more reading)
'''
gold_list = [team_identifier[team] for team in gold_rankings]
trueskill_list = [team_identifier[team] for team in trueskill_rankings]
kendalltau(gold_list, trueskill_list).statistic

np.float64(0.611111111111111)

In [24]:
from scipy.stats import spearmanr
spearmanr(gold_list, trueskill_list)

SignificanceResult(statistic=np.float64(0.8166666666666667), pvalue=np.float64(0.007224785246358785))

In [12]:
import base64
import os
from google import genai
from google.genai import types


client = genai.Client(
        api_key="AIzaSyAZCjoHhlZM7hxZbsCFksnNpZzYLOpedPo",
    )

model = "gemini-2.0-flash"

system_inst = """
You are given pairwise match results between teams, where each match can result in a win, loss, or a draw. Generate the final team rankings based on overall wins, losses, draws and number of matches played by a team.
Respond with the ranked list of only team names in descending order of skill.
"""

def generate(prompt=None):
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=prompt),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=0,
        response_mime_type="text/plain",
        system_instruction=[
            types.Part.from_text(text=system_inst),
        ],
    )

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        print(chunk.text, end="")

In [42]:
preference_data

[('South Africa', 'Pakistan', 'W'),
 ('South Africa', 'Pakistan', 'W'),
 ('South Africa', 'Sri Lanka', 'W'),
 ('South Africa', 'Sri Lanka', 'W'),
 ('South Africa', 'Bangladesh', 'W'),
 ('South Africa', 'Bangladesh', 'W'),
 ('South Africa', 'West Indies', 'W'),
 ('South Africa', 'West Indies', 'D'),
 ('South Africa', 'New Zealand', 'L'),
 ('South Africa', 'New Zealand', 'L'),
 ('South Africa', 'India', 'L'),
 ('South Africa', 'India', 'W'),
 ('Australia', 'Sri Lanka', 'W'),
 ('Australia', 'Sri Lanka', 'W'),
 ('Australia', 'India', 'D'),
 ('Australia', 'India', 'W'),
 ('Australia', 'India', 'L'),
 ('Australia', 'New Zealand', 'W'),
 ('Australia', 'New Zealand', 'W'),
 ('Australia', 'West Indies', 'L'),
 ('Australia', 'West Indies', 'W'),
 ('Australia', 'Pakistan', 'W'),
 ('Australia', 'England', 'L'),
 ('Australia', 'England', 'D'),
 ('Australia', 'England', 'L'),
 ('Australia', 'England', 'W'),
 ('Australia', 'England', 'W'),
 ('India', 'New Zealand', 'L'),
 ('India', 'New Zealand', 'L'

In [37]:
len(preference_data), len(unique_matches)

(59, 59)

In [39]:
llm_preference_data = [mat.split(',')[-1].split('by')[0].strip() for mat in unique_matches]
user_text = '\n'.join(llm_preference_data)

In [41]:
print(user_text)

South Africa vs Pakistan South Africa won
South Africa vs Pakistan South Africa won
South Africa vs Sri Lanka South Africa won
South Africa vs Sri Lanka South Africa won
South Africa vs Bangladesh South Africa won
South Africa vs Bangladesh South Africa won
South Africa vs West Indies South Africa won
South Africa vs West Indies Match drawn
South Africa vs New Zealand New Zealand won
South Africa vs New Zealand New Zealand won
South Africa vs India India won
South Africa vs India South Africa won
Australia vs Sri Lanka Australia won
Australia vs Sri Lanka Australia won
Australia vs India Match drawn
Australia vs India Australia won
Australia vs India India won
Australia vs New Zealand Australia won
Australia vs New Zealand Australia won
Australia vs West Indies West Indies won
Australia vs West Indies Australia won
Australia vs Pakistan Australia won
Australia vs England England won
Australia vs England Match drawn
Australia vs England England won
Australia vs England Australia won
Aus

In [14]:
generate(prompt=user_text)

Here's the ranked list of teams based on the provided match results:

1.  South Africa
2.  Australia
3.  India
4.  England
5.  Sri Lanka
6.  New Zealand
7.  Pakistan
8.  Bangladesh
9.  West Indies

In [None]:
gemini_rankings = ["South Africa", "Australia", "India", "England", "Sri Lanka", "New Zealand", "Pakistan", "Bangladesh", "West Indies"]
gemini_list = [team_identifier[team] for team in gemini_rankings]
kendalltau(gold_list, gemini_list)

In [None]:
# calibration check

random.shuffle(llm_preference_data)
user_text = '\n'.join(llm_preference_data)
generate(prompt=user_text)

In [16]:
shuffled_gemini_rankings = ["Australia", "South Africa", "England", "New Zealand", "India", "Pakistan", "Sri Lanka", "Bangladesh", "West Indies"]
gemini_list = [team_identifier[team] for team in shuffled_gemini_rankings]
kendalltau(gold_list, gemini_list)

NameError: name 'team_identifier' is not defined