In [2]:
import requests
import pandas as pd
import numpy as np
import plotly.express as px 
from tqdm.notebook import tqdm

## Get with access to ballot

In [22]:
api_url = None

In [23]:
def get_speakers(tournament_slug):
    """Maps speaker IDs to names for a given tournament."""
    speakers = requests.get(f'{api_url}/tournaments/{tournament_slug}/speakers').json()

    speaker_map = {}

    for speaker in speakers:
        speaker_id = speaker['id']

        speaker_map[str(speaker_id)] = speaker['name']

    return speaker_map


In [41]:
def get_adjudicators(tournament_slug):
    """Maps adjudicator IDs to names for a given tournament."""
    adjudicators_map = {}

    response = requests.get(f'{api_url}/tournaments/{tournament_slug}/adjudicators')

    for adj in response.json():
        try: 
            adj_id = adj['id']
            adj_name = adj['name']
            adjudicators_map[str(adj_id)] = adj_name
        except Exception as e:
            print(f"Error processing adjudicator data: {e}")
            continue

    return adjudicators_map

In [25]:
def get_pairings(tournament_slug):
    """Fetches all pairings for a given tournament in one list."""
    rounds = requests.get(f'{api_url}/tournaments/{tournament_slug}/rounds').json()

    all_pairings = []

    for round in rounds: 
        round_seq = round['seq']
        pairings = requests.get(f'{api_url}/tournaments/{tournament_slug}/rounds/{round_seq}/pairings').json()
        all_pairings.extend(pairings)

    return all_pairings

In [26]:
def get_scores_from_ballots(tournament_slug):
    """Fetches scores from all ballots of all pairings for a given tournament, given these are public."""
    df = []

    pairings = get_pairings(tournament_slug)
    print(f'Found {len(pairings)} pairings.')
    adjudicators_map = get_adjudicators(tournament_slug)
    print(f'Found {len(adjudicators_map)} adjudicators.')
    speaker_map = get_speakers(tournament_slug)
    print(f'Found {len(speaker_map)} speakers.')

    for i in tqdm(range(len(pairings))): 
        pairing = pairings[i]
        try: 
            pairing_id = pairing['id']
            round_seq = pairing['url'].split('/')[-3]
            ballots = requests.get(f'{api_url}/tournaments/{tournament_slug}/rounds/{round_seq}/pairings/{pairing_id}/ballots').json()

            for ballot in ballots:
                adj_url = ballot['participant_submitter'] or pairing['adjudicators']['chair'] #TODO: prioritizes the submitter, is this valid? 
                adj_id = adj_url.split('/')[-1]
                adj_name = adjudicators_map[str(adj_id)]

                sheets = ballot['result']['sheets']
                for sheet in sheets: 
                    for team in sheet['teams']:
                        if 'speeches' not in team:
                            continue

                        for speech in team['speeches']:
                            score = speech['score']
                            speaker_id = speech['speaker'].split('/')[-1]

                            row = {
                                'round': round_seq,
                                'score': score,
                                'speaker': speaker_map[speaker_id],
                                'adj': adj_name
                            }

                            df.append(row)
        except Exception as e:
            print(f'Error processing pairing {pairing}: {e}')
            continue

    return pd.DataFrame(df)


In [8]:
df_from_ballots = pd.DataFrame()

In [37]:
existing_df = pd.read_csv('scores.csv')
print(f'Found {len(existing_df)} existing rows.')

search_space = ('wudc2020', 'wudc2021', 'wudc2022', 'wudc2023', 'wudc2024', 'wudc2025', 'stockholmopen2025')

Found 13848 existing rows.


In [42]:
for tournament_name in search_space: 
    if tournament_name in existing_df['tournament'].unique() or tournament_name in df_from_ballots['tournament'].unique():
        print(f'{tournament_name} already exists in the dataset. Skipping.')
        continue

    api_url = f'https://{tournament_name}.calicotab.com/api/v1'

    tournaments = requests.get(f'{api_url}/tournaments').json()

    if (len(tournaments) == 0):
        print(f'No tournaments found for {tournament_name}')
        continue
    if (len(tournaments) > 1):
        print(f'More than one tournament found for {tournament_name}. Data will be collected from only the first: {tournaments[0]['name']}.')

    print(f'Collecting data from {tournament_name}.')

    for tournament in tournaments:
        tournament_slug = tournament['slug']

        tournament_df = get_scores_from_ballots(tournament_slug)
        tournament_df['tournament'] = tournament_name

        df_from_ballots = pd.concat([df_from_ballots, tournament_df], ignore_index=True)
        break

wudc2020 already exists in the dataset. Skipping.
Collecting data from wudc2021.
Found 865 pairings.
Error processing adjudicator data: string indices must be integers, not 'str'
Found 0 adjudicators.


TypeError: string indices must be integers, not 'str'

In [43]:
df_from_ballots

Unnamed: 0,tournament,round,score,speaker,adj
0,wudc2024,1,77.0,Justin Wang,Cici Liu Sining
1,wudc2024,1,76.0,Aniket Godbole,Cici Liu Sining
2,wudc2024,1,75.0,,Cici Liu Sining
3,wudc2024,1,76.0,Martinez Sandoval Aleisha,Cici Liu Sining
4,wudc2024,1,75.0,Lit Wen Wong,Cici Liu Sining
...,...,...,...,...,...
15515,wudc2020,9,74.0,Ariel Nathaniel,Charlene Enriquez
15516,wudc2020,9,77.0,Blake Shapiro,Charlene Enriquez
15517,wudc2020,9,77.0,Tashin Patrick,Charlene Enriquez
15518,wudc2020,9,77.0,Jacob Boehm,Charlene Enriquez


In [44]:
if existing_df.empty:
    existing_df = df_from_ballots

else:
    uniq_tournaments = df_from_ballots['tournament'].unique()

    for tournament_name in uniq_tournaments:
        if tournament_name in existing_df['tournament'].unique():
            print(f'Data for {tournament_name} already exists. Skipping.')
        else:
            print(f'Adding data for {tournament_name}.')
            existing_df = pd.concat([existing_df, df_from_ballots[df_from_ballots['tournament'] == tournament_name]], ignore_index=True)

existing_df.to_csv(f'scores.csv', index=False)

Data for wudc2024 already exists. Skipping.
Data for wudc2025 already exists. Skipping.
Data for stockholmopen2025 already exists. Skipping.
Adding data for wudc2020.


## Get without access to ballots

In [349]:
temp = 'sido2025'
api_url = f'https://{temp}.calicotab.com/api/v1'

tournaments = requests.get(f'{api_url}/tournaments').json()
tournament_slug = temp 

In [350]:
def get_team_and_round_to_adj(tournament_slug):
    per_round_team_to_adj = {}

    pairings = get_pairings(tournament_slug)
    adjudicators_map = get_adjudicators(tournament_slug)

    for pairing in pairings: 
        round_seq = pairing['url'].split('/')[-3]
        teams = pairing['teams']

        chair_id = pairing['adjudicators']['chair'].split('/')[-1]  
        adjudicator = adjudicators_map[chair_id]

        for team in teams: 
            team_id = team['team'].split('/')[-1]

            per_round_team_to_adj[(str(round_seq), str(team_id))] = adjudicator

    return per_round_team_to_adj


In [351]:
def get_speaker_to_team(tournament_slug):
    speaker_to_team = {}

    speakers = requests.get(f'{api_url}/tournaments/{tournament_slug}/speakers').json()

    for speaker in speakers:
        speaker_id = speaker['id']

        team = speaker['team']
        team_id = team.split('/')[-1]   

        speaker_to_team[str(speaker_id)] = team_id

    return speaker_to_team


In [411]:
def get_from_standings(tournament_slug):
    df = pd.DataFrame()

    standings_url = f'{api_url}/tournaments/{tournament_slug}/speakers/standings/rounds'
    standings = requests.get(standings_url).json()

    speaker_to_team = get_speaker_to_team(tournament_slug)
    per_round_team_to_adj = get_team_and_round_to_adj(tournament_slug)
    speaker_map = get_speakers(tournament_slug)

    for standing in standings:
        speaker = standing['speaker']
        speaker_id = speaker.split('/')[-1]
        speaker_name = speaker_map[str(speaker_id)]

        team_id = speaker_to_team[speaker_id]

        for round in standing['rounds']:
            round_seq = round['round'].split('/')[-1]
            adj = per_round_team_to_adj[(str(round_seq), str(team_id))]

            for speech in round['speeches']:
                row = {
                    'tournament': tournament_slug,
                    'round': round_seq,
                    'score': speech['score'],
                    'speaker': speaker_name,
                    'adj': adj
                }

                df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    
    return df

In [353]:
df_from_standings = get_from_standings(tournament_slug)

In [355]:
# sanity check
df_from_standings[df_from_standings['adj'] == 'Alikhan'].sort_values('round')

Unnamed: 0,tournament,round,score,speaker,adj
403,sido2025,2,75.0,Lily Chengxi Feng,Alikhan
409,sido2025,2,76.0,Fiona Ruxin Xia,Alikhan
779,sido2025,2,72.0,Olive Yao,Alikhan
781,sido2025,2,71.0,Lucy Xu,Alikhan
1058,sido2025,2,75.0,Justin Yang,Alikhan
1064,sido2025,2,73.0,Huatao Xue,Alikhan
1357,sido2025,2,74.0,Jason Chen,Alikhan
1363,sido2025,2,71.0,Vera Xu,Alikhan
325,sido2025,3,74.0,Orlando Yao,Alikhan
326,sido2025,3,72.0,Orlando Yao,Alikhan


In [None]:
df = pd.concat([df_from_ballots, df_from_standings], ignore_index=True)

In [None]:
df

Unnamed: 0,tournament,round,score,speaker,adj
0,stockholm2025,1,73.0,Finn Mescher,Andrew Mabon
1,stockholm2025,1,72.0,Sergej Biserov,Andrew Mabon
2,stockholm2025,1,74.0,Akvilė Mačiulevičiūtė,Andrew Mabon
3,stockholm2025,1,73.0,Mykolas Žalinkevicius,Andrew Mabon
4,stockholm2025,1,77.0,Alisa Popova,Andrew Mabon
...,...,...,...,...,...
1979,sido2025,3,74.0,Elsa Lu,Ben Bensali
1980,sido2025,5,75.0,Elsa Lu,Jaed Bengzon
1981,sido2025,1,77.0,Elsa Lu,Adel Mostaque Ahmed
1982,sido2025,4,77.0,Elsa Lu,Xandy Sophia Armamento


# Meta
- target the paper towards audience not familiar with debates (easier peer reviews)

## Showing the problem
Intuitively: A speaker can get either a 79 or 74 with the same speech, depending on the judge.

What is an average speaker's expected standard deviation? Could be calculated by taking an average across all individual speakers st. devs.
(Is this actually reliable/useful?)

speaker score = latent skill + randomness + judge bias

Show that this judge bias is statistically significant. 

Things to show: 
1. The distribution of judges' means is spread out.
2. The distribution of an individual judge's mean is not as spread out.


-> Judges have inherent internal inconsistency as compared to each other that other factors cannot account for.

## Solving the problem




In [None]:
# speaker scores by judge
# pandas dataframe model
# score, speaker_id, judge_id, round_seq, pairing_id,  

# Questions to answer
# 1. Do judges at better tournaments give more consistent means? 

# monte carlo on wsdc 2023 for tetsing judge subsets, gives rough means distributions
# should we also standardize by  