# **Import Google Drive**


In [19]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Setting**


In [20]:
from sklearn.isotonic import isotonic_regression
import cvxpy as cp
import numpy as np
import csv, os
from scipy import stats
from sklearn import metrics
import itertools
import pandas as pd
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import statistics
from scipy.stats import levene
import seaborn as sns
import warnings
from sklearn.metrics import r2_score
warnings.filterwarnings("ignore", category=RuntimeWarning)


# **Function: Partition all submissions according to "Greedy" and "Multi-owner" methods**


In [21]:
def validate(partition, graph, n):
	"""Sanity-check a paper partition and list the full owners of every block.

	Checks performed (via ``assert``):
	  1. no paper appears in more than one of the blocks ``partition[:-1]``;
	  2. all blocks together (including the last one) contain exactly ``n`` papers.

	The "at least two full owners per block" requirement is intentionally not
	enforced here.

	Args:
		partition: list of sets of paper ids; the last entry is the remainder block.
		graph: dict mapping an author key to the set of papers that author owns.
		n: expected total number of papers.

	Returns:
		A list aligned with ``partition``: for every block but the last, the set
		of author keys whose paper set contains the whole block; for the last
		block, an empty set.
	"""
	seen_papers = set()
	full_author_parts = []

	for block in partition[:-1]:
		# blocks must be pairwise disjoint
		assert seen_papers.isdisjoint(block)
		seen_papers |= block

		# authors that can rank every paper of this block
		owners = {author for author, owned in graph.items() if block.issubset(owned)}
		full_author_parts.append(owners)

	# the remainder block has no designated owners
	full_author_parts.append(set())
	seen_papers |= partition[-1]
	assert len(seen_papers) == n

	return full_author_parts


def greedy(graph, m, n, randomize=False, level=1):
	"""Greedily partition papers into blocks owned by one author (or one author pair).

	At every step the candidate with the most not-yet-allocated papers becomes a
	block; candidates left with fewer than two papers are retired. Papers that
	never make it into a block are collected in a final remainder block.

	Args:
		graph: mapping ``i -> set of paper ids`` for ``i`` in ``range(m)``.
		m: number of authors.
		n: number of papers; paper ids are expected to be ``0 .. n-1``.
		randomize: unused; kept for interface compatibility.
		level: 1 to use single authors as candidates, 2 to use author pairs
			(candidate papers are the intersection of both authors' papers).

	Returns:
		``(partition, author_parts)``: two aligned lists. ``partition`` holds the
		paper sets, ``author_parts`` the set of author indices that produced each
		block. The last entry of each is the remainder block and an empty set.

	Raises:
		ValueError: if ``level`` is neither 1 nor 2.
	"""
	if level == 1:
		parts = [ graph[i].copy() for i in range(m) ]
		index2pair = [ set([i]) for i in range(m) ]
	elif level == 2:
		parts = [ graph[i].intersection(graph[j])  for i in range(m) for j in range(i+1, m) ]
		index2pair = [ set([i,j]) for i in range(m) for j in range(i+1, m) ]
	else:
		# previously this fell through and crashed later with an unbound name
		raise ValueError(f"level must be 1 or 2, got {level!r}")

	partition = []
	author_parts = []
	allocated_papers = set()

	# only candidates that can still form a block of at least two papers
	active_indices = set( [i for i in range(len(parts)) if len(parts[i]) > 1] )

	# max_idx is None when no candidate is left (also covers m == 0)
	max_idx = None
	max_val = 0
	for i in active_indices:
		if len(parts[i]) > max_val:
			max_idx = i
			max_val = len(parts[i])

	while len(allocated_papers) < n and max_idx is not None:
		max_part = parts[max_idx].copy()
		partition.append( max_part )
		author_parts.append( index2pair[max_idx] )
		allocated_papers |= max_part

		# strip the allocated papers from every live candidate and pick the next largest
		max_idx = None
		max_val = 0
		to_remove = set()
		for i in active_indices:
			parts[i].difference_update(max_part)
			if len(parts[i]) < 2:
				to_remove.add(i)
				continue
			if len(parts[i]) > max_val:
				max_idx = i
				max_val = len(parts[i])
		active_indices.difference_update(to_remove)

	# add all remaining papers to the last partition
	partition.append({ i for i in range(n) if i not in allocated_papers })
	author_parts.append(set())

	return partition, author_parts


def arbitrary(graph, m, n, randomize=False):
	"""Partition papers into single-author blocks in an arbitrary (non-greedy) order.

	The first block comes from the first author owning at least two papers.
	After each block, its papers are removed from every author and the next
	block comes from the highest-indexed author still left with at least two
	papers. Papers never placed in a block end up in a final remainder block.

	Args:
		graph: mapping ``i -> set of paper ids`` for ``i`` in ``range(m)``.
		m: number of authors.
		n: number of papers; paper ids are expected to be ``0 .. n-1``.
		randomize: unused; kept for interface compatibility.

	Returns:
		``(partition, author_parts)``: two aligned lists. ``partition`` holds the
		paper sets, ``author_parts`` the one-element set with the author index
		that produced each block. The last entry of each is the remainder block
		and an empty set.
	"""
	partition = []
	author_parts = []
	allocated_papers = set()

	parts = [ graph[i].copy() for i in range(m) ]
	index2pair = [ set([i]) for i in range(m) ]

	# First author with at least two papers, or -1 if there is none. The old
	# scan fell through to the last author in that case and emitted a block of
	# size 0 or 1 (and raised IndexError when m == 0).
	idx = next((i for i, part in enumerate(parts) if len(part) >= 2), -1)

	while len(allocated_papers) < n and idx != -1:
		part = parts[idx].copy()

		partition.append( part )
		author_parts.append( index2pair[idx] )
		allocated_papers |= part

		# remove the allocated papers everywhere; the last author that still
		# has >= 2 papers supplies the next block
		idx = -1
		for i in range(len(parts)):
			parts[i].difference_update(part)
			if len(parts[i]) >= 2:
				idx = i

	# add all remaining papers to the last partition
	partition.append({ i for i in range(n) if i not in allocated_papers })
	author_parts.append(set())

	return partition, author_parts


# **Generate simulation dataset**

In [None]:
# Build one simulated trial file: for every submission, one randomly chosen
# rating is held out as 'score' and the mean of the remaining ratings becomes
# 'proxy'.
# NOTE(review): the analysis cells read proxy_score_1..3.csv and use
# 'submission_idx' / 'author_idx' / 'rank' columns; presumably those files come
# from running this cell with other TRIAL_IDX values plus an indexing step that
# is not shown here -- confirm.
TRIAL_IDX = 6
# Seeded per trial so that a given trial file can be regenerated exactly.
rng = np.random.default_rng(TRIAL_IDX)

# Load the raw ratings table.
data = pd.read_csv(r'proxy_score.csv')

# Step 1: Create a dictionary to map each submission_id to its rating_0312 list.
# Only the rows of the submission's first listed author are used, so each
# rating is taken once. One grouped pass replaces the repeated full-frame
# scans; sort=False keeps submissions in order of first appearance.
submission_dict = {}
for submission_id, rows in data.groupby('submission_id', sort=False):
    first_author = rows['author_id'].iloc[0]
    submission_dict[submission_id] = rows.loc[rows['author_id'] == first_author, 'rating_0312'].tolist()

# Step 2 and Step 3: Generate 'score' and 'proxy' for each submission_id
score_proxy_results = {}
for submission_id, ratings in submission_dict.items():
    score_index = int(rng.integers(len(ratings)))
    score = ratings[score_index]
    remaining_ratings = ratings[:score_index] + ratings[score_index + 1:]
    # with a single rating there is nothing left to average: fall back to the score
    proxy = np.mean(remaining_ratings) if remaining_ratings else score
    score_proxy_results[submission_id] = {'score': score, 'proxy': proxy}

# Add the 'score' and 'proxy' columns to the original dataset
data['score'] = data['submission_id'].map(lambda x: score_proxy_results[x]['score'])
data['proxy'] = data['submission_id'].map(lambda x: score_proxy_results[x]['proxy'])

# Save the updated dataset
data.to_csv(f'proxy_score_{TRIAL_IDX}.csv', index=False)

print("Dataset updated with new 'score' and 'proxy' columns saved.")


Dataset updated with new 'score' and 'proxy' columns saved.


# **Simple-averaging Isotonic Scores, Proxy**


## Compute L_2 values for all submissions and Plot.


In [None]:
# Simple-averaging isotonic calibration: squared (L2) error of the calibrated
# score and of the raw score against the proxy, per submission.

# Initialize list to store squared differences for each run
adhoc_diff_all = []
old_diff_all = []


def sort_submissions(author_submission_rank_old):
    """Sort each author's (submission, rank, score) list in place.

    Entries are ordered by ascending rank; ties are broken by descending score.
    The (mutated) input dict is returned.
    """
    for author in author_submission_rank_old:
        author_submission_rank_old[author].sort(key=lambda x: (x[1], -x[2]), reverse=False)
    return author_submission_rank_old


for trail_idx in range(1,4):
    # Load CSV file into a pandas DataFrame; keep one row per (submission, author)
    df = pd.read_csv(f'proxy_score_{trail_idx}.csv')
    df = df.drop_duplicates(['submission_idx', 'author_idx'])

    # One pass over the rows replaces a full DataFrame scan per lookup.
    rank_score = {}             # (submission_idx, author_idx) -> (rank, score)
    submissions_by_author = {}  # author_idx -> submission_idx values in row order
    for sub, author, rank, score in zip(df['submission_idx'].tolist(), df['author_idx'].tolist(),
                                        df['rank'].tolist(), df['score'].tolist()):
        rank_score[(sub, author)] = (rank, score)
        submissions_by_author.setdefault(author, []).append(sub)

    # {author: [(submission, rank, score), ...]} sorted by the author's ranking
    author_submission_rank_old = {}
    authors = df['author_idx'].unique()
    for author in authors:
        submissions = list(set(submissions_by_author[author]))
        author_submission_rank_old[author] = [(sub,) + rank_score[(sub, author)] for sub in submissions]
    author_submission_rank_old = sort_submissions(author_submission_rank_old)

    # Per author: fit a non-increasing isotonic regression (bounded to [0, 10])
    # to the scores taken in ranking order.
    author_submission_rank_new = {}
    for author, ranked in author_submission_rank_old.items():
        ir_rank = np.array([score for _, _, score in ranked])
        ir_rank_pred =  isotonic_regression(ir_rank, sample_weight = None, y_min=0.0, y_max=10.0, increasing=False)
        author_submission_rank_new[author] = [
            (sub, rank, fitted) for (sub, rank, _), fitted in zip(ranked, ir_rank_pred)
        ]

    final_submission_list = df['submission_idx'].unique()

    # Average the fitted values over all authors of each submission.
    submission_new_rating = {submission: [] for submission in final_submission_list}
    for author in author_submission_rank_new:
        for sub, _, fitted in author_submission_rank_new[author]:
            submission_new_rating[sub].append(float(fitted))
    for submission in final_submission_list:
        fitted_values = submission_new_rating[submission]
        submission_new_rating[submission] = sum(fitted_values)/len(fitted_values)

    # Isotonic score
    adhoc_iso_rating = [submission_new_rating[submission] for submission in final_submission_list]

    # First row of every submission carries its proxy and raw score.
    first_rows = df.drop_duplicates('submission_idx')
    first_ids = first_rows['submission_idx'].tolist()

    # Proxy
    submission_true_rating = dict(zip(first_ids, first_rows['proxy'].tolist()))
    True_score_multi_iso = [submission_true_rating[submission] for submission in final_submission_list]

    # Score
    old_rating = dict(zip(first_ids, first_rows['score'].tolist()))
    old_score_multi_iso = [old_rating[submission] for submission in final_submission_list]

    # Compute and collect squared errors
    adhoc_diff = [(iso - true)**2 for iso, true in zip(adhoc_iso_rating, True_score_multi_iso)]
    old_diff = [(old - true)**2 for old, true in zip(old_score_multi_iso, True_score_multi_iso)]

    adhoc_diff_all.append(adhoc_diff)
    old_diff_all.append(old_diff)

# ==========================
# After all files are processed, compute the average over all i
# ==========================

# Convert to numpy arrays for easier computation.
# NOTE(review): averaging over axis 0 assumes every trial file lists the same
# submissions in the same order -- confirm.
adhoc_diff_all = np.array(adhoc_diff_all)
old_diff_all = np.array(old_diff_all)

# Average across the 3 files (axis=0 is file_idx)
adhoc_diff_average = np.mean(adhoc_diff_all, axis=0)
old_diff_average = np.mean(old_diff_all, axis=0)

# Final outputs
print('adhoc_diff_average:', np.mean(adhoc_diff_average))
print('old_diff_average:', np.mean(old_diff_average))


# Paired one-sided t-test: is the raw-score error larger than the calibrated error?
# The labels name simple averaging because this cell never uses the greedy partition.
t_statistic, p_value = stats.ttest_rel(old_diff_average, adhoc_diff_average, alternative='greater')
print("Before Rebuttal with proxy t-test for simple averaging:", t_statistic)
print("Before Rebuttal with proxy p-value for simple averaging:", p_value)

adhoc_diff_average: 1.9947407746772812
old_diff_average: 2.612946201141853
Before Rebuttal with proxy t-test for greedy: 20.369651013927328
Before Rebuttal with proxy p-value for greedy: 7.85588140412992e-86


## Compute L_1 values for all submissions and Plot.


In [None]:
# Simple-averaging isotonic calibration: absolute (L1) error of the calibrated
# score and of the raw score against the proxy, per submission.

# Initialize list to store absolute differences for each run
adhoc_diff_all = []
old_diff_all = []


def sort_submissions(author_submission_rank_old):
    """Sort each author's (submission, rank, score) list in place.

    Entries are ordered by ascending rank; ties are broken by descending score.
    The (mutated) input dict is returned.
    """
    for author in author_submission_rank_old:
        author_submission_rank_old[author].sort(key=lambda x: (x[1], -x[2]), reverse=False)
    return author_submission_rank_old


for trail_idx in range(1,4):
    # Load CSV file into a pandas DataFrame; keep one row per (submission, author)
    df = pd.read_csv(f'proxy_score_{trail_idx}.csv')
    df = df.drop_duplicates(['submission_idx', 'author_idx'])

    # One pass over the rows replaces a full DataFrame scan per lookup.
    rank_score = {}             # (submission_idx, author_idx) -> (rank, score)
    submissions_by_author = {}  # author_idx -> submission_idx values in row order
    for sub, author, rank, score in zip(df['submission_idx'].tolist(), df['author_idx'].tolist(),
                                        df['rank'].tolist(), df['score'].tolist()):
        rank_score[(sub, author)] = (rank, score)
        submissions_by_author.setdefault(author, []).append(sub)

    # {author: [(submission, rank, score), ...]} sorted by the author's ranking
    author_submission_rank_old = {}
    authors = df['author_idx'].unique()
    for author in authors:
        submissions = list(set(submissions_by_author[author]))
        author_submission_rank_old[author] = [(sub,) + rank_score[(sub, author)] for sub in submissions]
    author_submission_rank_old = sort_submissions(author_submission_rank_old)

    # Per author: fit a non-increasing isotonic regression (bounded to [0, 10])
    # to the scores taken in ranking order.
    author_submission_rank_new = {}
    for author, ranked in author_submission_rank_old.items():
        ir_rank = np.array([score for _, _, score in ranked])
        ir_rank_pred =  isotonic_regression(ir_rank, sample_weight = None, y_min=0.0, y_max=10.0, increasing=False)
        author_submission_rank_new[author] = [
            (sub, rank, fitted) for (sub, rank, _), fitted in zip(ranked, ir_rank_pred)
        ]

    final_submission_list = df['submission_idx'].unique()

    # Average the fitted values over all authors of each submission.
    submission_new_rating = {submission: [] for submission in final_submission_list}
    for author in author_submission_rank_new:
        for sub, _, fitted in author_submission_rank_new[author]:
            submission_new_rating[sub].append(float(fitted))
    for submission in final_submission_list:
        fitted_values = submission_new_rating[submission]
        submission_new_rating[submission] = sum(fitted_values)/len(fitted_values)

    # Isotonic score
    adhoc_iso_rating = [submission_new_rating[submission] for submission in final_submission_list]

    # First row of every submission carries its proxy and raw score.
    first_rows = df.drop_duplicates('submission_idx')
    first_ids = first_rows['submission_idx'].tolist()

    # Proxy
    submission_true_rating = dict(zip(first_ids, first_rows['proxy'].tolist()))
    True_score_multi_iso = [submission_true_rating[submission] for submission in final_submission_list]

    # Score
    old_rating = dict(zip(first_ids, first_rows['score'].tolist()))
    old_score_multi_iso = [old_rating[submission] for submission in final_submission_list]

    # Compute and collect absolute errors
    adhoc_diff = [abs(iso - true) for iso, true in zip(adhoc_iso_rating, True_score_multi_iso)]
    old_diff = [abs(old - true) for old, true in zip(old_score_multi_iso, True_score_multi_iso)]

    adhoc_diff_all.append(adhoc_diff)
    old_diff_all.append(old_diff)

# ==========================
# After all files are processed, compute the average over all i
# ==========================

# Convert to numpy arrays for easier computation.
# NOTE(review): averaging over axis 0 assumes every trial file lists the same
# submissions in the same order -- confirm.
adhoc_diff_all = np.array(adhoc_diff_all)
old_diff_all = np.array(old_diff_all)

# Average across the 3 files (axis=0 is file_idx)
adhoc_diff_average = np.mean(adhoc_diff_all, axis=0)
old_diff_average = np.mean(old_diff_all, axis=0)

# Final outputs
print('adhoc_diff_average:', np.mean(adhoc_diff_average))
print('old_diff_average:', np.mean(old_diff_average))


# Paired one-sided t-test: is the raw-score error larger than the calibrated error?
# The labels name simple averaging because this cell never uses the greedy partition.
t_statistic, p_value = stats.ttest_rel(old_diff_average, adhoc_diff_average, alternative='greater')
print("Before Rebuttal with proxy t-test for simple averaging:", t_statistic)
print("Before Rebuttal with proxy p-value for simple averaging:", p_value)

adhoc_diff_average: 1.1082301329910025
old_diff_average: 1.273618796662275
Before Rebuttal with proxy t-test for greedy: 19.13915094772145
Before Rebuttal with proxy p-value for greedy: 1.1562964147679104e-76


# **Greedy/Multi-owner Isotonic Scores, Proxy**

## Compute L2 values for all submissions.

In [None]:
# Greedy / multi-owner isotonic calibration: squared (L2) error of both
# calibrated scores and of the raw score against the proxy, per submission.

# Initialize list to store squared differences for each run
greedy_diff_all = []
multi_diff_all = []
old_diff_all = []


def sort_submissions(author_submission_rank_old):
    """Sort each author's (submission, rank, score) list in place.

    Entries are ordered by ascending rank; ties are broken by descending score.
    The (mutated) input dict is returned.
    """
    for author in author_submission_rank_old:
        author_submission_rank_old[author].sort(key=lambda x: (x[1], -x[2]), reverse=False)
    return author_submission_rank_old


def _calibrate_partition(partition, author_parts, author_ids, rank_score, raw_score, n_papers):
    """Compute block-wise isotonic scores for one partition.

    Args:
        partition: list of sets of submission indices (blocks).
        author_parts: list aligned with ``partition``; each entry is the set of
            graph indices of the authors owning every paper of the block.
        author_ids: ``author_ids[k]`` is the ``author_idx`` value behind graph
            index ``k``.
        rank_score: dict ``(submission_idx, author_idx) -> (rank, score)``.
        raw_score: dict ``submission_idx -> score``.
        n_papers: number of submissions; they index the returned array.

    Returns:
        numpy array of length ``n_papers``. Blocks without owners keep the raw
        score; otherwise each submission gets the mean, over the block's
        owners, of that owner's isotonic fit.
    """
    calibrated_scores = np.zeros(n_papers)
    for part, author_part in zip(partition, author_parts):
        if len(author_part) == 0:
            for i in part: calibrated_scores[i] = raw_score[i]
            continue
        paper_part = list(part)

        # Organize each block by {author: [submission, rank, score]}.
        # The graph index is translated back to the real author_idx before the
        # lookup; using it directly is only right when the two coincide.
        author_submission_rank_old = {}
        for author in author_part:
            author_id = author_ids[author]
            author_submission_rank_old[author] = [
                (paper,) + rank_score[(paper, author_id)] for paper in paper_part
            ]

        # Sort submissions by rank; in case of ties, sort by score.
        author_submission_rank_old = sort_submissions(author_submission_rank_old)

        # Compute isotonic scores for each author in the block and collect
        # them per submission.
        submission_iso_rating = {paper: [] for paper in paper_part}
        for author, ranked in author_submission_rank_old.items():
            ir_rank = np.array([score for _, _, score in ranked])
            ir_rank_pred =  isotonic_regression(ir_rank, sample_weight = None, y_min=0.0, y_max=10.0, increasing=False)
            for (paper, _, _), fitted in zip(ranked, ir_rank_pred):
                submission_iso_rating[paper].append(float(fitted))

        # For multi-author submissions in a block, average the scores to get the isotonic score.
        for paper in paper_part:
            fitted_values = submission_iso_rating[paper]
            calibrated_scores[paper] = sum(fitted_values)/len(fitted_values)

    return calibrated_scores


for trail_idx in range(1,4):

    # Load CSV file into a pandas DataFrame; keep one row per (submission, author)
    df = pd.read_csv(f'proxy_score_{trail_idx}.csv')
    df = df.drop_duplicates(['submission_idx', 'author_idx'])

    # One pass over the rows replaces a full DataFrame scan per lookup.
    rank_score = {}             # (submission_idx, author_idx) -> (rank, score)
    submissions_by_author = {}  # author_idx -> submission_idx values in row order
    for sub, author, rank, score in zip(df['submission_idx'].tolist(), df['author_idx'].tolist(),
                                        df['rank'].tolist(), df['score'].tolist()):
        rank_score[(sub, author)] = (rank, score)
        submissions_by_author.setdefault(author, []).append(sub)

    # Extract the unique authors from the DataFrame
    authors = df['author_idx'].unique()
    submissions = df['submission_idx'].unique()

    author_submission = {}
    for author in authors:
        author_submission[author] = list( set(submissions_by_author[author]) )

    m_2 = len(author_submission)
    n_2 = len(submissions)

    # Graph index k stands for the k-th author in order of first appearance.
    author_ids = list(author_submission)

    # Per author (in graph order): {submission: rank}
    authors = [
        {sub: rank_score[(sub, author)][0] for sub in author_submission[author]}
        for author in author_ids
    ]

    graph = {}
    for i, author in enumerate(authors):
        graph[i] = set( int(k) for k in author.keys())

    # First row of every submission carries its proxy and raw score.
    first_rows = df.drop_duplicates('submission_idx')
    first_ids = first_rows['submission_idx'].tolist()
    raw_score = dict(zip(first_ids, first_rows['score'].tolist()))
    proxy_by_submission = dict(zip(first_ids, first_rows['proxy'].tolist()))

    # Partition all the graph according to Multi-owner algorithm
    partition, author_parts = arbitrary(graph, m_2, n_2)
    author_parts = validate(partition, graph, n_2)
    calibrated_scores = _calibrate_partition(partition, author_parts, author_ids, rank_score, raw_score, n_2)

    # Multi-owner Isotonic Score
    multi_iso_rating = [calibrated_scores[i] for i in range(n_2)]

    # Partition all the graph according to greedy algorithm
    partition, author_parts = greedy(graph, m_2, n_2)
    author_parts = validate(partition, graph, n_2)
    calibrated_scores = _calibrate_partition(partition, author_parts, author_ids, rank_score, raw_score, n_2)

    # Greedy Isotonic Score
    greedy_iso_rating = [calibrated_scores[i] for i in range(n_2)]

    # proxy (submission indices are taken to be 0 .. n_2-1 from here on)
    submission_true_rating = {submission: proxy_by_submission[submission] for submission in range(n_2)}
    True_score_multi_iso = [submission_true_rating[i] for i in range(n_2)]

    # score
    old_rating = {submission: raw_score[submission] for submission in range(n_2)}
    old_score_multi_iso = [old_rating[i] for i in range(n_2)]

    # Record (y - R)^2 for the raw score and (\hat{R} - R)^2 for both calibrated scores.
    greedy_diff = []
    multi_diff = []
    old_diff = []
    for i in range(len(True_score_multi_iso)):
        greedy_diff.append( (greedy_iso_rating[i] - True_score_multi_iso[i])**2 )
        multi_diff.append( (multi_iso_rating[i] - True_score_multi_iso[i])**2 )
        old_diff.append( (old_score_multi_iso[i] - True_score_multi_iso[i])**2 )

    greedy_diff_all.append(greedy_diff)
    multi_diff_all.append(multi_diff)
    old_diff_all.append(old_diff)


# ==========================
# After all files are processed, compute the average over all i
# ==========================

# Convert to numpy arrays for easier computation
greedy_diff_all = np.array(greedy_diff_all)
multi_diff_all = np.array(multi_diff_all)
old_diff_all = np.array(old_diff_all)

# Average across the 3 files (axis=0 is file_idx)
greedy_diff_average = np.mean(greedy_diff_all, axis=0)
multi_diff_average = np.mean(multi_diff_all, axis=0)
old_diff_average = np.mean(old_diff_all, axis=0)

# Final outputs
print('Greedy_diff_average:', np.mean(greedy_diff_average))
print('Multi_diff_average:', np.mean(multi_diff_average))
print('old_diff_average:', np.mean(old_diff_average))

# Paired one-sided t-test: raw-score error vs. greedy-calibrated error
t_statistic, p_value = stats.ttest_rel(old_diff_average, greedy_diff_average, alternative='greater')
print("Before Rebuttal with proxy t-test for greedy:", t_statistic)
print("Before Rebuttal with proxy p-value for greedy:", p_value)

# Paired one-sided t-test: raw-score error vs. multi-owner-calibrated error
t_statistic, p_value = stats.ttest_rel(old_diff_average, multi_diff_average, alternative='greater')
print("Before Rebuttal with proxy t-test for Multi-owner:", t_statistic)
print("Before Rebuttal with proxy p-value for Multi-owner:", p_value)

Greedy_diff_average: 2.0680999531198805
Multi_diff_average: 2.1073726437520905
old_diff_average: 2.612946201141853
Before Rebuttal with proxy t-test for greedy: 17.611348208377855
Before Rebuttal with proxy p-value for greedy: 7.06470527213899e-66
Before Rebuttal with proxy t-test for Multi-owner: 18.19408530907814
Before Rebuttal with proxy p-value for Multi-owner: 6.53945434043408e-70


## Compute L1 values for all submissions.

In [None]:
# Initialize list to store squared differences for each run
greedy_diff_all = []
multi_diff_all = []
old_diff_all = []

for trail_idx in range(1,4):

    # Load CSV file into a pandas DataFrame
    df = pd.read_csv(f'proxy_score_{trail_idx}.csv')
    df = df.drop_duplicates(['submission_idx', 'author_idx'])


    # Extract the unique authors from the DataFrame
    authors = df['author_idx'].unique()
    submissions = df['submission_idx'].unique()

    author_submission = {}
    for author in authors:
        submissionss = list( set(df[df['author_idx'] == author]['submission_idx'].tolist()) )
        author_submission[author] = submissionss

    m_2 = len(author_submission)
    n_2 = len(submissions)

    authors = []
    for author in author_submission:
        submission_ranking = {}
        for i in range(len(author_submission[author])):
            submission_ranking[author_submission[author][i]] = df[(df['submission_idx'] == author_submission[author][i]) & (df['author_idx'] == author)]['rank'].tolist()
            submission_ranking[author_submission[author][i]] = submission_ranking[author_submission[author][i]][0]
        authors.append(submission_ranking)

    graph = {}
    for i, author in enumerate(authors):
        graph[i] = set( int(k) for k in author.keys())

    # Partition all the graph according to Multi-owner algorithm
    partition, author_parts = arbitrary(graph, m_2, n_2)
    author_parts = validate(partition, graph, n_2)

    calibrated_scores = np.zeros(n_2)
    for part, author_part in zip(partition, author_parts):
      if len(author_part) == 0:
        for i in part: calibrated_scores[i] = df[df['submission_idx'] == i]['score'].tolist()[0]
        continue
      paper_part = list(part)

      # Organize each block by {author: [submission, rank, score]}.
      author_submission_rank_old = {}
      for author in author_part:
        author_submission_rank_old[author] = []
        for i in range(len(paper_part)):
            rank = df[(df['submission_idx'] == paper_part[i]) & (df['author_idx'] == author)]['rank'].tolist()[0]
            ratings = df[(df['submission_idx'] == paper_part[i]) & (df['author_idx'] == author)]['score'].tolist()[0]
            author_submission_rank_old[author].append((paper_part[i], rank, ratings))



      # Sort submissions by rank; in case of ties, sort by score.
      def sort_submissions(author_submission_rank_old):
        for author in author_submission_rank_old:
          author_submission_rank_old[author].sort(key=lambda x: (x[1], -x[2]), reverse=False)
        return author_submission_rank_old
      author_submission_rank_old = sort_submissions(author_submission_rank_old)



      # Compute isotonic scores for each author in the block.
      author_submission_rank_multi_iso = {}
      for author in author_submission_rank_old:
          ir_rank = []
          for i in range(len(author_submission_rank_old[author])):
              r1 = author_submission_rank_old[author][i][2]
              ir_rank.append(r1)
          ir_rank = np.array(ir_rank)
          ir_rank_pred =  isotonic_regression(ir_rank, sample_weight = None, y_min=0.0, y_max=10.0, increasing=False)

          author_submission_rank_multi_iso[author] = []
          for i in range(len(author_submission_rank_old[author])):
              author_submission_rank_multi_iso[author].append((author_submission_rank_old[author][i][0], author_submission_rank_old[author][i][1], ir_rank_pred[i]))



      # For multi-author submissions in a block, average the scores to get the isotonic score.
      submission_multi_iso_rating = {}
      for submission in paper_part:
          submission_multi_iso_rating[submission] = []

      for author in author_submission_rank_multi_iso:
          for i in range(len(author_submission_rank_multi_iso[author])):
              submission_multi_iso_rating[author_submission_rank_multi_iso[author][i][0]].append(author_submission_rank_multi_iso[author][i][2])

      for submission in submission_multi_iso_rating:
          submission_multi_iso_rating[submission] = [float(rating) for rating in submission_multi_iso_rating[submission]]
          avg_rating = sum(submission_multi_iso_rating[submission])/len(submission_multi_iso_rating[submission])
          submission_multi_iso_rating[submission] = avg_rating

      for i in paper_part:
        calibrated_scores[i] = submission_multi_iso_rating[i]

    # Multi-owner Isotonic Score
    multi_iso_rating = []
    for i in range(n_2):
      multi_iso_rating.append(calibrated_scores[i])



    # Partition all the graph according to greedy algorithm
    partition, author_parts = greedy(graph, m_2, n_2)
    author_parts = validate(partition, graph, n_2)

    calibrated_scores = np.zeros(n_2)
    for part, author_part in zip(partition, author_parts):
      if len(author_part) == 0:
        for i in part: calibrated_scores[i] = df[df['submission_idx'] == i]['score'].tolist()[0]
        continue
      paper_part = list(part)

      # Organize each block by {author: [submission, rank, score]}.
      author_submission_rank_old = {}
      for author in author_part:
        author_submission_rank_old[author] = []
        for i in range(len(paper_part)):
            rank = df[(df['submission_idx'] == paper_part[i]) & (df['author_idx'] == author)]['rank'].tolist()[0]
            ratings = df[(df['submission_idx'] == paper_part[i]) & (df['author_idx'] == author)]['score'].tolist()[0]
            author_submission_rank_old[author].append((paper_part[i], rank, ratings))

      # Sort submissions by rank; in case of ties, sort by score.
      def sort_submissions(author_submission_rank_old):
        """Order every author's (submission, rank, score) tuples in place.

        Tuples are sorted by ascending rank; entries with equal rank are
        ordered by descending score. The same dict object that was passed
        in is returned, with each of its lists mutated.
        """
        for entries in author_submission_rank_old.values():
          # Negating the score makes higher scores come first among equal ranks.
          entries.sort(key=lambda entry: (entry[1], -entry[2]))
        return author_submission_rank_old
      author_submission_rank_old = sort_submissions(author_submission_rank_old)



      # Compute isotonic scores for each author in the block.
      author_submission_rank_greedy_iso = {}
      for author in author_submission_rank_old:
          ir_rank = []
          for i in range(len(author_submission_rank_old[author])):
              r1 = author_submission_rank_old[author][i][2]
              ir_rank.append(r1)
          ir_rank = np.array(ir_rank)
          ir_rank_pred =  isotonic_regression(ir_rank, sample_weight = None, y_min=0.0, y_max=10.0, increasing=False)

          author_submission_rank_greedy_iso[author] = []
          for i in range(len(author_submission_rank_old[author])):
              author_submission_rank_greedy_iso[author].append((author_submission_rank_old[author][i][0], author_submission_rank_old[author][i][1], ir_rank_pred[i]))



      # For multi-author submissions in a block, average the scores to get the isotonic score.
      submission_greedy_iso_rating = {}
      for submission in paper_part:
          submission_greedy_iso_rating[submission] = []

      for author in author_submission_rank_greedy_iso:
          for i in range(len(author_submission_rank_greedy_iso[author])):
              submission_greedy_iso_rating[author_submission_rank_greedy_iso[author][i][0]].append(author_submission_rank_greedy_iso[author][i][2])

      for submission in submission_greedy_iso_rating:
          submission_greedy_iso_rating[submission] = [float(rating) for rating in submission_greedy_iso_rating[submission]]
          avg_rating = sum(submission_greedy_iso_rating[submission])/len(submission_greedy_iso_rating[submission])
          submission_greedy_iso_rating[submission] = avg_rating

      for i in paper_part:
        calibrated_scores[i] = submission_greedy_iso_rating[i]



    # Greedy Isotonic Score
    greedy_iso_rating = []
    for i in range(n_2):
      greedy_iso_rating.append(calibrated_scores[i])

    # proxy
    submission_true_rating = {}
    for submission in range(n_2):
        submission_true_rating[submission] = df[df['submission_idx'] == submission]['proxy'].tolist()
        submission_true_rating[submission] = submission_true_rating[submission][0]

    True_score_multi_iso = []
    for i in range(n_2):
        True_score_multi_iso.append(submission_true_rating[i])


    # score
    old_rating = {}
    for submission in range(n_2):
        old_rating[submission] = df[df['submission_idx'] == submission]['score'].tolist()
        old_rating[submission] = old_rating[submission][0]

    old_score_multi_iso = []
    for i in range(n_2):
        old_score_multi_iso.append(old_rating[i])


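    # Create three lists recording, per submission, the absolute error against the proxy score: |greedy isotonic - proxy|, |multi-owner isotonic - proxy| and |original score - proxy|.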
    greedy_diff = []
    multi_diff = []
    old_diff = []
    for i in range(len(True_score_multi_iso)):
      greedy_diff.append( abs(greedy_iso_rating[i] - True_score_multi_iso[i]) )
      multi_diff.append( abs(multi_iso_rating[i] - True_score_multi_iso[i]) )
      old_diff.append( abs(old_score_multi_iso[i] - True_score_multi_iso[i]) )


    greedy_diff_all.append(greedy_diff)
    multi_diff_all.append(multi_diff)
    old_diff_all.append(old_diff)


# ==========================
# After all files are processed, compute the average over all i
# ==========================

# Stack the accumulated error lists into numpy arrays
# (one row per list appended inside the file loop).
greedy_diff_all = np.array(greedy_diff_all)
multi_diff_all = np.array(multi_diff_all)
old_diff_all = np.array(old_diff_all)

# Collapse axis 0 (the file index), leaving one mean error per submission position.
greedy_diff_average = greedy_diff_all.mean(axis=0)
multi_diff_average = multi_diff_all.mean(axis=0)
old_diff_average = old_diff_all.mean(axis=0)

# Final outputs: the overall mean error of each method.
for summary_label, summary_values in (
    ('Greedy_diff_average:', greedy_diff_average),
    ('Multi_diff_average:', multi_diff_average),
    ('old_diff_average:', old_diff_average),
):
    print(summary_label, np.mean(summary_values))

# One-sided paired t-tests: is the original-score error greater than the
# error of each calibrated method? Greedy is tested first, then Multi-owner,
# so t_statistic / p_value end up holding the Multi-owner result.
for method_diff_average, t_test_label, p_value_label in (
    (
        greedy_diff_average,
        "Before Rebuttal with proxy t-test for greedy:",
        "Before Rebuttal with proxy p-value for greedy:",
    ),
    (
        multi_diff_average,
        "Before Rebuttal with proxy t-test for Multi-owner:",
        "Before Rebuttal with proxy p-value for Multi-owner:",
    ),
):
    t_statistic, p_value = stats.ttest_rel(old_diff_average, method_diff_average, alternative='greater')
    print(t_test_label, t_statistic)
    print(p_value_label, p_value)

Greedy_diff_average: 1.1240108281412629
Multi_diff_average: 1.136489608327553
old_diff_average: 1.273618796662275
Before Rebuttal with proxy t-test for greedy: 16.649052097738977
Before Rebuttal with proxy p-value for greedy: 1.9278948001921145e-59
Before Rebuttal with proxy t-test for Multi-owner: 16.892870107284295
Before Rebuttal with proxy p-value for Multi-owner: 4.80100352077003e-61
