In [2]:
import numpy as np
import pandas as pd

def rank_numbers(numbers):
    sorted_indices = sorted(range(len(numbers)), key=lambda k: numbers[k], reverse=True)
    ranks = [0] * len(numbers)

    i = 0
    while i < len(sorted_indices):
        value_indices = [i]
        while i + 1 < len(sorted_indices) and numbers[sorted_indices[i]] == numbers[sorted_indices[i + 1]]:
            i += 1
            value_indices.append(i)
        average_rank = np.mean([index + 1 for index in value_indices])
        for index in value_indices:
            ranks[sorted_indices[index]] = average_rank
        i += 1

    return ranks

def calculate_spearman_manual(values1, values2):
    n = len(values1)
    m = len(values2)
    rank1 = rank_numbers(values1)
    rank2 = rank_numbers(values2)
    d = np.array(rank1) - np.array(rank2)
    d_squared = np.square(d)
    spearman_corr = 1 - (6 * np.sum(d_squared)) / (n * (n**2 - 1))
    return spearman_corr

def rank_basescore(base_score, draw_ratio):
    # 计算最大值和最小值的差值
    score_range = max(base_score) - min(base_score)
    # 计算平序的阈值
    draw_gap = draw_ratio * score_range

    # 对列表进行排序并保留原始索引
    indexed_scores = list(enumerate(base_score))
    indexed_scores.sort(key=lambda x: x[1])

    # 处理平序
    ranks = [0] * len(base_score)
    current_rank = 1
    for i in range(len(indexed_scores)):
        if i > 0 and abs(indexed_scores[i][1] - indexed_scores[i - 1][1]) < draw_gap:
            ranks[indexed_scores[i][0]] = current_rank
        else:
            current_rank = i + 1
            ranks[indexed_scores[i][0]] = current_rank

    return ranks

In [3]:
dimension = 'color'

In [4]:
import json
jsonpath = '../Human_anno/{}.json'.format(dimension)
with open(jsonpath,'r') as f:
    oc = json.load(f)

# history ="../GPT4o_eval_results/{}_gpt4eval_results.json".format(dimension)
# with open(history,'r') as f:
#     gpt4o_eval_history = json.load(f)

In [5]:
models = ['cogvideox5b','gen3', 'kling','videocrafter2', 'pika', 'show1', 'lavie']
# models = ['videocrafter2', 'pika', 'show1', 'lavie']
# models = ['cogvideox5b','gen3', 'kling']
idexls = []
for i in range(0,len(oc)):
    idexls.append(i)
# for i in range(1,len(oc),3):
#     idexls.append(i)
length = len(idexls)

In [1]:
flag = 0
iternum = 0
while(flag == 0):
    iternum += 1
    gptvsannos_spearman = np.zeros(4,length)
    gptvsannomean_spearman = np.zeros(length)
    multigptvsannos_spearman = np.zeros(4,length)
    multigptvsannomean_spearman = np.zeros(length)
    baselinevsannos_spearman = np.zeros(4,length)
    baselinevsannomean_spearman = np.zeros(length)

    gptscore = np.zeros([len(models),length])
    annoscore = np.zeros([4,len(models),length])
    multigptscore = np.zeros([len(models),length])
    annomeanscore =np.zeros([len(models),length])
    baseline_rank = np.zeros([len(models),length])
    badeval = []

    for j in range(length):
        i = idexls[j]
        gpt4o_eval_rs = np.array(list(oc[i]['gpt4o_eval'].values()))
        human_anno = np.array(list(oc[i]['human_anno'].values()))
        baseline_score = np.array(list(oc[i]['baseline_score'].values()))
        multiagent_eval_results = np.array(list(oc[i]['multiagent_score'].values()))

        for human in range(4):
            annoscore[human,:,j] = human_anno[:,human]

        baseline_rank[j] = rank_basescore(baseline_score, 0.165)
        gptscore[j]= gpt4o_eval_rs
        multigptscore[j] = multiagent_eval_results
        annomeanscore[j] = np.mean(annoscore[:,:,j],axis=0)

        for human in range(4):
            gptvsannos_spearman[human,j] = calculate_spearman_manual(gpt4o_eval_rs,annoscore[human,:,j])
            multigptvsannos_spearman[human,j] = calculate_spearman_manual(multiagent_eval_results,annoscore[human,:,j])
            baselinevsannos_spearman[human,j] = calculate_spearman_manual(baseline_rank[j],annoscore[human,:,j])

        gptvsannomean_spearman[j] = calculate_spearman_manual(gpt4o_eval_rs,annomeanscore[j])
        multigptvsannomean_spearman[j] = calculate_spearman_manual(multiagent_eval_results,annomeanscore[j])
        baselinevsannomean_spearman[j] = calculate_spearman_manual(baseline_rank,annomeanscore[j])

    gptscore = gptscore.mean(axis=0)
    multigptscore = multigptscore.mean(axis=0)
    annomeanscore = annomeanscore.mean(axis=0)

    print("{} iter max gap in gpt".format(iternum))
    for i in range(4):
        print(np.max(np.abs(gptscore - annoscore[i].mean(axis=0))),np.argmax(np.abs(gptscore - annoscore[i].mean(axis=0))))
        print(np.max(np.abs(multigptscore - annoscore[i].mean(axis=0))),np.argmax(np.abs(multigptscore - annoscore[i].mean(axis=0))))

    # if np.max(np.abs(gptscore - anno1score) ) < 0.1 or np.max(np.abs(gptscore - anno2score)) < 0.1 or np.max(np.abs(gptscore - anno3score)) < 0.1:
    flag = 1

    if iternum%10==0:
        print("GPT average score: ",gptscore)
        for i in range(4):
            print("Anno{} average score: ".format(i+1),annoscore[i].mean(axis=0))

print("GPT average score: ",gptscore)
for i in range(4):
    print("Anno{} average score: ".format(i+1),annoscore[i].mean(axis=0))
print("AnnoMean average score: ",annomeanscore)

for i in range(4):
    print("GPT vs Anno{} Spearman: ".format(i+1),gptvsannos_spearman[i].mean())
    print("MultiGPT vs Anno{} Spearman: ".format(i+1),multigptvsannos_spearman[i].mean())
    print("Baseline vs Anno{} Spearman: ".format(i+1),baselinevsannos_spearman[i].mean())
print("Average GPT vs Anno Spearman: ",(gptvsannos_spearman[0].mean()+gptvsannos_spearman[1].mean()+gptvsannos_spearman[2].mean())/3)
print("GPT vs AnnoMean Spearman: ",gptvsannomean_spearman.mean())

print("Average Baseline vs Anno Spearman: ",(baselinevsanno_spearman.mean()+baselinevsanno2_spearman.mean()+baselinevsanno3_spearman.mean())/3)
print("Baseline vs AnnoMean Spearman: ",baselinevsannomean_spearman.mean())




SyntaxError: invalid syntax (391442222.py, line 28)

In [14]:
num_human_anno = 4
spearmans =np.zeros([num_human_anno,num_human_anno,length])
annos = [anno1s,anno2s,anno3s,anno4s]
for j in range(length):
    i = idexls[j]
    human_anno = np.array(list(oc[i]['human_anno'].values()))
    for k in range(num_human_anno):
        for l in range(num_human_anno):
            spearmans[k,l,j] = calculate_spearman_manual(annos[k][j],annos[l][j])
average_spearmans = spearmans.mean(axis=2)

print(average_spearmans)
print((average_spearmans.sum()-num_human_anno)/(num_human_anno*(num_human_anno-1)))

[[1.         0.7595515  0.63247508 0.63116002]
 [0.7595515  1.         0.65521179 0.63420543]
 [0.63247508 0.65521179 1.         0.63046788]
 [0.63116002 0.63420543 0.63046788 1.        ]]
0.6571786175710596
