In [2]:
from scipy.stats import spearmanr, pearsonr
from scipy.stats import kendalltau

import json


In [3]:
import numpy as np
def softmax(a, b, temperature=100):
    """Two-way softmax: the weight given to logit ``a`` against logit ``b``.

    Computes ``exp(a/T) / (exp(a/T) + exp(b/T))`` with ``T = temperature``.

    Args:
        a: Logit of the first option (scalar or array-like).
        b: Logit of the second option (scalar or array-like, broadcastable
            against ``a``).
        temperature: Divisor applied to both logits before exponentiation.

    Returns:
        Value(s) in ``[0, 1]``; a NumPy scalar for scalar inputs, an array
        for array inputs.
    """
    # Divide out-of-place: an in-place `/=` would overwrite the caller's
    # arrays (making repeated calls non-idempotent) and fails outright on
    # integer-typed arrays.
    a = np.true_divide(a, temperature)
    b = np.true_divide(b, temperature)
    # exp(a) / (exp(a) + exp(b)) == exp(a - log(exp(a) + exp(b))).
    # logaddexp evaluates the log-sum without overflowing, so large scaled
    # logits give 1.0 / 0.0 instead of inf/inf = NaN.
    return np.exp(a - np.logaddexp(a, b))

def argmax(a, b):
    """Hard two-way decision between ``a`` and ``b``.

    Returns:
        ``1`` when ``a`` is strictly greater than ``b``, otherwise ``0``
        (ties included).
    """
    a_is_larger = a > b
    return int(a_is_larger)

def singmax(a, b): #, temperature=2):
    """Return ``b`` unchanged; the first argument ``a`` is ignored.

    Takes the same ``(a, b)`` arguments as ``softmax`` and ``argmax``.

    NOTE(review): presumably meant as a "single logit" scoring variant.
    Confirm that returning the *second* argument (rather than ``a``) is
    intended before using it in place of ``softmax``.
    """
    return b

In [4]:
import json, glob

# Every sub-directory of the working directory is treated as one model's
# result folder (the glob pattern keeps the trailing path separator).
models = glob.glob("*/")


# The empty first/last entries make "| ".join(...) emit the leading and
# trailing pipes of a Markdown table row.
datasets = ["", "**Model Name**", "SPAQ","KoNIQ-10k","LIVE-FB","LIVE-itw","CGIQA-6K", "AGIQA-3K", "KADID-10K", ""]
print("| ".join(datasets))
# Separator row: exactly one "-" per header column, with the same leading and
# trailing pipes as the header. (Previously the `+ [""]` sat inside the
# comprehension, producing one dash too many and no closing pipe.)
print("| ".join([""] + ["-" for _ in datasets[1:-1]] + [""]))

# Result files read for each model, listed in the same order as the dataset
# columns of the header above.
result_files = [
    "spaq.json",
    "koniq.json",
    "flive.json",
    "livec.json",
    "cgi.json",
    "agi.json",
    "kadid.json",
]

for json_prefix in sorted(models):
    jsons = [json_prefix + file_name for file_name in result_files]
    # Row label: the directory name without its trailing separator.
    stri = json_prefix[:-1]
    for json_ in jsons:
        with open(json_) as f:
            # Files may hold back-to-back JSON objects ("{...}{...}") instead
            # of a JSON array; insert the missing commas and wrap in brackets
            # so the whole file parses as one list.
            raw = f.read().replace("}{", "},{")
        if raw[0] != "[":
            raw = "[" + raw + "]"
        d = json.loads(raw)

        # Build each series once and reuse it for both correlations.
        gt_scores = [float(di["gt_score"]) for di in d]
        pred_scores = [softmax(di["logit_good"], di["logit_poor"]) for di in d]
        srcc = spearmanr(gt_scores, pred_scores)[0]
        plcc = pearsonr(gt_scores, pred_scores)[0]
        stri += " | {:.3f}/{:.3f}".format(srcc, plcc)
    print("|"+stri+"|")

| **Model Name**| SPAQ| KoNIQ-10k| LIVE-FB| LIVE-itw| CGIQA-6K| AGIQA-3K| KADID-10K| 
| -| -| -| -| -| -| -| -| -
|clip_vit_l14 | 0.385/0.389 | 0.468/0.505 | 0.218/0.237 | 0.307/0.308 | 0.339/0.324 | 0.436/0.458 | 0.376/0.388|
|idefics | 0.474/0.484 | 0.375/0.400 | 0.235/0.240 | 0.409/0.428 | 0.179/0.217 | 0.562/0.622 | 0.370/0.373|
|instructblip_t5 | 0.581/0.618 | 0.288/0.289 | 0.221/0.231 | 0.017/0.020 | 0.072/0.126 | 0.264/0.281 | 0.264/0.221|
|instructblip_vicuna | 0.683/0.689 | 0.359/0.437 | 0.200/0.283 | 0.253/0.367 | 0.200/0.258 | 0.629/0.663 | 0.337/0.382|
|kosmos_2 | 0.644/0.641 | 0.255/0.281 | 0.196/0.195 | 0.358/0.368 | 0.222/0.237 | 0.489/0.491 | 0.359/0.365|
|llama_adapter_v2 | 0.464/0.506 | 0.354/0.363 | 0.275/0.329 | 0.298/0.360 | 0.215/0.227 | 0.604/0.666 | 0.412/0.425|
|llava_v1 | 0.442/0.462 | 0.462/0.457 | 0.264/0.280 | 0.404/0.417 | 0.036/0.082 | 0.626/0.684 | 0.349/0.372|
|minigpt4_13b | 0.238/0.253 | 0.239/0.257 | 0.170/0.183 | 0.339/0.340 | 0.229/0.211 | 0.572/0.

In [10]:
## ABLATION Study for InstructBLIP-T5XL, "good/poor" vs "high/low"

import json
models = ["instructblip_t5"]
datasets = ["koniq", "spaq", "flive", "livec", "agi", "kadid"]

# (printed label, first logit key, second logit key) for each token pair
# being compared; the keys are passed to softmax in this order.
token_pairs = [
    ("Use good/poor:", "logit_good", "logit_poor"),
    ("Use high/low:", "logit_high", "logit_low"),
]

for model in models:
    for data in datasets:
        with open(f"{model}/{data}.json") as f:
            # Turn back-to-back JSON objects into a parseable JSON array.
            raw = f.read().replace("}{", "},{")
        if raw[0] != "[":
            raw = "[" + raw + "]"
        d = json.loads(raw)

        print("#### On dataset:", data, "####")
        # Ground truth is shared by every token pair, so build it once.
        gt_scores = [float(di["gt_score"]) for di in d]
        for label, first_key, second_key in token_pairs:
            print(label)
            pred_scores = [softmax(di[first_key], di[second_key]) for di in d]
            # Spearman first, then Pearson.
            print(spearmanr(gt_scores, pred_scores)[0])
            print(pearsonr(gt_scores, pred_scores)[0])

#### On dataset: koniq ####
Use good/poor:
0.287872594346266
0.28916321967110636
Use high/low:
0.3343189447129938
0.36228762312872836
#### On dataset: spaq ####
Use good/poor:
0.5809460263575776
0.6183042126179601
Use high/low:
0.5821330335775249
0.598677194354896
#### On dataset: flive ####
Use good/poor:
0.2209330461038824
0.23059966962060757
Use high/low:
0.24803585398750716
0.2674082687959074
#### On dataset: livec ####
Use good/poor:
0.017060770005605568
0.020464221103786404
Use high/low:
0.11327492446378733
0.11316193991280091
#### On dataset: agi ####
Use good/poor:
0.26382944187621005
0.281093899467867
Use high/low:
0.37774309995693933
0.39952749110800656
#### On dataset: kadid ####
Use good/poor:
0.26425786523005373
0.22081100785332958
Use high/low:
0.1810856710249136
0.16649232815409978
