In [83]:
'''
This is the bootstrap significance test of models and the subjective listening test
'''
import numpy as np
import pandas as pd
import os
import random

# Each pair is (display name, path of the .npy file with that model's results).
# Declaring them together keeps the two derived lists below index-aligned.
model_results = [
    ("Music InpaintNet", "result/res-validate-irish-inpaintNet.npy"),
    ("SketchVAE + InpaintRNN", "result/res-validate-irish-inpaintNet-sketchvae.npy"),
    ("SketchVAE+ + SketchInpainter", "result/res-validate-irish-sketchnet-stage-1.npy"),
    ("SketchNet", "result/res-validate-irish-sketchnet.npy"),
]
# Display names, in the same order as data_path.
data_list = [name for name, _path in model_results]
# Result files, in the same order as data_list.
data_path = [path for _name, path in model_results]
# Tab-separated responses of the subjective listening test.
slt_path = "response.tsv"

In [58]:
def ae_bootstrap(set_a, set_b, sample_time, seed=None):
    """Estimate a two-sided p-value for the difference in means of two samples.

    Despite the name, this is a permutation (randomization) test: the two
    samples are pooled, the pool is shuffled ``sample_time`` times, and each
    time it is split back into groups of the original sizes. The returned
    value is the fraction of shuffles whose absolute difference in group means
    is strictly greater than the observed absolute difference.

    Parameters
    ----------
    set_a, set_b : sequence of numbers
        The two samples. Any iterable works (list, tuple, numpy array,
        pandas Series); the inputs are copied and never modified.
    sample_time : int
        Number of random shuffles to draw. Must be at least 1.
    seed : int or None, optional
        If given, shuffling uses a private ``random.Random(seed)`` so the
        result is reproducible. If None (default), the module-level
        ``random`` state is used, exactly as before.

    Returns
    -------
    float
        Estimated p-value in ``[0, 1]``. Ties with the observed difference
        are not counted, so the estimate can be exactly 0.

    Raises
    ------
    ValueError
        If ``sample_time`` is smaller than 1 or either sample is empty.
    """
    if sample_time < 1:
        raise ValueError("sample_time must be a positive integer")

    # Copy into plain lists so that `+` below always concatenates. Adding two
    # numpy arrays (or pandas Series) would sum them element-wise instead of
    # pooling them, which silently leaves the second group empty.
    values_a = list(set_a)
    values_b = list(set_b)
    if not values_a or not values_b:
        raise ValueError("set_a and set_b must both be non-empty")

    # Observed statistic: absolute difference between the two sample means.
    eps = np.abs(np.mean(values_a) - np.mean(values_b))

    pooled = values_a + values_b
    split = len(values_a)
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle

    exceed = 0
    for _ in range(sample_time):
        shuffle(pooled)
        n_eps = np.abs(np.mean(pooled[:split]) - np.mean(pooled[split:]))
        # Strict comparison is kept so existing results stay unchanged.
        if n_eps > eps:
            exceed += 1
    return exceed / sample_time

In [60]:
# subjective listening test
# Each response row is indexed by column position below:
#   0 / 3 / 6 -> complexity, 1 / 4 / 7 -> structure, 2 / 5 / 8 -> musicality.
# Per the original note, columns 1-3 are sketchnet, 4-6 inpaintnet and
# 7-9 original (1-based), so index 0 / 1 / 2 of each list below follows that order.
# The last row of the file is excluded (presumably not a response; TODO confirm).
slt_data = pd.read_csv(slt_path, sep='\t').values[:-1]

# One list of ratings per source for every rated criterion.
complexity = [[row[col] for row in slt_data] for col in (0, 3, 6)]
structure = [[row[col] for row in slt_data] for col in (1, 4, 7)]
musicality = [[row[col] for row in slt_data] for col in (2, 5, 8)]

# Mean rating per source, one printed line per criterion.
print(np.mean(complexity, axis = -1))
print(np.mean(structure, axis = -1))
print(np.mean(musicality, axis = -1))

# Significance of every pairwise difference between the three sources.
num_shuffles = 10000
for i, j in ([0, 1], [0, 2], [1, 2]):
    p_complexity = ae_bootstrap(complexity[i], complexity[j], num_shuffles)
    print("p-value complexity (%d, %d): %lf" % (i, j, p_complexity))
    p_structure = ae_bootstrap(structure[i], structure[j], num_shuffles)
    print("p-value structure (%d, %d): %lf" % (i, j, p_structure))
    p_musicality = ae_bootstrap(musicality[i], musicality[j], num_shuffles)
    print("p-value musicality (%d, %d): %lf" % (i, j, p_musicality))

[3.04716981 2.97798742 3.21698113]
[3.28616352 3.00943396 3.46540881]
[3.26415094 3.08805031 3.55660377]
p-value complexity (0, 1): 0.363600
p-value structure (0, 1): 0.000200
p-value musicality (0, 1): 0.028900
p-value complexity (0, 2): 0.029600
p-value structure (0, 2): 0.018600
p-value musicality (0, 2): 0.000100
p-value complexity (1, 2): 0.003500
p-value structure (1, 2): 0.000000
p-value musicality (1, 2): 0.000000


In [90]:
# pairwise model acc
# NOTE: allow_pickle=True makes np.load unpickle stored objects, which can
# execute arbitrary code; only load result files from a trusted source.
data = [np.load(path, allow_pickle = True) for path in data_path]

# Collect the "acc" value of every entry once per model, so the pairwise loop
# below does not rebuild the same lists on each comparison.
acc_per_model = [[entry["acc"] for entry in model_entries] for model_entries in data]

for i, accs in enumerate(acc_per_model):
    print("%s overall acc: %lf" % (data_list[i], np.mean(accs)))
    # Compare model i with every later model, so each pair is tested once.
    for j in range(i + 1, len(acc_per_model)):
        accs2 = acc_per_model[j]
        print("p-value acc (%d, %d): %lf" % (i,j, ae_bootstrap(accs,accs2, 10000)))
        
    

Music InpaintNet overall acc: 0.838169
p-value acc (0, 1): 0.402200
p-value acc (0, 2): 0.000000
p-value acc (0, 3): 0.000000
SketchVAE + InpaintRNN overall acc: 0.841010
p-value acc (1, 2): 0.000000
p-value acc (1, 3): 0.000000
SketchVAE+ + SketchInpainter overall acc: 0.860255
p-value acc (2, 3): 0.000000
SketchNet overall acc: 0.889724
