In [1]:
import json
import os
from statsmodels.stats.contingency_tables import mcnemar
from collections import defaultdict
from tqdm import tqdm
results_dir = os.path.abspath("../results/pairtest/classifier_results/")



In [2]:
noise_types = ["AA","RV","VA"]
accuracy_tables = {ntype:defaultdict(list) for ntype in noise_types}

for filename in tqdm(os.listdir(results_dir)):
    fullpath = os.path.join(results_dir, filename)
    results = []
    if not os.path.isdir(fullpath):
        parameters = filename.split("_") 
        exp_id = parameters[0]
        condition = parameters[1]
        exp_name = "_".join(parameters[0:-2])
        noise_type = parameters[-2]
        with open(fullpath, "r") as res_file:
            for line in res_file.readlines():
                obj = json.loads(line)
                results.append(obj)
        for result in results:
            is_correct = result["gold_label"] == result["predicted_label"]
            accuracy_tables[noise_type][exp_name].append(is_correct)

  

100%|██████████| 75/75 [02:15<00:00,  1.95s/it]


In [10]:
print(list(accuracy_tables['AA']))

['exp-1_no-WS_350-1000_lab', 'exp-1.3_16-512', 'exp-2.1_2uni-2-layer', 'exp-1.3_256-512', 'exp-1_all-WS_350-1000_lab', 'exp-1.3_512-512', 'exp-2.1_6bi-3-layer', 'exp-1.1_512-512', 'exp-1.1_256-256', 'exp-1.1_32-32', 'exp-2.1_1uni-1-layer', 'exp-1.2_pretrained-freeze', 'exp-1.2_pretrained-cont', 'exp-1.1_128-128', 'exp-1.1_64-64', 'exp-2.1_5bi-2-layer', 'exp-1_quarter-WS_350-1000_lab', 'exp-1.3_128-512', 'exp-2.1_3uni-3-layer', 'exp-1.2_random', 'exp-1_half-WS_350-1000_lab', 'exp-2.1_4bi-1-layer', 'exp-1.1_512-1024', 'exp-1.3_32-512', 'exp-1.3_64-512']


In [35]:
def build_contingency_table( first_table, second_table):
    yes_yes = 0
    yes_no = 0
    no_yes = 0
    no_no = 0
    if len(first_table) != len(second_table):
        print("Warning, tables are different lengths, please reevaluate,  your life")
        return
    if not first_table or not second_table:
        print("Warning, one of your experiments is empty, double ch)
    for first,second in zip(first_table, second_table):
        if first and second:
            yes_yes += 1
        elif not (first or second):
            no_no +=1
        elif first and not second:
            yes_no +=1
        elif second and not first:
            no_yes +=1
    contingency_table = [[yes_yes,yes_no],[no_yes,no_no]]
    return contingency_table
    

In [50]:
trial = build_contingency_table(accuracy_tables["AA"]["exp-1.1_128-128"], 
                        accuracy_tables["AA"]["exp-1.3_512-512"])

result = mcnemar(trial, exact=False, correction=True)
print('statistic={}, p-value={}'.format(result.statistic, result.pvalue))

statistic=78.4138823759281, p-value=8.356285342824392e-19


In [46]:
trial

[[298339, 0], [0, 23149]]

In [48]:
result.pvalue

8.356285342824392e-19