In [6]:
import pickle
import glob
import os
from tqdm import tqdm
import numpy as np
# Experiment selection: which dataset / run directory to aggregate.
ds = 'cityscapes'
num_classes = 19
exp = 'best_thresh'
exp_dir = f'/BS/mlcysec2/work/hierarchical-certification/log/{ds}/{exp}'

# Keys found inside nested metric dicts that are stored under a different
# name in the aggregate; any other nested key is kept unchanged.
_nested_key_renames = {'num_pixels': 'num_pixels_per_cls', 'cig': 'cig_per_cls'}

# overall_dict[model_type][metric] accumulates (via +=) every metric value
# found across all pickle files in exp_dir.
overall_dict = {}

for file in tqdm(glob.glob(os.path.join(exp_dir, '*.pkl')), desc=f'reading from {exp_dir}'):
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only point exp_dir at result files you produced yourself.
    # The context manager closes the handle right after loading instead of
    # leaving one open file descriptor per result file.
    with open(file, 'rb') as fh:
        d = pickle.load(fh)

    # Only the entry keyed by the file's own base name (with '.pkl' removed)
    # is aggregated; any other top-level key in the pickle is ignored.
    filename = os.path.basename(file).replace('.pkl', '')
    d = {filename: d[filename]}

    for image_name, image_d in d.items():
        for model_type, model_d in image_d.items():
            model_totals = overall_dict.setdefault(model_type, {})
            for metric, value in model_d.items():
                if isinstance(value, np.ndarray):
                    # Built-in sum() adds along the first axis only.
                    # NOTE(review): a 2-D array would therefore collapse to
                    # 1-D here — confirm that is intended for every metric.
                    value = sum(value)
                if isinstance(value, dict):
                    # Nested dicts are flattened into the model's totals,
                    # with the renames above applied to their keys.
                    for k, v in value.items():
                        k = _nested_key_renames.get(k, k)
                        if k not in model_totals:
                            model_totals[k] = v
                        else:
                            model_totals[k] += v
                    continue
                if metric not in model_totals:
                    model_totals[metric] = value
                else:
                    model_totals[metric] += value


reading from /BS/mlcysec2/work/hierarchical-certification/log/cityscapes/best_thresh: 100%|██████████| 1/1 [00:00<00:00, 151.26it/s]


In [7]:
# Turn the accumulated totals into summary metrics.
#   graph_d  : one entry per tuple-keyed model configuration, holding
#              'CIG', 'cCIG' and '%\ certified'.
#   baseline : every non-tuple entry, keeping only its confusion matrix.
graph_d = {}
baseline = {}

# Normaliser applied to both CIG variants.
# NOTE(review): 171 is hard-coded while num_classes is set to 19 earlier in
# this notebook — confirm this is the intended constant for this dataset.
log_norm = np.log(171)

for model_type, model_d in overall_dict.items():
    if not isinstance(model_type, tuple):
        kept = baseline[model_type] = {}
        kept['confusion_matrix'] = model_d['confusion_matrix']
        continue

    summary = graph_d.setdefault(model_type, {})

    # Sanity check: per-class pixel counts must add up to the global count.
    pixels_per_cls = model_d['num_pixels_per_cls']
    total_pixels = sum(pixels_per_cls)
    assert total_pixels == model_d['num_pixels']

    cig_per_cls = model_d['cig_per_cls']

    # Pixel-weighted CIG: total CIG over total pixels, normalised.
    summary['CIG'] = sum(cig_per_cls) / total_pixels / log_norm

    # Class-averaged CIG: mean of the per-class normalised values, skipping
    # classes that have no pixels.
    per_class_scores = [
        cls_cig / cls_pixels / log_norm
        for cls_cig, cls_pixels in zip(cig_per_cls, pixels_per_cls)
        if cls_pixels > 0
    ]
    summary['cCIG'] = np.mean(per_class_scores)

    summary['%\\ certified'] = model_d['certified_count'] / model_d['num_pixels']


In [8]:
# Rank all configurations by CIG, best first.
graph_d = dict(
    sorted(graph_d.items(), key=lambda entry: entry[1]['CIG'], reverse=True)
)

# top_k counts the AdaptiveCertify rows printed so far (capped at 10);
# d records whether a SegCertify row (f is None) has been printed.
top_k = 0
d = False
for model_type, model_d in graph_d.items():
    n, n0, f, h_i, sigma, tau = model_type
    if f is None:
        print('\nSegCertify', model_type, model_d)
        d = True
    elif top_k < 10:
        print('AdaptiveCertify', model_type, model_d)
        top_k += 1
    # Stop once ten AdaptiveCertify rows and at least one SegCertify row
    # have been shown.
    if d and top_k == 10:
        break


AdaptiveCertify (100, 10, '(0, 0, 0.25)', 4, 0.25, 0.75) {'CIG': 0.5289676281618492, 'cCIG': 0.4223661367851581, '%\\ certified': 0.9580611193514419}
AdaptiveCertify (100, 10, '(0, 0.05, 0.25)', 4, 0.25, 0.75) {'CIG': 0.5288988533628585, 'cCIG': 0.42308543226321343, '%\\ certified': 0.958507013806553}
AdaptiveCertify (100, 10, '(0, 0, 0.3)', 4, 0.25, 0.75) {'CIG': 0.5288409505307476, 'cCIG': 0.4241350155019661, '%\\ certified': 0.9593475814673972}
AdaptiveCertify (100, 10, '(0, 0.05, 0.3)', 4, 0.25, 0.75) {'CIG': 0.5287721757317567, 'cCIG': 0.4248543109800213, '%\\ certified': 0.9597934759225082}
AdaptiveCertify (100, 10, '(0, 0.25, 0.3)', 4, 0.25, 0.75) {'CIG': 0.528088021271965, 'cCIG': 0.4247808580073585, '%\\ certified': 0.9611850763924497}
AdaptiveCertify (100, 10, '(0, 0, 0.4)', 4, 0.25, 0.75) {'CIG': 0.5279437287508015, 'cCIG': 0.42602934451205166, '%\\ certified': 0.9611608136953759}
AdaptiveCertify (100, 10, '(0, 0.05, 0.4)', 4, 0.25, 0.75) {'CIG': 0.5278749539518105, 'cCIG': 