In [None]:
# default_exp projects.robustness_benchmark

# Robustness Benchmark
> Benchmark utility code

In [None]:
# hide
from nbdev.showdoc import *
from fastcore.test import *

In [None]:
# export
import pandas as pd
import numpy as np

from enum import Enum 
from scp.analysis.binary import performance

## Constants

In [None]:
# export
# Corruption types. The functional Enum API numbers the members 1, 2, 3, ...
# in the order they are listed here, so do not reorder existing entries.
Corruption = Enum("Corruption", [
    "gaussian_noise", "shot_noise", "impulse_noise",
    "defocus_blur", "motion_blur", "zoom_blur",
    "black_corner", "char",
    "brightness_up", "brightness_down", "contrast", "elastic_transform", "pixelate", "jpeg_compression",
    "speckle_noise", "gaussian_blur", "bubble", "saturate",
    "sanity_check",
])

# Perturbation types, numbered 1, 2, 3, ... in listing order (see note above).
# Names are given as an explicit list: the previous comma-separated string was
# missing the comma after "char" and only worked because Enum also splits on
# whitespace.
Perturbation = Enum("Perturbation", [
    "gaussian_noise", "shot_noise",
    "motion_blur", "zoom_blur",
    "char",
    "brightness", "translate", "rotate", "tilt", "scale",
    "speckle_noise", "gaussian_blur", "bubble", "shear",
    "sanity_check",
])

# Severity ranges from 1 to 5 inclusive; 6 is presumably meant as an exclusive
# upper bound (e.g. `range(1, max_severity)`) -- TODO confirm against callers.
max_severity = 6
# Presumably the number of frames per perturbation sequence (the `n` argument
# of `flip_rate`) -- TODO confirm against callers.
num_frames = 31
# Class names; presumably index-aligned with the prediction columns -- TODO confirm.
classes = ["nevus", "melanoma"]



## Analysis

In [None]:
# export
def bal_error_rate(v):
    """Return the balanced error rate reported by `performance`.

    `v` is indexed as `v[1]` and `v[0][:, 1]` (column 1 of a 2-D array-like);
    presumably these are the targets and the scores of the second class --
    confirm against `performance`'s signature.
    """
    first_arg = v[1]
    column_one = v[0][:, 1]
    metrics = performance(first_arg, column_one, bal_err_rate=True)
    return metrics["bal_err_rate"]

def error_rate(v):
    """Return the (unbalanced) error rate reported by `performance`.

    `v` is indexed as `v[1]` and `v[0][:, 1]` (column 1 of a 2-D array-like);
    presumably these are the targets and the scores of the second class --
    confirm against `performance`'s signature.
    """
    first_arg = v[1]
    column_one = v[0][:, 1]
    metrics = performance(first_arg, column_one, err_rate=True)
    return metrics["err_rate"]

def auroc(v):
    """Return the AUROC reported by `performance`.

    `v` is indexed as `v[1]` and `v[0][:, 1]` (column 1 of a 2-D array-like);
    presumably these are the targets and the scores of the second class --
    confirm against `performance`'s signature.
    """
    first_arg = v[1]
    column_one = v[0][:, 1]
    metrics = performance(first_arg, column_one, auroc=True)
    return metrics["auroc"]

def flip_rate(k, v, n):
    """Return the fraction of prediction flips across all frame sequences.

    The rows of `v[0]` are split into consecutive sequences of `n` frames and
    each row's predicted label is its `argmax` over dim 1.

    * If the substring "noise" occurs in `k`, every later frame of a sequence
      is compared against the first frame of that sequence.
    * Otherwise every frame is compared against the frame directly before it.

    The number of mismatching comparisons is divided by the total number of
    comparisons, `num_sequences * (n - 1)`.

    Args:
        k: name of the perturbation; only checked for containing "noise".
        v: indexable whose first item holds the predictions. The
            `argmax(dim=1)` call suggests a torch tensor of shape
            (num_rows, num_classes) -- TODO confirm against callers.
        n: number of frames per sequence; must be at least 2.

    Returns:
        The flip rate as a Python float.

    Raises:
        ValueError: if `n < 2` or there are no predictions (the rate would be
            a division by zero).
        AssertionError: if the number of predictions is not a multiple of `n`.
    """
    preds = v[0]
    stop = preds.shape[0]
    if n < 2:
        raise ValueError(f"Need at least 2 frames per sequence to count flips, got n={n}.")
    if stop == 0:
        raise ValueError("Cannot compute a flip rate without any predictions.")
    assert stop%n == 0, f"Number of predictions ({stop}) is not evenly divisible by stepsize ({n})!"

    num_seqs = stop // n
    # One row per sequence, one column per frame.
    labels = preds.argmax(dim=1).reshape(num_seqs, n)

    if "noise" in k:
        # Noise sequences: compare every later frame with the first frame.
        flips = labels[:, :1] != labels[:, 1:]
    else:
        # Other sequences: compare each frame with its predecessor.
        flips = labels[:, :-1] != labels[:, 1:]

    # Tensor sum instead of the builtin `sum`, which would iterate the tensor
    # element by element in Python.
    return (flips.sum() / (num_seqs * (n - 1))).item()

In [None]:
# export
def adjust_by_baseline(perf, baseline_model_name:str):
    '''Divide every model's performance by the baseline model's performance.

    `perf` is a three-level mapping `perf[model][rep][tfm_type]`. The result
    has the same keys; each value is the model's value divided by
    `perf[baseline_model_name][rep][tfm_type]`.
    '''
    return {
        model: {
            rep: {
                # Baseline is looked up per entry, so an empty `perf` never touches it.
                tfm: score / perf[baseline_model_name][rep][tfm]
                for tfm, score in by_tfm.items()
            }
            for rep, by_tfm in by_rep.items()
        }
        for model, by_rep in perf.items()
    }

def relative_perf(perf_c, perf_cl):
    '''Subtract each model's "External" value in `perf_cl` from its values in `perf_c`.

    For every `model_name` and `rep` in `perf_c`, the value
    `perf_cl[model_name][rep]["External"]` is subtracted from the values of
    `perf_c[model_name][rep]` by passing a subtracting function to
    `apply_to_dict_vals` (defined outside this block; presumably it returns a
    dict with the function applied to each value -- TODO confirm).

    Returns a dict `perf_rel[model_name][rep]` holding whatever
    `apply_to_dict_vals` returns for that entry.
    '''

    def minus(x):
        # Factory so `x` is bound at creation time, as `partial` did; this also
        # avoids relying on `functools.partial` being imported in this module.
        return lambda v: v - x

    perf_rel = dict()

    for model_name in perf_c.keys():
        perf_rel[model_name] = dict()

        for rep in perf_c[model_name].keys():
            apply_subtract = minus(perf_cl[model_name][rep]["External"])
            perf_rel[model_name][rep] = apply_to_dict_vals(perf_c[model_name][rep], apply_subtract)

    return perf_rel
