## **Compute risk bound and theta with Selection with Guaranteed Risk (SGR)**

Reference: Geifman & El-Yaniv, "Selective Classification for Deep Neural Networks" (https://arxiv.org/pdf/1705.08500), Algorithm 1.

In [1]:
from risk_control import *
import numpy as np
import torch

## **Get Confidence Scores**

#### Method 1: **Maxprob**

In [3]:
from collections import Counter

def max_occurence(labels):
    """Return the label that appears most often in ``labels``.

    When several labels share the highest count, the one seen first in
    ``labels`` wins. An empty ``labels`` raises ``ValueError`` because
    ``max`` is called on an empty collection.
    """
    tally = Counter(labels)
    # Counter keeps first-seen order and max() returns the first maximal
    # item, so ties resolve to the earliest label.
    most_frequent, _ = max(tally.items(), key=lambda pair: pair[1])
    return most_frequent


    

In [4]:
# Sweep every saved prediction batch and record, per sample, the max softmax
# probability (kappa) and whether the predicted class differs from the most
# frequent annotated label (residuals).
data_path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/predictions/logits_and_labels/"
NUM_BATCH = 2139
softmax = torch.nn.Softmax(dim=1)  # normalises along axis 1 — assumes each logits entry is 2-D; TODO confirm
kappa = []
residuals = []
for batch_no in range(NUM_BATCH):
    file_name = f"Logits_and_labels{batch_no}.pt"
    data = torch.load(data_path + file_name)

    for logits, labels in zip(data['logits'], data['labels']):
        # Samples with no labels cannot be scored, so they are skipped.
        if len(labels) == 0:
            continue

        logits = torch.from_numpy(logits)
        prob = softmax(logits)

        # Confidence score: the largest softmax probability.
        kappa.append(prob.max().numpy())

        # Error indicator: True when the arg-max class is not the majority label.
        idx = prob.argmax()
        residuals.append(idx.item() != max_occurence(labels))
        
    

In [5]:
# Run the bound computation once per target risk on the max-prob scores and
# keep the returned [theta, b_star] pair under the stringified target risk.
kappa = np.array(kappa)
residuals = np.array(residuals)

delta = 0.01  # confidence parameter handed to bound()
risk_stars = [0.02, 0.1, 0.15, 0.20, 0.25, 0.30, 1.0]  # desired risk levels

risk_dict_max_prob = {}
for desired_risk in risk_stars:
    # A fresh risk_control instance is created for every target risk.
    bound_cal = risk_control()
    theta, b_star = bound_cal.bound(desired_risk, delta, kappa, residuals, split=False)
    risk_dict_max_prob[str(desired_risk)] = [theta, b_star]


0.02 & 0.0184 & 0.3710 & 0.0200   \\
0.10 & 0.0974 & 0.6826 & 0.1000   \\
0.15 & 0.1472 & 0.8097 & 0.1500   \\
0.20 & 0.1970 & 0.9178 & 0.2000   \\
0.25 & 0.2443 & 1.0000 & 0.2474   \\
0.30 & 0.2443 & 1.0000 & 0.2474   \\
1.00 & 0.2443 & 1.0000 & 0.2474   \\


In [6]:
# Display the max-prob results: {str(desired_risk): [theta, b_star]}.
risk_dict_max_prob

{'0.02': [0.9896492, 0.01999774429653458],
 '0.1': [0.7007049, 0.09999531061703862],
 '0.15': [0.49871063, 0.14999606753785255],
 '0.2': [0.26925242, 0.19999684626012168],
 '0.25': [0.022015542, 0.24738606947789515],
 '0.3': [0.022015542, 0.24738606947789515],
 '1.0': [0.022015542, 0.24738606947789515]}

#### Method 2: **Vector Scaling**

In [7]:
import sys
sys.path.append("/teamspace/studios/this_studio/Selective_Prediction_VQA")
from calibration_methods import calibrator as cal

In [8]:
def load_calibrator(path, calibrator_type = "vector_calibrator", device = 'cpu'):
    """Rebuild a saved calibrator from the file at ``path``.

    Args:
        path: File readable by ``torch.load`` holding the saved calibrator
            state. For ``"vector_calibrator"`` it must provide the keys
            ``'biasFlag'``, ``'weights'``, ``'num_label'``, ``'temperature'``
            and ``'bias'``.
        calibrator_type: Which calibrator to rebuild. Only
            ``"vector_calibrator"`` is implemented.
        device: Forwarded to the ``VectorScaling`` constructor.

    Returns:
        The restored ``cal.VectorScaling`` instance, or ``None`` for any
        other ``calibrator_type`` (not implemented yet).
    """
    # The file is read before the type check, so a bad path raises even for
    # unsupported calibrator types.
    saved_state = torch.load(path)

    if calibrator_type != "vector_calibrator":
        # todo: support further calibrator types
        return None

    calibrator = cal.VectorScaling(
        bias=saved_state['biasFlag'],
        weights=saved_state['weights'],
        num_label=saved_state['num_label'],
        device=device,
        print_verbose=False,
    )
    # Restore the fitted parameters on top of the freshly built object.
    calibrator.temperature = saved_state['temperature']
    calibrator.bias = saved_state['bias']
    return calibrator

In [9]:
# Restore the saved vector-scaling calibrator, requesting the CUDA device.
path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/calibration_methods/scaling/vector_calibrator.pt"
cali = load_calibrator(
    path,
    calibrator_type="vector_calibrator",
    device='cuda',
)

In [10]:
# Sweep every saved prediction batch again for the vector-scaling section and
# record, per sample, the max softmax probability (kappa) and whether the
# predicted class differs from the most frequent annotated label (residuals).
data_path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/predictions/logits_and_labels/"
NUM_BATCH = 2139
residuals = []
kappa = []
softmax = torch.nn.Softmax(dim=1)
for batch_no in range(NUM_BATCH):
    file_name = "Logits_and_labels" + str(batch_no) + ".pt"
    data = torch.load(data_path + file_name)
    
    for logits, labels in zip(data['logits'], data['labels']):
        # Samples with no labels cannot be scored, so they are skipped.
        if(len(labels)) == 0:
            continue
        # NOTE(review): the return value of cali.calibrate() is discarded.
        # Unless calibrate() modifies `logits` in place, the softmax below runs
        # on the *uncalibrated* logits and this section repeats the max-prob
        # scores. The stored outputs are consistent with that: the smallest
        # theta (0.022015542) is identical for both methods. Confirm what
        # calibrate() returns and feed that result into the scoring below.
        cali.calibrate(logits)
        logits = torch.from_numpy(logits)
        
        prob = softmax(logits)
        # Confidence score: the largest softmax probability.
        kappa.append(torch.max(prob).numpy())
        idx = torch.argmax(prob)
        # Error indicator: True when the arg-max class is not the majority label.
        residuals.append(idx.item() != max_occurence(labels))
        
    
# kappa, residuals = sort(kappa, residuals )

In [11]:
# Run the bound computation once per target risk on the vector-scaling scores
# and keep the returned [theta, b_star] pair under the stringified target risk.
kappa = np.array(kappa)
residuals = np.array(residuals)

# NOTE(review): delta is 0.1 here but 0.01 in the max-prob section, so the two
# result sets saved side by side are computed at different confidence levels.
# Confirm which value is intended and use it in both places.
delta = 0.1  # confidence parameter handed to bound()
risk_stars = [0.02, 0.1, 0.15, 0.20, 0.25, 0.30, 1.0]  # desired risk levels

risk_dict_vec_scal = {}
for desired_risk in risk_stars:
    # A fresh risk_control instance is created for every target risk.
    bound_cal = risk_control()
    theta, b_star = bound_cal.bound(desired_risk, delta, kappa, residuals, split=False)
    risk_dict_vec_scal[str(desired_risk)] = [theta, b_star]


0.02 & 0.0187 & 0.3740 & 0.0200   \\
0.10 & 0.0980 & 0.6841 & 0.1000   \\
0.15 & 0.1478 & 0.8114 & 0.1500   \\
0.20 & 0.1977 & 0.9191 & 0.2000   \\
0.25 & 0.2443 & 1.0000 & 0.2467   \\
0.30 & 0.2443 & 1.0000 & 0.2467   \\
1.00 & 0.2443 & 1.0000 & 0.2467   \\


In [12]:
# Display the vector-scaling results: {str(desired_risk): [theta, b_star]}.
risk_dict_vec_scal

{'0.02': [0.9891249, 0.01999221433435066],
 '0.1': [0.69801855, 0.0999995560632004],
 '0.15': [0.49559176, 0.1499965628504193],
 '0.2': [0.26623195, 0.19999764311961266],
 '0.25': [0.022015542, 0.24670545668329957],
 '0.3': [0.022015542, 0.24670545668329957],
 '1.0': [0.022015542, 0.24670545668329957]}

In [17]:

import os

# Bundle the results of both confidence methods and persist them in one file.
# Each inner dict maps str(desired_risk) -> [theta, b_star].
risk_dict = {
    'max_prob' : risk_dict_max_prob,
    'vector_scaling_calibration' : risk_dict_vec_scal
}
path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/risk_bounds/"
# torch.save does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs(path, exist_ok=True)
torch.save(risk_dict, os.path.join(path, "risk_bound.pt"))


In [None]:
# Number of samples whose predicted class differs from the majority label
# (count of True entries in the current `residuals` array).
np.sum(residuals)

51211

In [None]:
# Total number of scored samples (samples with empty labels were skipped).
residuals.size

209608

In [None]:
# Overall error rate over all scored samples, i.e. with no sample rejected.
np.sum(residuals)/residuals.size

0.24431796496316935