## **Compute risk bound and theta with Selection with Guaranteed Risk (SGR)**

Reference: Geifman & El-Yaniv, "Selective Classification for Deep Neural Networks" (https://arxiv.org/pdf/1705.08500), Algorithm 1.

In [13]:
from risk_control import *
import numpy as np
import torch

## **Get Confidence Scores**

#### Method 1: **MaxProb**

In [14]:
from collections import Counter

def max_occurence(labels):
    """Return the element that appears most often in ``labels``.

    When several elements share the highest count, the one that occurs
    first in ``labels`` wins. An empty ``labels`` raises ``ValueError``.
    """
    tally = Counter(labels)
    # max() keeps the first maximal item, and Counter iterates in insertion
    # order, so ties resolve to the earliest-seen element.
    winner, _ = max(tally.items(), key=lambda pair: pair[1])
    return winner


    

In [3]:
# Collect, for every labelled sample, the max-softmax confidence (kappa) and a
# 0/1 error flag (residuals) from the saved per-batch prediction files.
data_path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/predictions/logits_and_labels/"
NUM_BATCH = 2139
softmax = torch.nn.Softmax(dim=1)
kappa, residuals = [], []

for batch_no in range(NUM_BATCH):
    file_name = f"Logits_and_labels{batch_no}.pt"
    data = torch.load(data_path + file_name)

    for logits, labels in zip(data['logits'], data['labels']):
        # Samples without any label cannot be scored, so they are skipped.
        if not len(labels):
            continue

        # The stored logits are NumPy arrays; Softmax(dim=1) needs at least two
        # dimensions (presumably (1, num_classes) per sample -- TODO confirm).
        logits = torch.from_numpy(logits)
        prob = softmax(logits)

        # Confidence score: the largest softmax probability.
        kappa.append(prob.max().numpy())

        # Error flag: True when the predicted index differs from the most
        # frequent label of the sample.
        idx = prob.argmax()
        residuals.append(idx.item() != max_occurence(labels))
        
    

In [15]:
# Run the SGR bound computation on the max-prob scores for each target risk.
# The table rows shown under this cell are presumably printed by bound() --
# confirm against risk_control.
kappa = np.array(kappa)
residuals = np.array(residuals)

risk_stars = [0.02, 0.1, 0.15, 0.20, 0.25,0.30, 1.0]  # desired risk levels
delta = 0.01  # confidence parameter passed to bound()
risk_dict_max_prob = {}

for desired_risk in risk_stars:
    bound_cal = risk_control()
    theta, b_star = bound_cal.bound(desired_risk, delta, kappa, residuals, split=True)
    # Keyed by the string form of the target risk, e.g. '0.02'.
    risk_dict_max_prob[str(desired_risk)] = [theta, b_star]


0.02 & 0.0177 & 0.3726 & 0.0193 & 0.3717 & 0.0200  \\


0.10 & 0.0964 & 0.6780 & 0.0969 & 0.6826 & 0.1000  \\
0.15 & 0.1460 & 0.8065 & 0.1451 & 0.8045 & 0.1500  \\
0.20 & 0.1958 & 0.9168 & 0.1968 & 0.9161 & 0.2000  \\
0.25 & 0.2446 & 1.0000 & 0.2440 & 1.0000 & 0.2489  \\
0.30 & 0.2434 & 1.0000 & 0.2452 & 1.0000 & 0.2478  \\
1.00 & 0.2439 & 1.0000 & 0.2447 & 0.9999 & 0.2482  \\


In [16]:
# Show the {str(desired_risk): [theta, b_star]} mapping built from the max-prob scores.
risk_dict_max_prob

{'0.02': [0.9894505, 0.0199641253168461],
 '0.1': [0.70405567, 0.09997844312894971],
 '0.15': [0.5049889, 0.14999522941302407],
 '0.2': [0.27237254, 0.19999602656843307],
 '0.25': [0.022311127, 0.24889408630636511],
 '0.3': [0.02225437, 0.2477710283302041],
 '1.0': [0.02400235, 0.2482029990738233]}

#### Method 2: **Vector Scaling**

In [17]:
import sys
sys.path.append("/teamspace/studios/this_studio/Selective_Prediction_VQA")
from calibration_methods import calibrator as cal

In [18]:
def load_calibrator(path, calibrator_type = "vector_calibrator", device = 'cpu'):
    """Rebuild a saved calibrator from a checkpoint readable by ``torch.load``.

    Args:
        path: Location of the checkpoint file.
        calibrator_type: Kind of calibrator to rebuild. Only
            ``"vector_calibrator"`` is implemented.
        device: Forwarded to ``cal.VectorScaling`` as its ``device`` argument.

    Returns:
        A ``cal.VectorScaling`` instance whose ``temperature`` and ``bias``
        attributes are overwritten with the checkpoint values. For any other
        ``calibrator_type`` a ``UserWarning`` is emitted and ``None`` is
        returned.
    """
    import warnings  # local import: only used on the unsupported-type path

    # Named `checkpoint` (not `dict`) so the builtin is not shadowed.
    checkpoint = torch.load(path)

    if calibrator_type != "vector_calibrator":
        # TODO: support the remaining calibrator types. Warn instead of failing
        # silently, otherwise the caller only finds out when it calls a method
        # on None.
        warnings.warn(
            f"load_calibrator: unsupported calibrator_type {calibrator_type!r}; returning None",
            stacklevel=2,
        )
        return None

    cali = cal.VectorScaling(bias=checkpoint['biasFlag'],
                             weights=checkpoint['weights'],
                             num_label=checkpoint['num_label'],
                             device=device,
                             print_verbose=False)
    # Restore the saved parameters on top of the freshly constructed object.
    cali.temperature = checkpoint['temperature']
    cali.bias = checkpoint['bias']
    return cali

In [19]:
# Load the saved vector-scaling calibrator; device='cuda' is hard-coded here.
path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/calibration_methods/scaling/vector_calibrator.pt"
cali = load_calibrator(
    path,
    calibrator_type="vector_calibrator",
    device='cuda',
)

In [20]:
# Same collection pass as for max-prob, except that every logits entry is first
# passed through the vector-scaling calibrator `cali`.
data_path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/predictions/logits_and_labels/"
NUM_BATCH = 2139
softmax = torch.nn.Softmax(dim=1)
kappa, residuals = [], []

for batch_no in range(NUM_BATCH):
    file_name = f"Logits_and_labels{batch_no}.pt"
    data = torch.load(data_path + file_name)

    for logits, labels in zip(data['logits'], data['labels']):
        # Samples without any label cannot be scored, so they are skipped.
        if not len(labels):
            continue

        # calibrate() must hand back a NumPy array, since its result goes
        # straight into torch.from_numpy.
        logits = cali.calibrate(logits)
        logits = torch.from_numpy(logits)
        prob = softmax(logits)

        # Confidence score: the largest calibrated softmax probability.
        kappa.append(prob.max().numpy())

        # Error flag: True when the predicted index differs from the most
        # frequent label of the sample.
        idx = prob.argmax()
        residuals.append(idx.item() != max_occurence(labels))

In [21]:
# Run the SGR bound computation on the vector-scaling scores for each target
# risk. The table rows shown under this cell are presumably printed by bound()
# -- confirm against risk_control.
kappa = np.array(kappa)
residuals = np.array(residuals)

risk_stars = [0.02, 0.1, 0.15, 0.20, 0.25,0.30, 1.0]  # desired risk levels
# NOTE(review): the max-prob run uses delta = 0.01 while this run uses 0.1, so
# the two result sets are computed at different confidence levels -- confirm
# this is intentional before comparing them.
delta = 0.1  # confidence parameter passed to bound()
risk_dict_vec_scal = {}

for desired_risk in risk_stars:
    bound_cal = risk_control()
    theta, b_star = bound_cal.bound(desired_risk, delta, kappa, residuals, split=True)
    # Keyed by the string form of the target risk, e.g. '0.02'.
    risk_dict_vec_scal[str(desired_risk)] = [theta, b_star]


0.02 & 0.0000 & 0.0000 & 0.0306 & 0.4583 & 0.9941  \\
0.10 & 0.0972 & 0.6783 & 0.0946 & 0.6797 & 0.1000  \\
0.15 & 0.1469 & 0.7966 & 0.1475 & 0.7967 & 0.1500  \\
0.20 & 0.1967 & 0.9008 & 0.1957 & 0.9023 & 0.2000  \\
0.25 & 0.2451 & 1.0000 & 0.2448 & 1.0000 & 0.2484  \\
0.30 & 0.2445 & 1.0000 & 0.2453 & 1.0000 & 0.2479  \\
1.00 & 0.2459 & 1.0000 & 0.2439 & 1.0000 & 0.2492  \\


In [22]:
# Show the {str(desired_risk): [theta, b_star]} mapping built from the vector-scaling scores.
risk_dict_vec_scal

{'0.02': [1.0, 0.9941177368164062],
 '0.1': [0.99844825, 0.0999940050639865],
 '0.15': [0.96077937, 0.14999492921580543],
 '0.2': [0.75928205, 0.19999539489987525],
 '0.25': [0.14571935, 0.2484226213998834],
 '0.3': [0.14843729, 0.24787624694690857],
 '1.0': [0.15131189, 0.2492277806559775]}

In [23]:

# Bundle the per-method results under one dict and persist it with torch.save.
risk_dict = {}
risk_dict['max_prob'] = risk_dict_max_prob
risk_dict['vector_scaling_calibration'] = risk_dict_vec_scal

# `path` is rebound here: it now names the output directory, not the
# calibrator checkpoint.
path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/risk_bounds/"
torch.save(risk_dict, path + "risk_bound.pt")


In [24]:
# Number of misclassified samples; `residuals` was last rebuilt in the vector-scaling run.
np.sum(residuals)

51335

In [25]:
# Total number of scored samples (samples with no labels were skipped during collection).
residuals.size

209608

In [26]:
# Fraction of scored samples that are misclassified, i.e. the error rate when no sample is rejected.
np.sum(residuals)/residuals.size

0.24490954543719706