## **Compute risk bound and theta with Selection with Guaranteed Risk (SGR)**

Reference: https://arxiv.org/pdf/1705.08500 Algorithm 1

In [16]:
from risk_control import *
import numpy as np
import torch

## **Get Confidence Scores**

#### Method1 : **Maxprob**

In [89]:
from collections import Counter

def max_occurence(labels):
    count = Counter(labels)
    max_num = max(count, key=count.get)
    return max_num

def sort(arr1, arr2 ):
    combined = list(zip(arr1, arr2))

    # Sort the combined list based on the first array's values
    sorted_combined = sorted(combined, key=lambda x: x[0])

    # Separate the sorted tuples back into two lists
    x, y = zip(*sorted_combined)
    return x, y
    

In [90]:
data_path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/predictions/logits_and_labels/"
NUM_BATCH = 2139
residuals = []
kappa = []
softmax = torch.nn.Softmax(dim=1)
for batch_no in range(NUM_BATCH):
    file_name = "Logits_and_labels" + str(batch_no) + ".pt"
    data = torch.load(data_path + file_name)
    
    for logits, labels in zip(data['logits'], data['labels']):
        if(len(labels)) == 0:
            continue
        
        logits = torch.from_numpy(logits)
        prob = softmax(logits)
        # print(prob)
        # print(torch.max(prob).numpy())
        kappa.append(torch.max(prob).numpy())
        idx = torch.argmax(prob)
        # print(prob[0][idx])
        # print(labels)
        residuals.append(idx.item() == max_occurence(labels))
        # print(kappa)
        # print(residuals)
        # break
    # break
        
    
kappa, residuals = sort(kappa, residuals )

In [92]:
kappa = np.array(kappa)
residuals = np.array(residuals)
risk_dict_max_prob = {}
risk_stars = [0.02, 0.1, 0.15, 0.20, 0.25, 0.999] # desired risk
delta = 0.01 ## confidence
for desired_risk in risk_stars:
    bound_cal = risk_control()
    [theta, b_star] = bound_cal.bound(desired_risk, delta, kappa, residuals, split= False)
    risk_dict_max_prob[str(desired_risk)] = [theta, b_star]


0.02 & 1.0000 & 0.0000 & 1.0000   \\
0.10 & 1.0000 & 0.0000 & 1.0000   \\
0.15 & 1.0000 & 0.0000 & 1.0000   \\
0.20 & 1.0000 & 0.0000 & 1.0000   \\
0.25 & 1.0000 & 0.0000 & 1.0000   \\
1.00 & 0.7557 & 1.0000 & 0.7587   \\


In [93]:
risk_dict_max_prob

{'0.02': [1.0, 1.0],
 '0.1': [1.0, 1.0],
 '0.15': [1.0, 1.0],
 '0.2': [1.0, 1.0],
 '0.25': [1.0, 1.0],
 '0.999': [0.022015542, 0.7587391736725159]}

#### Method 2: **Vector Scaling**

In [113]:
import sys
sys.path.append("/teamspace/studios/this_studio/Selective_Prediction_VQA")
from calibration_methods import calibrator as cal

In [114]:
def load_calibrator(path, calibrator_type = "vector_calibrator", device = 'cpu'):
    dict = torch.load(path)
    if calibrator_type == "vector_calibrator":
        cali = cal.VectorScaling(bias=dict['biasFlag'], 
                                 weights= dict['weights'],
                                 num_label = dict['num_label'],
                                 device=device,
                                 print_verbose= False)
        cali.temperature = dict['temperature']
        cali.bias = dict['bias']
    else:
        #todo
        return None
    return cali

In [115]:
path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/calibration_methods/scaling/vector_calibrator.pt"
cali = load_calibrator(path, calibrator_type = "vector_calibrator", device='cuda')

In [116]:
data_path = "/teamspace/studios/this_studio/Selective_Prediction_VQA/predictions/logits_and_labels/"
NUM_BATCH = 2139
residuals = []
kappa = []
softmax = torch.nn.Softmax(dim=1)
for batch_no in range(NUM_BATCH):
    file_name = "Logits_and_labels" + str(batch_no) + ".pt"
    data = torch.load(data_path + file_name)
    
    for logits, labels in zip(data['logits'], data['labels']):
        if(len(labels)) == 0:
            continue
        cali.calibrate(logits)
        logits = torch.from_numpy(logits)
        
        prob = softmax(logits)
        # print(prob)
        # print(torch.max(prob).numpy())
        # prob.to('cpu')
        kappa.append(torch.max(prob).numpy())
        idx = torch.argmax(prob)
        # print(prob[0][idx])
        # print(labels)
        residuals.append(idx.item() == max_occurence(labels))
        # print(kappa)
        # print(residuals)
        # break
    # break
        
    
kappa, residuals = sort(kappa, residuals )

In [117]:
kappa = np.array(kappa)
residuals = np.array(residuals)
risk_dict_max_prob = {}
risk_stars = [0.02, 0.1, 0.15, 0.20, 0.25, 0.999] # desired risk
delta = 0.01 ## confidence
for desired_risk in risk_stars:
    bound_cal = risk_control()
    [theta, b_star] = bound_cal.bound(desired_risk, delta, kappa, residuals, split= False)
    risk_dict_max_prob[str(desired_risk)] = [theta, b_star]


0.02 & 1.0000 & 0.0000 & 1.0000   \\
0.10 & 1.0000 & 0.0000 & 1.0000   \\
0.15 & 1.0000 & 0.0000 & 1.0000   \\
0.20 & 1.0000 & 0.0000 & 1.0000   \\
0.25 & 1.0000 & 0.0000 & 1.0000   \\
1.00 & 0.7557 & 1.0000 & 0.7587   \\
