In [1]:
from utils import load_generated_result
import numpy as np

In [2]:
# Load the stored generation records for the ('opt', 'truthful_qa') pair
# (presumably model name and dataset name -- confirm against utils.load_generated_result).
# Later cells iterate over `result` and index each record by 'align' and by
# uncertainty-metric name.
result = load_generated_result('opt', 'truthful_qa')

Reading file:  eef4e351-eca5-4221-9c9e-f647be914de2.npy


In [3]:
from sklearn.metrics import roc_auc_score

# Names of the uncertainty scores read from every record in `result`.
us_metrics = [
    'sar',
    'maximumsequenceprobability',
    'semanticentropy',
    'lexicalsimilarity',
    'montecarlosequenceentropy',
]

# A record is treated as correct when its 'align' score exceeds 0.5.
correctness = [record['align'] > 0.5 for record in result]

# Each score is negated before ranking (presumably so that lower uncertainty
# ranks as "more likely correct" -- confirm the sign convention per metric).
for metric in us_metrics:
    auroc = roc_auc_score(
        correctness,
        -np.array([record[metric] for record in result]),
    )
    print(f'{metric} auroc: {auroc}')


sar auroc: 0.6124151192836969
maximumsequenceprobability auroc: 0.5053091761312422
semanticentropy auroc: 0.47413016044708844
lexicalsimilarity auroc: 0.5826693107385373
montecarlosequenceentropy auroc: 0.44771347875728623


In [3]:
def assign_labels_by_uncertainty(uncertainty_values, true_labels, high_threshold=0.3, low_threshold=0.4):
    """
    Assigns labels based on the rank of each uncertainty value.
    - The top `high_threshold` proportion of uncertainty values are assigned True (1).
    - The bottom `low_threshold` proportion of uncertainty values are assigned False (0).
    - All remaining samples stay unassigned (-1).

    If the two ranges overlap (high_threshold + low_threshold > 1), the
    high assignment wins because it is applied last.

    Parameters:
        uncertainty_values (list or np.array): The uncertainty scores.
        true_labels (list or np.array): The ground truth labels (bool or 0/1).
        high_threshold (float): The proportion of highest uncertainty values to assign True.
        low_threshold (float): The proportion of lowest uncertainty values to assign False.

    Returns:
        assigned_labels (np.array of int): 1, 0, or -1 for unassigned.
        accuracy (float): The accuracy of assigned labels compared to true labels,
            computed only over assigned samples; 0.0 when nothing is assigned.
    """
    uncertainty_values = np.asarray(uncertainty_values)
    true_labels = np.asarray(true_labels)

    n_samples = len(uncertainty_values)

    # Clamp the counts so out-of-range thresholds cannot produce negative or
    # oversized slices.
    num_high = min(max(int(n_samples * high_threshold), 0), n_samples)
    num_low = min(max(int(n_samples * low_threshold), 0), n_samples)

    # Indices of the samples in ascending order of uncertainty.
    sorted_indices = np.argsort(uncertainty_values)

    # Use an explicit integer dtype. np.full_like(true_labels, -1) inherits a
    # boolean dtype from boolean labels, which turns the -1 sentinel into True
    # and makes every sample look "assigned".
    assigned_labels = np.full(true_labels.shape, -1, dtype=int)
    assigned_labels[sorted_indices[:num_low]] = 0
    # Slice from an explicit start index: `[-num_high:]` would select *every*
    # sample when num_high == 0.
    assigned_labels[sorted_indices[n_samples - num_high:]] = 1

    # Accuracy is measured only on samples that received a label.
    mask = assigned_labels != -1
    if not np.any(mask):
        return assigned_labels, 0.0
    accuracy = np.mean(assigned_labels[mask] == true_labels[mask])

    return assigned_labels, accuracy

def assign_labels_by_uncertainty_2(uncertainty1, uncertainty2, true_labels, high_threshold=0.3, low_threshold=0.4):
    """
    Assigns labels based on two uncertainty values.
    - The samples that are in the top `high_threshold` proportion in **both** uncertainty values are assigned True (1).
    - The samples that are in the bottom `low_threshold` proportion in **both** uncertainty values are assigned False (0).
    - All other samples remain unassigned (-1).

    If a sample qualifies for both groups (possible when
    high_threshold + low_threshold > 1), the high assignment wins because it
    is applied last.

    Parameters:
        uncertainty1 (list or np.array): The first set of uncertainty scores.
        uncertainty2 (list or np.array): The second set of uncertainty scores.
        true_labels (list or np.array): The ground truth labels (bool or 0/1).
        high_threshold (float): The proportion of highest uncertainty values to assign True.
        low_threshold (float): The proportion of lowest uncertainty values to assign False.

    Returns:
        assigned_labels (np.array of int): 1, 0, or -1 for unassigned.
        accuracy (float): The accuracy of assigned labels compared to true labels,
            computed only over assigned samples; 0.0 when nothing is assigned.

    Raises:
        ValueError: If the two uncertainty arrays have different shapes.
    """
    uncertainty1 = np.asarray(uncertainty1)
    uncertainty2 = np.asarray(uncertainty2)
    true_labels = np.asarray(true_labels)

    if uncertainty1.shape != uncertainty2.shape:
        raise ValueError("uncertainty1 and uncertainty2 must have the same shape")

    n_samples = len(uncertainty1)

    # Clamp the counts so out-of-range thresholds cannot produce negative or
    # oversized slices.
    num_high = min(max(int(n_samples * high_threshold), 0), n_samples)
    num_low = min(max(int(n_samples * low_threshold), 0), n_samples)

    # Sample indices in ascending order of each score.
    order1 = np.argsort(uncertainty1)
    order2 = np.argsort(uncertainty2)

    # Slice from an explicit start index: `[-num_high:]` would select *every*
    # sample when num_high == 0.
    high_start = n_samples - num_high
    high_idx = np.intersect1d(order1[high_start:], order2[high_start:])  # top group in both
    low_idx = np.intersect1d(order1[:num_low], order2[:num_low])         # bottom group in both

    # Use an explicit integer dtype. np.full_like(true_labels, -1) inherits a
    # boolean dtype from boolean labels, which turns the -1 sentinel into True
    # and makes every sample look "assigned".
    assigned_labels = np.full(true_labels.shape, -1, dtype=int)
    assigned_labels[low_idx] = 0
    assigned_labels[high_idx] = 1

    # Accuracy is measured only on samples that received a label.
    mask = assigned_labels != -1
    accuracy = np.mean(assigned_labels[mask] == true_labels[mask]) if np.any(mask) else 0.0

    return assigned_labels, accuracy

In [4]:
# Ground truth: a record counts as positive when its 'align' score exceeds 0.7.
labels = np.array([record['align'] > 0.7 for record in result])

# Both scores are negated, so the largest values passed on below correspond to
# the smallest raw 'sar' / 'maximumsequenceprobability' values.
uncertainty1 = np.array([-record['sar'] for record in result])
uncertainty2 = np.array([-record['maximumsequenceprobability'] for record in result])

# Alternative experiment (disabled): require agreement between both scores.
# assign_labels_by_uncertainty_2(uncertainty1, uncertainty2, labels, high_threshold=0.05, low_threshold=0.05)

# NOTE: 0.8 + 0.54 > 1, so the high and low ranges overlap; the function applies
# the high (1) assignment after the low (0) one, so overlapping samples end up 1.
assign_labels_by_uncertainty(uncertainty1, labels, high_threshold=0.8, low_threshold=0.54)

(array([ True,  True, False, ...,  True,  True, False]), 0.83275)