In [1]:
import os
os.chdir('/home/hanwenli/work/2025/AL_SSL/hidden_state')

from utils import load_generated_result
import numpy as np
import math

In [2]:
result = load_generated_result('llama3', 'trivia_qa')

Reading file:  8aa76b7b-1009-4ac4-8d7f-fc2f73ce5030.npy


In [3]:
result[0]

{'montecarlosequenceentropy': 2.6767156451132905,
 'lexicalsimilarity': -0.19294532627865962,
 'semanticentropy': 2.834320496712656,
 'maximumsequenceprobability': 1.3573691844940186,
 'sar': -7.501996362145998,
 'align': 1.0,
 'inputs': '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\nAnswer the question concisely. (less than 5 words)\nQ: "What fictional ship, with black hull and sails, was originally named ""Wicked Wench""?"<|eot_id|><|start_header_id|>assistant<|end_header_id|>\nA: ',
 'target_texts': ['wicked wench', 'black pearl'],
 'answer': 'The Black Pearl.'}

In [4]:
us_metrics = ['sar', 'maximumsequenceprobability', 'semanticentropy', 'lexicalsimilarity', 'montecarlosequenceentropy']
correctness = [data['align']>0.5 for data in result]
from sklearn.metrics import roc_auc_score
for metric in us_metrics:
    scores = []
    labels = []
    for data in result:
        value = data.get(metric)
        if value is not None and not math.isnan(value):
            scores.append(-value)
            labels.append(data['align'] > 0.5)
    if len(set(labels)) < 2:
        print(f'{metric} skipped: only one class present in y_true.')
    else:
        auroc = roc_auc_score(labels, scores)
        print(f'{metric} auroc: {auroc:.4f}')


sar auroc: 0.8683
maximumsequenceprobability auroc: 0.8328
semanticentropy auroc: 0.8372
lexicalsimilarity auroc: 0.8543
montecarlosequenceentropy auroc: 0.8327


In [5]:
def assign_labels_by_uncertainty(uncertainty_values, true_labels, high_threshold=0.4, low_threshold=0.4):
    """
    Assigns labels based on uncertainty values.
    - The top `high_threshold` proportion of uncertainty values are assigned True (1).
    - The bottom `low_threshold` proportion of uncertainty values are assigned False (0).
    
    Parameters:
        uncertainty_values (list or np.array): The uncertainty scores.
        true_labels (list or np.array): The ground truth labels.
        high_threshold (float): The proportion of highest uncertainty values to assign True.
        low_threshold (float): The proportion of lowest uncertainty values to assign False.
        
    Returns:
        assigned_labels (np.array): The assigned labels (-1 for unassigned).
        accuracy (float): The accuracy of assigned labels compared to true labels.
    """
    # Convert to numpy array
    uncertainty_values = np.array(uncertainty_values)
    true_labels = np.array(true_labels)

    # Determine threshold indices
    num_high = int(len(uncertainty_values) * high_threshold)
    num_low = int(len(uncertainty_values) * low_threshold)

    # Get sorted indices in ascending order
    sorted_indices = np.argsort(uncertainty_values)

    # Assign labels
    assigned_labels = np.full_like(true_labels, -1)  # Initialize with -1 (unassigned)
    assigned_labels[sorted_indices[:num_low]] = 0   # Lowest 40% assigned False (0)
    assigned_labels[sorted_indices[-num_high:]] = 1 # Highest 30% assigned True (1)

    # Compute accuracy (only on assigned labels)
    mask = assigned_labels != -1  # Consider only assigned labels
    accuracy = np.mean(assigned_labels[mask] == true_labels[mask])

    return assigned_labels, accuracy

In [6]:
labels = np.array([i['align']>0.5 for i in result])
uncertainty1 = np.array([-i['sar'] for i in result])
uncertainty2 = np.array([-i['sar'] for i in result])
# assign_labels_by_uncertainty_2(uncertainty1, uncertainty2, labels, high_threshold=0.05, low_threshold=0.05)
assign_labels_by_uncertainty(uncertainty2, labels, high_threshold=0.13, low_threshold=0.13)

(array([ True,  True, False, ...,  True,  True,  True]), 0.84925)