# OOD detection applied to Hallucination Detection

In [None]:
#/home/lila.roig/.env/ood_env/bin/python 

## Part I. Extract Descriptors

In [6]:
# Reload imported project modules automatically before executing each cell,
# so edits made under `src/` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Import libraries
# -----------------------------------
import torch
import sys
import time 
import os 
import pickle
# Add the path to the src directory
sys.path.append(os.path.abspath(".."))

In [9]:
# Report the runtime environment
# -----------------------------------
# Query the GPU count first so the three header lines can be printed together.
num_gpus = torch.cuda.device_count()
print(
    f"Python version: {sys.version}",
    f"Cuda version: {torch.version.cuda}",
    f"Number of available de GPU : {num_gpus}",
    sep="\n",
)
# One line per visible device, numbered from 1 for display.
for i in range(num_gpus):
    gpu_name = torch.cuda.get_device_name(i)
    print(f"GPU {i + 1} : {gpu_name}")

Python version: 3.11.11 (main, Dec  4 2024, 08:55:07) [GCC 11.4.0]
Cuda version: 12.6
Number of available de GPU : 2
GPU 1 : NVIDIA GeForce RTX 4090
GPU 2 : NVIDIA GeForce RTX 4090


In [18]:
# Global configuration
# -----------------------------------
SEED = 777
BATCH_SIZE = 16
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"

# Which tokens the descriptors are extracted from: the prompt, the generated
# answer, or both concatenated.
# NOTE(review): the section text further down spells the third option
# 'promptGeneration' -- confirm the exact spelling the extraction code expects.
ACTIVATION_SOURCE = "prompt"  # 'prompt', 'generation', 'PromptGeneration'

# Layers from which the descriptors are retrieved (List[int]):
# odd indices 1..29 followed by -1 (presumably the last layer -- TODO confirm).
LAYERS = [*range(1, 31, 2), -1]

# Descriptor aggregations, grouped by the signal they are computed from
HIDDEN_AGG = [
    "avg_emb",
    "last_emb",
    "max_emb",
    "first_gen_emb",
    "hidden_score",
    "feat_var_emb",
]
ATTN_AGG = ["attn_score"]
LOGIT_AGG = [
    "perplexity_score",
    "logit_entropy_score",
    "window_logit_entropy_score",
]
LOGIT_CONFIG = dict(top_k=50, window_size=1, stride=1)


# Output directory and the name fragments reused when building result file names
OUTPUT_DIR = f"../results/raw/TEST/small_dataset_allConfig_seed{SEED}/"
STR_AGG = "All"
STR_LAYERS = "1:32:2"

In [11]:
# Seed everything
# -----------------------------------
# `seed_all` is a project helper; presumably it seeds every RNG in use
# (Python, NumPy, torch) with the global SEED -- TODO confirm.
from src.utils.general import seed_all
seed_all(SEED)

In [12]:
# Clear memory to avoid "CUDA out of memory"
# -----------------------------------
# Release the cached, currently unused GPU memory held by PyTorch's allocator,
# then reclaim GPU memory that was shared through CUDA IPC and has been released.
torch.cuda.empty_cache()
torch.cuda.ipc_collect()

### 1. Load model

In [13]:
# Load model
# -----------------------------------
from src.model_loader.llama_loader import load_llama

# `load_llama` is a project helper; it returns the model together with its tokenizer
model, tokenizer = load_llama(MODEL_NAME)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### 2. Load ID dataset

In [None]:
# Build the ID "fit" subset
# -----------------------------------
from src.data_reader.squad_loader import load_id_fit_dataset
# Total number of samples in squad v2.0 train set: 86821

# Load the dataset, shuffle it with the global seed, then keep the first 10,000 samples
id_fit_dataset = (
    load_id_fit_dataset()
    .shuffle(SEED)
    .slice(idx_start=0, idx_end=10_000)
)
id_fit_dataset.print_info()


===== Dataset Information =====
Dataset({
    features: ['id', 'title', 'context', 'question', 'answers', 'original_index', 'is_impossible'],
    num_rows: 10000
})
Mean ground-truth answer length: 3.19, Max length: 29
Mean context + question length: 129.88, Max length: 661


In [12]:
# Visualize one generation with the ID dataset
# -----------------------------------
# TODO: ADAPT TO NEW CODE VERSION
# NOTE(review): the snippet below is disabled by wrapping it in a string literal, so this
# cell only evaluates (and displays) that string. The snippet uses `partial` and
# `EXTRACTION_MODE`, neither of which is imported/defined in the cells shown above.
'''
from src.inference.run_extraction import analyze_single_generation, build_prompt
from src.inference.hooks import register_forward_activation_hook, extract_token_activations

_ = analyze_single_generation(
    model=model,
    tokenizer=tokenizer,
    dataset=id_fit_dataset,
    sample_idx=3,
    build_prompt_fn=build_prompt,
    register_forward_activation_hook_fn=register_forward_activation_hook,
    layer_idx=-1,
    extract_token_activations_fn=partial(extract_token_activations, mode=EXTRACTION_MODE),
) 
'''

'\nfrom src.inference.run_extraction import analyze_single_generation, build_prompt\nfrom src.inference.hooks import register_forward_activation_hook, extract_token_activations\n\n_ = analyze_single_generation(\n    model=model,\n    tokenizer=tokenizer,\n    dataset=id_fit_dataset,\n    sample_idx=3,\n    build_prompt_fn=build_prompt,\n    register_forward_activation_hook_fn=register_forward_activation_hook,\n    layer_idx=-1,\n    extract_token_activations_fn=partial(extract_token_activations, mode=EXTRACTION_MODE),\n) \n'

### 3. Define more global variables

In [19]:
# Token offsets passed to the descriptor extraction (START_OFFSET / END_OFFSET)
# -----------------------------------
from src.inference.generation_utils import build_prompt
from src.inference.offset_utils import compute_token_offsets

if False:
    # Disabled path: derive the offsets from marker phrases found in one sample prompt
    idx = 67
    text = build_prompt(id_fit_dataset[idx]["context"], id_fit_dataset[idx]["question"])

    START_OFFSET, END_OFFSET = compute_token_offsets(
        text=text,
        tokenizer=tokenizer,
        start_phrase="Passage:",
        end_phrase=" [/INST]",
        include_start_phrase=True,
        include_end_phrase=False,
        debug=True,
    )
else:
    # Active path: both offsets are fixed to zero
    START_OFFSET = END_OFFSET = 0

In [20]:
# Suffix appended to the result file names; it encodes the extraction settings
OUTPUT_TITLE = (
    f"_layer{STR_LAYERS}"
    f"_agg{STR_AGG}"
    f"_{ACTIVATION_SOURCE}"
    f"_so{START_OFFSET}"
    f"_eo{END_OFFSET}"
    f"_seed{SEED}"
)

print(f'OUTPUT_TITLE: {OUTPUT_TITLE}')

OUTPUT_TITLE: _layer1:32:2_aggAll_prompt_so0_eo0_seed777


### 4. Compare responses & create new correct dataset

In [21]:
# Retrieve ID generated responses and compare them to ground-truth
# -----------------------------------
from src.inference.run_extraction import run_filter_generated_answers_by_similarity
from src.inference.generation_utils import build_prompt
from src.utils.general import print_time_elapsed
from src.data_reader.pickle_io import merge_batches_and_cleanup

# Batch results are written under this path; the next cell reads "<output_path>.pkl"
output_path = OUTPUT_DIR + f"id_fit_results_answers_seed{SEED}" 

# Runs batched inference on a dataset using a decoder-only language model.
# For each batch, generates answers, computes semantic similarity scores,
# and appends the results to a pickle file.
print("\nStart generating ID answers and comparing them to ground-truth...")
t0 = time.time()  # wall-clock start of the generation run
run_filter_generated_answers_by_similarity(
    model=model,
    tokenizer=tokenizer,
    dataset=id_fit_dataset,
    batch_size=BATCH_SIZE,
    idx_start_sample=0,
    max_samples=len(id_fit_dataset),  # process the whole dataset
    output_path=output_path,
    build_prompt_fn=build_prompt
)
t1 = time.time()
print("...end!")
print_time_elapsed(t0, t1, label="ID answers: ")

# Merge all batches, save as a single file and delete batch directory
# (confirm='user' presumably asks for interactive confirmation before deleting -- TODO confirm)
_= merge_batches_and_cleanup(directory=output_path, delete=True, confirm='user') 


Start generating ID answers and comparing them to ground-truth...


100%|██████████| 7/7 [00:15<00:00,  2.27s/it]


...end!
ID answers: Time elapsed: 00 min 15 sec

Directory '../results/raw/TEST/small_dataset_allConfig_seed777/id_fit_results_answers_seed777' deleted.


In [None]:
# Load ID responses and only keep correct entries
# -----------------------------------
from src.data_reader.pickle_io import load_pickle_batches
from src.utils.general import filter_entries

# Load the generated answers merged by the previous cell
id_fit_answers = load_pickle_batches(OUTPUT_DIR + f"id_fit_results_answers_seed{SEED}.pkl" )
# Only keep rows where the generated responses are similar to the ground-truth answers
# (rows flagged is_correct == 1), and collect their sample ids
ids_correct_answers = filter_entries(id_fit_answers, column='is_correct', value=1)["id"]
# Create a new dataset containing only the samples that were answered correctly
id_fit_correct_dataset =  id_fit_dataset.filter_by_column('id', ids_correct_answers)
# Save the new correct dataset for later use
# NOTE(review): the save below is commented out, while the next cell loads this exact
# file -- that file must therefore already exist from an earlier run.
#id_fit_correct_dataset.save(f"../data/datasets/id_fit_correct_dataset_small_seed{SEED}.pkl")

Loaded 112 samples from: ../results/raw/TEST/small_dataset_allConfig_seed777/id_fit_results_answers_seed777.pkl
Size before filtering: 112. Size after filtering: 82. Filtered 30 samples.


In [25]:
# Load correct dataset
# -----------------------------------
# This rebinds `id_fit_correct_dataset`, replacing the one built in the previous cell.
# NOTE: pickle.load can execute arbitrary code stored in the file -- only load
# files that were produced by this project.
with open(f"../data/datasets/id_fit_correct_dataset_small_seed{SEED}.pkl", "rb") as f:
    id_fit_correct_dataset = pickle.load(f)

id_fit_correct_dataset.print_info()


===== Dataset Information =====
Dataset({
    features: ['id', 'title', 'context', 'question', 'answers', 'original_index', 'is_impossible'],
    num_rows: 8008
})
Mean ground-truth answer length: 3.01, Max length: 27
Mean context + question length: 129.19, Max length: 568


### 5. Retrieve ID fit descriptors 




#### 5.1. Retrieve ID fit descriptors from input

For this section, `ACTIVATION_SOURCE` is `'prompt'` by default

In [None]:
# Retrieve ID descriptors and save results
# -----------------------------------
from src.inference.run_extraction import run_prompt_descriptor_extraction
from src.inference.generation_utils import build_prompt
from src.utils.general import print_time_elapsed
from src.data_reader.pickle_io import merge_batches_and_cleanup

# Target path of the extraction results (also the directory merged at the end of the cell)
output_path = f"{OUTPUT_DIR}id_fit_results{OUTPUT_TITLE}" 

# Runs batched inference on a dataset using a decoder-only language model.
# For each batch, extracts token-level activations/attention/logits,
# and appends the results to a pickle file.
# NOTE(review): the call passes save_to_pkl=False -- confirm whether anything is then
# written to `output_path`, or whether the results only live in `res`.
print("\nStart retrieving ID fit descriptors from inputs...")
t2 = time.time()  # wall-clock start of the extraction
res = run_prompt_descriptor_extraction(
    model=model,
    tokenizer=tokenizer,
    dataset=id_fit_correct_dataset,  # only the correctly answered ID samples
    batch_size=BATCH_SIZE,
    idx_start_sample=0,
    max_samples=len(id_fit_correct_dataset),  # process the whole dataset
    save_to_pkl=False,
    output_path=output_path,
    build_prompt_fn=build_prompt,
    layers=LAYERS,  
    hidden_agg=HIDDEN_AGG,
    attn_agg=ATTN_AGG,
    logit_agg=LOGIT_AGG,
    logit_config=LOGIT_CONFIG,
    start_offset=START_OFFSET,
    end_offset=END_OFFSET
)
t3 = time.time()
print("...end!")
print_time_elapsed(t2, t3, label="ID descriptors: ")

# Merge all batches, save as a single file and delete batch directory
_= merge_batches_and_cleanup(directory=output_path, delete=True, confirm='user') 

In [20]:
# Free memory
# Drop the dataset references if they are still bound. Unlike a bare `del`, this does
# not raise NameError when the cell is re-run or when a name was already removed.
# NOTE: `id_fit_correct_dataset` is read again by the extraction cell of section 5.2,
# so run this cell only once every extraction that needs the dataset has finished.
import gc

for _name in ("id_fit_dataset", "id_fit_correct_dataset"):
    globals().pop(_name, None)
_ = gc.collect()  # reclaim the released objects right away

#### 5.2. Retrieve ID fit descriptors from generated answer

For this section, `ACTIVATION_SOURCE` can be either `'prompt'`, `'generation'` or `'promptGeneration'`

In [None]:
# Retrieve ID descriptors and save results
# -----------------------------------
from src.inference.run_extraction import run_prompt_and_generation_descriptor_extraction
from src.inference.generation_utils import build_prompt
from src.utils.general import print_time_elapsed
from src.data_reader.pickle_io import merge_batches_and_cleanup

# NOTE(review): this is the same path expression as in section 5.1; the two sections only
# write to different locations if OUTPUT_TITLE (which embeds ACTIVATION_SOURCE) differs.
output_path = f"{OUTPUT_DIR}id_fit_results{OUTPUT_TITLE}"

# Runs batched inference on a dataset using a decoder-only language model.
# For each batch, generates answers, extracts token-level activations for the generated answer,
# and appends the results to a pickle file.
print(f"\nStart retrieving ID fit descriptors from {ACTIVATION_SOURCE}...")
t2 = time.time()  # wall-clock start of the extraction
res = run_prompt_and_generation_descriptor_extraction(
    model=model,
    tokenizer=tokenizer,
    dataset=id_fit_correct_dataset,  # only the correctly answered ID samples
    batch_size=BATCH_SIZE,
    idx_start_sample=0,
    max_samples=len(id_fit_correct_dataset),  # process the whole dataset
    save_to_pkl=False,
    output_path=output_path,
    build_prompt_fn=build_prompt,
    layers=LAYERS,  
    activation_source=ACTIVATION_SOURCE,  # which tokens the descriptors are computed from
    hidden_agg=HIDDEN_AGG,
    attn_agg=ATTN_AGG,
    logit_agg=LOGIT_AGG,
    logit_config=LOGIT_CONFIG,
    start_offset=START_OFFSET,
    end_offset=END_OFFSET
)
t3 = time.time()
print("...end!")
print_time_elapsed(t2, t3, label="ID descriptors: ")

# Merge all batches, save as a single file and delete batch directory
_= merge_batches_and_cleanup(directory=output_path, delete=True, confirm='user') 

In [None]:
# Free memory
# Drop the dataset references if they are still bound. Unlike a bare `del`, this does
# not raise NameError when the cell is re-run or when the "Free memory" cell of
# section 5.1 already removed the names.
# NOTE: `id_fit_dataset` is indexed again by the neighbour-inspection loop at the end
# of the notebook; skip this cell (or reload the dataset) if you plan to run that loop.
import gc

for _name in ("id_fit_dataset", "id_fit_correct_dataset"):
    globals().pop(_name, None)
_ = gc.collect()  # reclaim the released objects right away

### 6. Load Test datasets (that may contain OOD/Hallucinated samples)


In [29]:
# Build the two test subsets (answerable = ID, unanswerable = OOD)
# -----------------------------------
from src.data_reader.squad_loader import load_id_test_dataset, load_od_test_dataset

# Possible (answerable) test set: load, shuffle with the global seed, keep the first 1000 samples
# Total number of samples in squad v2.0 validation answerable set: 5928
id_test_dataset = (
    load_id_test_dataset()
    .shuffle(SEED)
    .slice(idx_start=0, idx_end=1000)
)
id_test_dataset.print_info()

# Impossible (unanswerable) test set: same preparation
# Total number of samples in squad v2.0 validation unanswerable set: 5945
od_test_dataset = (
    load_od_test_dataset()
    .shuffle(SEED)
    .slice(idx_start=0, idx_end=1000)
)
od_test_dataset.print_info()



===== Dataset Information =====
Dataset({
    features: ['id', 'title', 'context', 'question', 'answers', 'original_index', 'is_impossible'],
    num_rows: 1000
})
Mean ground-truth answer length: 3.06, Max length: 25
Mean context + question length: 140.19, Max length: 651

===== Dataset Information =====
Dataset({
    features: ['id', 'title', 'context', 'question', 'answers', 'original_index', 'is_impossible'],
    num_rows: 1000
})
No valid ground-truth answers to compute length stats.
Mean context + question length: 137.03, Max length: 553


In [61]:
# Visualize one generation with the test impossible dataset
# -----------------------------------
# TODO: ADAPT TO NEW CODE VERSION
# NOTE(review): the snippet below is disabled by wrapping it in a string literal, so this
# cell only evaluates (and displays) that string. The snippet uses `partial` and
# `EXTRACTION_MODE`, neither of which is imported/defined in the cells shown above.
'''
from src.inference.inference_utils import analyze_single_generation, build_prompt
from src.inference.activation_utils import register_forward_activation_hook, extract_token_activations

_ = analyze_single_generation(
    model=model,
    tokenizer=tokenizer,
    dataset=od_test_dataset,
    sample_idx=500,
    build_prompt_fn=build_prompt,
    register_forward_activation_hook_fn=register_forward_activation_hook,
    layer_idx=-1,
    extract_token_activations_fn=partial(extract_token_activations, mode=EXTRACTION_MODE),
) 
'''

'\nfrom src.inference.inference_utils import analyze_single_generation, build_prompt\nfrom src.inference.activation_utils import register_forward_activation_hook, extract_token_activations\n\n_ = analyze_single_generation(\n    model=model,\n    tokenizer=tokenizer,\n    dataset=od_test_dataset,\n    sample_idx=500,\n    build_prompt_fn=build_prompt,\n    register_forward_activation_hook_fn=register_forward_activation_hook,\n    layer_idx=-1,\n    extract_token_activations_fn=partial(extract_token_activations, mode=EXTRACTION_MODE),\n) \n'

### 7. Retrieve Test descriptors 

#### 7.1. Retrieve Test descriptors from input

For this section, `ACTIVATION_SOURCE` is `'prompt'` by default

In [None]:
# Extract prompt descriptors for both test sets (impossible first, then possible)
# -----------------------------------
from src.inference.run_extraction import run_prompt_descriptor_extraction
from src.inference.generation_utils import build_prompt
from src.utils.general import print_time_elapsed
from src.data_reader.pickle_io import merge_batches_and_cleanup

od_output_path = f"{OUTPUT_DIR}od_test_results{OUTPUT_TITLE}"
id_output_path = f"{OUTPUT_DIR}id_test_results{OUTPUT_TITLE}"

# Arguments that are identical for the two extraction runs below
shared_extraction_kwargs = dict(
    model=model,
    tokenizer=tokenizer,
    batch_size=BATCH_SIZE,
    idx_start_sample=0,
    save_to_pkl=False,
    build_prompt_fn=build_prompt,
    layers=LAYERS,
    hidden_agg=HIDDEN_AGG,
    attn_agg=ATTN_AGG,
    logit_agg=LOGIT_AGG,
    logit_config=LOGIT_CONFIG,
    start_offset=START_OFFSET,
    end_offset=END_OFFSET,
)

# Each run performs batched inference with the decoder-only model and extracts
# token-level activations for every batch.
# NOTE(review): `res` is rebound by the second run, so the value returned for the
# impossible set is not kept -- confirm the results are persisted elsewhere.
print("\nStart retrieving test impossible descriptors from inputs...")
t2 = time.time()
res = run_prompt_descriptor_extraction(
    dataset=od_test_dataset,
    max_samples=len(od_test_dataset),
    output_path=od_output_path,
    **shared_extraction_kwargs,
)
t3 = time.time()
print("...end!")
print_time_elapsed(t2, t3, label="Test impossible descriptors: ")

print("\nStart retrieving test possible descriptors from inputs...")
t4 = time.time()
res = run_prompt_descriptor_extraction(
    dataset=id_test_dataset,
    max_samples=len(id_test_dataset),
    output_path=id_output_path,
    **shared_extraction_kwargs,
)
t5 = time.time()
print("...end!")
print_time_elapsed(t4, t5, label="Test possible descriptors: ")

# Merge the batch files of each run into a single file and remove the batch directory
_ = merge_batches_and_cleanup(directory=od_output_path, delete=True, confirm='user')
_ = merge_batches_and_cleanup(directory=id_output_path, delete=True, confirm='user')


Start retrieving test impossible scores from inputs...


  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:06<00:00,  1.02it/s]


...end!
Test impossible scores: Time elapsed: 00 min 06 sec


Start retrieving test possible scores from inputs...


100%|██████████| 7/7 [00:08<00:00,  1.24s/it]

...end!
Test possible scores: Time elapsed: 00 min 08 sec

Directory '../results/raw/TEST/od_test_results_layer18_-1_score_all_hidden_attn_logit_prompt_so0_eo0' does not exist. Nothing to delete.
Directory '../results/raw/TEST/id_test_results_layer18_-1_score_all_hidden_attn_logit_prompt_so0_eo0' does not exist. Nothing to delete.





In [20]:
# Free memory
# Drop the dataset references if they are still bound. Unlike a bare `del`, this does
# not raise NameError when the cell is re-run or when a name was already removed.
# NOTE: `od_test_dataset` and `id_test_dataset` are read again by the extraction cell
# of section 7.2, so run this cell only once every extraction that needs them has finished.
import gc

for _name in ("od_test_dataset", "id_test_dataset"):
    globals().pop(_name, None)
_ = gc.collect()  # reclaim the released objects right away

#### 7.2. Retrieve Test descriptors from generated answer

For this section, `ACTIVATION_SOURCE` can be either `'prompt'`, `'generation'` or `'promptGeneration'`

In [None]:
# Extract descriptors from ACTIVATION_SOURCE for both test sets (impossible first, then possible)
# -----------------------------------
from src.inference.run_extraction import run_prompt_and_generation_descriptor_extraction
from src.inference.generation_utils import build_prompt
from src.utils.general import print_time_elapsed
from src.data_reader.pickle_io import merge_batches_and_cleanup

od_output_path = f"{OUTPUT_DIR}od_test_results{OUTPUT_TITLE}"
id_output_path = f"{OUTPUT_DIR}id_test_results{OUTPUT_TITLE}"

# Arguments that are identical for the two extraction runs below
shared_extraction_kwargs = dict(
    model=model,
    tokenizer=tokenizer,
    batch_size=BATCH_SIZE,
    idx_start_sample=0,
    save_to_pkl=False,
    build_prompt_fn=build_prompt,
    layers=LAYERS,
    activation_source=ACTIVATION_SOURCE,
    hidden_agg=HIDDEN_AGG,
    attn_agg=ATTN_AGG,
    logit_agg=LOGIT_AGG,
    logit_config=LOGIT_CONFIG,
    start_offset=START_OFFSET,
    end_offset=END_OFFSET,
)

# Each run performs batched inference with the decoder-only model and extracts
# token-level activations for every batch.
# NOTE(review): `res` is rebound by the second run, so the value returned for the
# impossible set is not kept -- confirm the results are persisted elsewhere.
print(f"\nStart retrieving test impossible descriptors from {ACTIVATION_SOURCE}...")
t2 = time.time()
res = run_prompt_and_generation_descriptor_extraction(
    dataset=od_test_dataset,
    max_samples=len(od_test_dataset),
    output_path=od_output_path,
    **shared_extraction_kwargs,
)
t3 = time.time()
print("...end!")
print_time_elapsed(t2, t3, label="Test impossible descriptors: ")


print(f"\nStart retrieving test possible descriptors from {ACTIVATION_SOURCE}...")
t4 = time.time()
res = run_prompt_and_generation_descriptor_extraction(
    dataset=id_test_dataset,
    max_samples=len(id_test_dataset),
    output_path=id_output_path,
    **shared_extraction_kwargs,
)
t5 = time.time()
print("...end!")
print_time_elapsed(t4, t5, label="Test possible descriptors: ")

# Merge the batch files of each run into a single file and remove the batch directory
_ = merge_batches_and_cleanup(directory=od_output_path, delete=True, confirm='user')
_ = merge_batches_and_cleanup(directory=id_output_path, delete=True, confirm='user')


Start retrieving test impossible embeddings from generation...


  0%|          | 0/7 [00:00<?, ?it/s]

100%|██████████| 7/7 [00:18<00:00,  2.63s/it]


...end!
Test impossible embeddings: Time elapsed: 00 min 18 sec


Start retrieving test possible embeddings from generation...


100%|██████████| 7/7 [00:20<00:00,  2.86s/it]

...end!
Test possible embeddings: Time elapsed: 00 min 20 sec

Directory '../results/raw/TEST/od_test_results_layer18_-1_score_all_hidden_attn_logit_prompt_so0_eo0' does not exist. Nothing to delete.
Directory '../results/raw/TEST/id_test_results_layer18_-1_score_all_hidden_attn_logit_prompt_so0_eo0' does not exist. Nothing to delete.





In [None]:
# Free memory
# Drop the dataset references if they are still bound. Unlike a bare `del`, this does
# not raise NameError when the cell is re-run or when the "Free memory" cell of
# section 7.1 already removed the names.
# NOTE: `od_test_dataset` is read again by the neighbour-inspection loop at the end
# of the notebook; skip this cell (or reload the dataset) if you plan to run that loop.
import gc

for _name in ("od_test_dataset", "id_test_dataset"):
    globals().pop(_name, None)
_ = gc.collect()  # reclaim the released objects right away

In [7]:
import torch
import numpy as np
from typing import Tuple
from src.ood_methods.ood_utils import l2_normalize
from src.analysis.evaluation import compute_metrics
# if you have cuda version 12:
# uv pip install faiss-gpu-cu12
import faiss 


def fit_to_dataset(fit_embeddings: torch.Tensor) -> "faiss.Index":
    """
    Build an exact (flat) L2 FAISS index from the in-distribution embeddings.

    The embeddings are converted to a contiguous float32 NumPy array, passed
    through `l2_normalize`, and added to a `faiss.IndexFlatL2`. When CUDA is
    available and the installed FAISS build exposes GPU support, the index is
    moved to GPU 0; otherwise it stays on the CPU.

    Parameters
    ----------
    fit_embeddings : torch.Tensor
        ID embeddings, shape (N, D). Tensors on any device, with any floating
        dtype (including float16 / bfloat16) and with or without gradient
        tracking are accepted; NumPy arrays and nested lists work as well.

    Returns
    -------
    faiss.Index
        FAISS index holding the normalized ID embeddings, ready for k-NN search.
        Vectors receive sequential 0-based ids in the order of the rows of
        `fit_embeddings`.
    """
    if isinstance(fit_embeddings, torch.Tensor):
        # np.array() cannot read CUDA, grad-tracking or bfloat16 tensors directly:
        # detach, move to CPU and upcast before handing the data to NumPy.
        fit_embeddings = fit_embeddings.detach().to(device="cpu", dtype=torch.float32).numpy()
    # FAISS works on C-contiguous float32 matrices
    fit_embeddings = np.ascontiguousarray(fit_embeddings, dtype=np.float32)
    dim = fit_embeddings.shape[1]  # embedding dimension
    norm_fit_embeddings = l2_normalize(fit_embeddings)  # Normalize embeddings

    cpu_index = faiss.IndexFlatL2(dim)  # Flat L2 index (exact search, not approximate)

    # Use the GPU only when torch sees a CUDA device AND this FAISS build ships
    # GPU support (a CPU-only FAISS has no `StandardGpuResources`).
    use_gpu = torch.cuda.is_available() and hasattr(faiss, "StandardGpuResources")
    if use_gpu:
        res = faiss.StandardGpuResources()  # Allocate GPU resources
        index = faiss.index_cpu_to_gpu(res, 0, cpu_index)  # Move index to GPU 0
    else:
        index = cpu_index

    # Add normalized ID embeddings to index
    index.add(norm_fit_embeddings)
    return index



def score_tensor(
    index: "faiss.Index",
    inputs: torch.Tensor,
    nearest: int = 50,
    batch_size: int = 4
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Compute DKNN OOD score for test embeddings.

    For each input sample, returns the distance to its k-th nearest neighbor
    (in the ID set) and the index of this neighbor in the reference set.

    Parameters
    ----------
    index : faiss.Index
        FAISS index built from ID data
    inputs : torch.Tensor
        Test embeddings of shape (N, D). Tensors on any device / floating dtype,
        NumPy arrays and nested lists are accepted.
    nearest : int
        Number of nearest neighbors (k); must be >= 1 and not larger than the
        number of vectors stored in `index`.
    batch_size : int
        Number of test embeddings searched per FAISS call; must be >= 1.

    Returns
    -------
    np.ndarray
        Array of distances to the k-th nearest neighbor of shape (N,).
        The values are whatever metric `index` reports; `faiss.IndexFlatL2`
        (used by `fit_to_dataset`) reports *squared* Euclidean distances.
    np.ndarray
        Array of indices in the ID set for the k-th nearest neighbor, shape (N,).
        FAISS ids are 0-based and follow the order in which the vectors were added.

    Raises
    ------
    ValueError
        If `nearest` or `batch_size` is smaller than 1, or if `nearest` exceeds
        the number of vectors in the index (FAISS would otherwise pad the
        result with id -1 and a sentinel distance).
    """
    if nearest < 1:
        raise ValueError(f"`nearest` must be >= 1, got {nearest}")
    if batch_size < 1:
        raise ValueError(f"`batch_size` must be >= 1, got {batch_size}")
    n_indexed = getattr(index, "ntotal", None)
    if n_indexed is not None and nearest > n_indexed:
        raise ValueError(
            f"`nearest`={nearest} exceeds the number of indexed vectors ({n_indexed})"
        )

    if isinstance(inputs, torch.Tensor):
        # np.array() cannot read CUDA, grad-tracking or bfloat16 tensors directly
        inputs = inputs.detach().to(device="cpu", dtype=torch.float32).numpy()
    # Convert to a contiguous numpy float32 array
    inputs = np.ascontiguousarray(inputs, dtype=np.float32)

    # Nothing to score: np.concatenate would fail on an empty list of batches
    if inputs.shape[0] == 0:
        return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.int64)

    # Normalize the test embeddings
    norm_inputs = l2_normalize(inputs)

    # Allocate list to store distances and indices
    all_scores = []
    all_indices = []

    # Process in mini-batches to avoid memory overflow
    for i in range(0, norm_inputs.shape[0], batch_size):
        batch = norm_inputs[i:i + batch_size]               # Select batch
        distances, indices = index.search(batch, nearest)   # FAISS k-NN search
        all_scores.append(distances[:, -1])                 # Score = distance to k-th nearest neighbor
        all_indices.append(indices[:, -1])                  # Index of the k-th nearest neighbor

    # Concatenate results from all batches
    return np.concatenate(all_scores), np.concatenate(all_indices)
# TODO(review): check whether the returned indices are shifted with respect to the dataset
# rows. FAISS ids themselves are 0-based and follow the order of `index.add`.
    

def compute_dknn_scores(
    id_fit_embeddings: torch.Tensor,
    id_test_embeddings: torch.Tensor, 
    od_test_embeddings: torch.Tensor,
    k: int = 5,
    batch_size: int = 1000
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Score ID and OOD test embeddings with DKNN (Deep k-Nearest Neighbors).

    A FAISS index is fitted on the in-distribution "fit" embeddings with
    `fit_to_dataset`; each test set is then scored with `score_tensor`, ID
    first and OOD second. A sample's score is its distance to the k-th nearest
    neighbor in the fit set: large values mean the sample lies far from the ID
    data (likely OOD), small values mean it lies close to it.

    Parameters
    ----------
    id_fit_embeddings : torch.Tensor
        In-distribution embeddings used to build the index.
        Shape: [n_fit_samples, embedding_dim]
    id_test_embeddings : torch.Tensor
        In-distribution test embeddings.
        Shape: [n_id_test_samples, embedding_dim]
    od_test_embeddings : torch.Tensor
        Out-of-distribution test embeddings.
        Shape: [n_ood_test_samples, embedding_dim]
    k : int, optional (default=5)
        Rank of the neighbor whose distance is used as the score.
    batch_size : int, optional (default=1000)
        Number of test embeddings searched at once.

    Returns
    -------
    dknn_scores_id : np.ndarray, Shape: [n_id_test_samples]
        DKNN scores of the ID test samples.
    dknn_scores_ood : np.ndarray, Shape: [n_ood_test_samples]
        DKNN scores of the OOD test samples.
    indices_id : np.ndarray, Shape: [n_id_test_samples]
        For each ID test sample, index (in the fit set) of its k-th nearest neighbor.
    indices_ood : np.ndarray, Shape: [n_ood_test_samples]
        For each OOD test sample, index (in the fit set) of its k-th nearest neighbor.
    """
    knn_index = fit_to_dataset(id_fit_embeddings)

    # Score both test sets against the same index, ID first, then OOD
    (scores_for_id, neighbors_for_id), (scores_for_ood, neighbors_for_ood) = (
        score_tensor(knn_index, test_embeddings, nearest=k, batch_size=batch_size)
        for test_embeddings in (id_test_embeddings, od_test_embeddings)
    )

    return scores_for_id, scores_for_ood, neighbors_for_id, neighbors_for_ood


In [13]:
# ===================================
# DKNN scoring of the test sets
#
# High DeepKNN score (distance to the k-th NN) => OOD data (far from the ID neighbors)
# Low DeepKNN score                            => ID data (close to the ID neighbors)
# ===================================
# NOTE(review): `id_fit_embeddings`, `id_test_embeddings` and `od_test_embeddings` are not
# defined in any cell shown above -- they must be loaded before running this cell.

# Scoring configuration; the keys match the keyword arguments of compute_dknn_scores
# -----------------------------------
config = {'k': 1, 'batch_size': BATCH_SIZE}

# Score both test sets against the ID fit embeddings
# -----------------------------------
scores_id, scores_ood, indices_id, indices_ood = compute_dknn_scores(
    id_fit_embeddings=id_fit_embeddings,
    id_test_embeddings=id_test_embeddings,
    od_test_embeddings=od_test_embeddings,
    **config,
)
print("Mean of ID scores:", np.mean(scores_id))
print("Mean of OOD scores:", np.mean(scores_ood))


Mean of ID scores: 0.007939342
Mean of OOD scores: 0.3082438


In [73]:
# Show the id of the first test sample
# NOTE(review): `id_test_data` is not defined in any cell shown above.
id_test_data['id'][0]

'5a5e1d035bc9f4001a75ae34'

In [53]:
# Display the neighbor indices returned by compute_dknn_scores for the ID test samples
indices_id

array([57052, 25678, 59808, ..., 32946, 52359, 60561])

In [14]:
def find_question_by_id(dataset, target_id):
    """
    Look up a sample by its id in a SQuAD-like collection and format it as text.

    Parameters
    ----------
    dataset : iterable of dict
        Samples to search; each one is queried with `.get('id')`,
        `.get('context')` and `.get('question')`.
    target_id : str
        Identifier to look for.

    Returns
    -------
    str or None
        "Passage: <context>\\nQuestion: <question>" built from the first sample
        whose 'id' equals `target_id`, or None when no sample matches.
    """
    # First sample carrying the requested id (None when there is none)
    match = next(
        (candidate for candidate in dataset if candidate.get('id') == target_id),
        None,
    )
    if match is None:
        return None
    return f"Passage: {match.get('context')}\nQuestion: {match.get('question')}"



In [18]:
# For the first 10 test samples, print the test prompt and the ID-fit sample
# selected through the DKNN neighbor index.
for i in range(10):
    qid = id_test_data['id'][i]
    print(qid)

    print("==== id_test_data: ====")
    # The lookup runs on `od_test_dataset`; per the original author's remark, that
    # variable actually holds the ID data at this point -- TODO confirm.
    res = find_question_by_id(od_test_dataset, qid)
    print(res)

    print("==== most similar id_fit_data: ====")
    # NOTE(review): FAISS ids are 0-based, so the `+ 1` shift looks suspicious (and can
    # run past the last row). Also confirm that the rows of `id_fit_dataset` are in the
    # same order as the embeddings the index was built from. `indices_id` holds the
    # k-th neighbor, which is the most similar sample only when k == 1.
    neighbor_row = indices_id[i] + 1
    context = id_fit_dataset['context'][neighbor_row]
    question = id_fit_dataset['question'][neighbor_row]
    print(f"Passage: {context}\nQuestion: {question}")
    print("\n")


5a5e1d035bc9f4001a75ae34
==== id_test_data: ====
Passage: Scholars have debated the relationship and differences within āstika philosophies and with nāstika philosophies, starting with the writings of Indologists and Orientalists of the 18th and 19th centuries, which were themselves derived from limited availability of Indian literature and medieval doxographies. The various sibling traditions included in Hindu philosophies are diverse, and they are united by shared history and concepts, same textual resources, similar ontological and soteriological focus, and cosmology. While Buddhism and Jainism are considered distinct philosophies and religions, some heterodox traditions such as Cārvāka are often considered as distinct schools within Hindu philosophy.
Question: What are the two sibling traditions?
==== most similar id_fit_data: ====
Passage: The world's first Institute of Technology the Berg-Schola (Bergschule) established in Selmecbánya, Kingdom of Hungary by the Court Chamber of V

SQuAD v2 splits: train (answerable -> ID fit, unanswerable) | validation (answerable -> ID test, unanswerable -> OOD test) => no topic overlap between train and validation. However, the contexts (Wikipedia paragraphs) used to generate the unanswerable questions of SQuAD v2.0 are the same as those of the answerable questions of SQuAD v1.1.