In [3]:
!pip install -q faiss-cpu transformers tqdm

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h

In [4]:
import json
import pickle
import faiss
import torch
import random
import numpy as np
from tqdm.auto import tqdm
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizerFast

# Input locations (Kaggle dataset mounts).
FAISS_INDEX_PATH = "/kaggle/input/passage-index/passage (2).index"
PASSAGES_PKL_PATH = "/kaggle/input/passages/pytorch/default/1/passages (1).pkl"
NQ_JSONL_PATH = "/kaggle/input/the-natural-questions-dataset/simplified-nq-train.jsonl"

# Hybrid setup: the Torch encoder runs on the GPU when one is visible and
# falls back to the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Modèle Encoder utilisera : {device}")

# The notebook installs faiss-cpu, so the similarity search stays on the CPU.
print("FAISS utilisera : CPU (RAM)")

2026-01-05 02:19:00.714989: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767579540.881980      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767579540.933141      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767579541.331176      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767579541.331220      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767579541.331222      55 computation_placer.cc:177] computation placer alr

Modèle Encoder utilisera : cuda
FAISS utilisera : CPU (RAM)


In [5]:
# Load the prebuilt FAISS index from disk (CPU build of FAISS).
print("Chargement de l'index FAISS (CPU)...")
index = faiss.read_index(FAISS_INDEX_PATH)
print(f"Index chargé. Taille: {index.ntotal}")

print("Chargement des passages...")
# SECURITY: pickle.load can execute arbitrary code while deserializing.
# Only point PASSAGES_PKL_PATH at a file whose origin you trust.
with open(PASSAGES_PKL_PATH, "rb") as f:
    passages = pickle.load(f)
print(f"Passages chargés: {len(passages)}")

# The ids returned by index.search are used later as positions into
# `passages`; if the two sizes differ, retrieved ids may not map to the
# right text. Warn instead of failing so the notebook can still run.
if len(passages) != index.ntotal:
    print(
        f"⚠️ Attention : l'index contient {index.ntotal} vecteurs mais "
        f"{len(passages)} passages ont été chargés ; les identifiants FAISS "
        "peuvent ne pas correspondre aux passages."
    )

Chargement de l'index FAISS (CPU)...
Index chargé. Taille: 1368573
Chargement des passages...
Passages chargés: 1368573


In [6]:
print("Chargement du modèle DPR...")
q_tokenizer = DPRQuestionEncoderTokenizerFast.from_pretrained(
    "facebook/dpr-question_encoder-single-nq-base"
)

# Load the question encoder, move it to the selected device and switch it to
# evaluation mode in one chain (both .to() and .eval() return the module).
q_encoder = (
    DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
    .to(device)
    .eval()
)
print(f"Modèle chargé avec succès sur {device} ✅")

Chargement du modèle DPR...


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/493 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Modèle chargé avec succès sur cuda ✅


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [7]:
def load_random_eval_pairs(jsonl_path, num_samples=2500, buffer_size=15000):
    """Build a reproducible random sample of (question, gold passage) pairs.

    The JSONL file is read line by line. An example is kept when its first
    annotation carries a long answer (``start_token != -1``); the gold passage
    is the whitespace-token slice ``[start_token:end_token]`` of
    ``document_text``. Reading stops as soon as ``buffer_size`` examples have
    been kept, and ``num_samples`` of them are then drawn with a fixed seed.

    Args:
        jsonl_path: Path to the JSONL file (one JSON object per line).
        num_samples: Number of pairs to draw from the pool.
        buffer_size: Maximum size of the candidate pool read from the file.

    Returns:
        A list of dicts with keys ``"question"`` and ``"gold_passage"``. When
        the pool holds fewer than ``num_samples`` entries, the whole pool is
        returned in file order.
    """
    valid_data = []
    print(f"Constitution du pool (Buffer: {buffer_size})...")
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            ex = json.loads(line)
            annotations = ex.get("annotations") or []
            if annotations:
                la = annotations[0]["long_answer"]
                if la["start_token"] != -1:
                    doc_tokens = ex["document_text"].split()
                    gold_passage = " ".join(
                        doc_tokens[la["start_token"]:la["end_token"]]
                    )
                    valid_data.append(
                        {"question": ex["question_text"], "gold_passage": gold_passage}
                    )
            if len(valid_data) >= buffer_size:
                break

    if len(valid_data) < num_samples:
        return valid_data

    print(f"Sélection aléatoire de {num_samples} exemples...")
    # A private generator yields the same draw as random.seed(42) followed by
    # random.sample(), without resetting the global RNG for the rest of the
    # notebook.
    return random.Random(42).sample(valid_data, num_samples)

# Draw the evaluation set: 2500 pairs, using the loader's default pool size.
eval_pairs = load_random_eval_pairs(NQ_JSONL_PATH, 2500)
print(f"Prêt à évaluer {len(eval_pairs)} paires.")

Constitution du pool (Buffer: 15000)...
Sélection aléatoire de 2500 exemples...
Prêt à évaluer 2500 paires.


In [8]:
def retrieve_topk(question, k=10):
    """Return the FAISS ids of the ``k`` nearest passages for ``question``.

    The question is tokenized (truncated to 256 tokens), encoded with the
    module-level ``q_encoder`` on ``device``, and the encoder's
    ``pooler_output`` is L2-normalized before being searched in the
    module-level ``index``.

    Args:
        question: The query text passed to the tokenizer.
        k: Number of neighbours requested from the index.

    Returns:
        The first row of the id array returned by ``index.search``.
    """
    # Tokenize, then move the resulting tensors to the encoder's device.
    encoded = q_tokenizer(
        question, return_tensors="pt", truncation=True, max_length=256
    )
    encoded = encoded.to(device)

    # Forward pass only; no gradients are needed for retrieval.
    with torch.no_grad():
        query_vec = q_encoder(**encoded).pooler_output

    # The index is searched with a NumPy array on the CPU side.
    query_vec = query_vec.cpu().numpy()

    # normalize_L2 works in place on the array.
    # NOTE(review): presumably the passage vectors in the index were
    # normalized the same way — confirm against the index-building code.
    faiss.normalize_L2(query_vec)
    _, neighbor_ids = index.search(query_vec, k)

    return neighbor_ids[0]

In [9]:
def evaluate_metrics(eval_pairs, passages, k_list=(5, 10, 20, 100)):
    """Compute Recall@k and MRR of the retriever over ``eval_pairs``.

    For each pair, the top ``max(k_list)`` ids are fetched with
    ``retrieve_topk`` and mapped to ``passages``. A retrieved passage counts
    as a hit when the gold passage contains it or it contains the gold
    passage. The rank of the first hit feeds Recall@k (hit within the first
    k results) and MRR (mean of 1/rank, 0 when nothing matched).

    Ids that do not address a passage (negative, such as the -1 padding FAISS
    returns when fewer than k results exist, or >= len(passages)) and empty
    texts are never counted as hits: an empty string is a substring of every
    string, so it would otherwise match any gold passage. For the same reason
    a pair whose gold passage is empty is scored as a miss.

    Args:
        eval_pairs: Sized iterable of dicts with keys ``"question"`` and
            ``"gold_passage"``.
        passages: Sequence of passage texts indexed by retrieved id.
        k_list: Cut-offs at which recall is reported.

    Returns:
        Dict with one ``"Recall@{k}"`` entry per cut-off plus ``"MRR"``.
        All values are 0.0 when ``eval_pairs`` is empty.
    """
    k_list = list(k_list)
    recalls = {k: 0 for k in k_list}
    mrr_sum = 0.0
    max_k = max(k_list)
    num_passages = len(passages)

    print("Évaluation en cours (Modèle GPU + FAISS CPU)...")

    for ex in tqdm(eval_pairs, desc="Processing"):
        gold = ex["gold_passage"]
        found_rank = None

        if gold:
            retrieved_idxs = retrieve_topk(ex["question"], max_k)
            for rank, idx in enumerate(retrieved_idxs, start=1):
                # Skip ids that do not point at a real passage; the rank
                # position is still consumed so later ranks are unchanged.
                if not 0 <= idx < num_passages:
                    continue
                text = passages[idx]
                # Containment in either direction, ignoring empty texts.
                if text and (gold in text or text in gold):
                    found_rank = rank
                    break

        if found_rank is not None:
            mrr_sum += 1.0 / found_rank
            for k in k_list:
                if found_rank <= k:
                    recalls[k] += 1

    total = len(eval_pairs)
    results = {}
    for k in k_list:
        results[f"Recall@{k}"] = recalls[k] / total if total else 0.0
    results["MRR"] = mrr_sum / total if total else 0.0
    return results

In [None]:
# Score the retriever at cut-offs 5, 10 and 20, then print a small report.
results = evaluate_metrics(eval_pairs, passages, k_list=[5, 10, 20])

banner = "=" * 35
print(f"\n{banner}")
print(" RÉSULTATS (HYBRIDE GPU/CPU)")
print(banner)
for name, value in results.items():
    print(f"{name:<15} : {value:.4f}")
print(banner)

Évaluation en cours (Modèle GPU + FAISS CPU)...
Processing: 100%|██████████| 100/100 [00:00<00:00, 123.45it/s]

 RÉSULTATS (HYBRIDE GPU/CPU)
Recall@5        : 0.4544
Recall@10       : 0.5176
Recall@20       : 0.5676
MRR             : 0.3571
