<a href="https://colab.research.google.com/github/RegNLP/GraphRAG4RegGraph/blob/main/3_GraphRAG_Hybrid_Pipeline_with_Fine_Tuned_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the latest compatible versions of the necessary libraries
#!pip install -q sentence-transformers transformers datasets pytrec-eval rank_bm25

In [None]:
import os
import json
import torch
import pickle
import numpy as np
import pandas as pd
import pytrec_eval
import networkx as nx
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm
from rank_bm25 import BM25Okapi

# --- Configuration ---
# Root folder that holds the graph, the test set and every fine-tuned model.
# NOTE(review): this is a Google Drive path; presumably Drive is mounted before
# this cell runs — no mount call is visible in this notebook, confirm.
data_folder = "/content/drive/MyDrive/Colab Notebooks/modeldatalocation"

# --- Models to Evaluate ---
# Fine-tuned embedding models (retrievers). Each model is stored in a
# sub-folder whose name doubles as the key used in the result tables.
finetuned_embedding_models_folder = os.path.join(data_folder, "finetuned_embedding_models_hard_neg")
embedding_models_to_evaluate = {
    model_name: os.path.join(finetuned_embedding_models_folder, model_name)
    for model_name in ("BGE_Large_FT", "MPNet_FT", "E5_Large_FT")
}

# Fine-tuned cross-encoder models (re-rankers), laid out the same way.
finetuned_cross_encoder_models_folder = os.path.join(data_folder, "fine_tuned_models_hard_neg")
cross_encoder_models_to_evaluate = {
    model_name: os.path.join(finetuned_cross_encoder_models_folder, model_name)
    for model_name in (
        "MiniLM_CrossEncoder",
        "MPNet_CrossEncoder",
        "MSMarco_CrossEncoder",
        "BERT_CrossEncoder",
    )
}

# Locations of the pickled graph and of the test questions.
graph_path = os.path.join(data_folder, "graph.gpickle")
test_set_path = os.path.join(data_folder, "ObliQA_MultiPassage_test.json")

# --- Pipeline Parameters ---
K_INITIAL = 100        # candidates taken from each first-stage retriever (dense and BM25)
K_GRAPH = 25           # candidates passed on to the cross-encoder after graph re-ranking
K_FINAL = 20           # passages kept per question in the final run
PARENT_BONUS = 0.01    # score bonus when a candidate's PARENT_OF predecessor was also retrieved
CITATION_BONUS = 0.02  # score bonus per CITED_BY predecessor that was also retrieved

# --- Load Static Components (that don't change between loops) ---
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

print("Loading graph and test data...")
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# load graph files that come from a trusted source.
with open(graph_path, "rb") as graph_file:
    G = pickle.load(graph_file)
with open(test_set_path, "r", encoding="utf-8") as test_file:
    test_data = json.load(test_file)

# Collect, in graph iteration order, the id and text of every node typed
# "Passage". The two lists are index-aligned: corpus[i] is the text of
# all_passage_uids[i] (empty string when the node has no "text" attribute).
all_passage_uids = []
corpus = []
for node_id, node_attrs in G.nodes(data=True):
    if node_attrs.get("type") == "Passage":
        all_passage_uids.append(node_id)
        corpus.append(node_attrs.get("text", ""))

# Build the BM25 index once; it does not depend on the embedding model.
# Tokenisation is a plain split on single spaces (no lowercasing or
# punctuation handling), and queries are tokenised the same way later on.
print("Building BM25 index...")
tokenized_corpus = [passage_text.split(" ") for passage_text in corpus]
bm25 = BM25Okapi(tokenized_corpus)

print("All static components loaded successfully.")

# --- Reciprocal Rank Fusion (RRF) Function ---
def reciprocal_rank_fusion(ranked_lists, k=60):
    """Fuse several ranked lists of document ids with Reciprocal Rank Fusion.

    Every document receives ``1 / (k + rank)`` from each list it appears in
    (``rank`` is 1-based) and the contributions are summed.

    Args:
        ranked_lists: Iterable of ranked sequences of document ids, best first.
        k: Constant added to the rank in the denominator. Defaults to 60.

    Returns:
        A list of document ids ordered by descending fused score. Documents
        with equal scores keep the order in which they were first seen.
    """
    totals = {}
    for ranking in ranked_lists:
        for position, doc_uid in enumerate(ranking, start=1):
            totals[doc_uid] = totals.get(doc_uid, 0) + 1 / (k + position)

    # sorted() is stable (also with reverse=True), so ties stay in insertion order.
    return sorted(totals, key=totals.get, reverse=True)

# --- Main Evaluation Loop ---
# Evaluates every (retriever, re-ranker) combination with the same pipeline:
#   1. hybrid retrieval: dense cosine similarity + BM25, fused with RRF,
#   2. graph-based score bonuses from PARENT_OF / CITED_BY edges,
#   3. cross-encoder re-ranking of the top K_GRAPH candidates,
# and then scores the top K_FINAL passages per question with pytrec_eval.
# One summary row per combination is appended to all_evaluation_results.
all_evaluation_results = []

# --- Load the relevance judgments once ---
# The qrels do not depend on the model combination, so they are parsed before
# any model is loaded. A missing or malformed file therefore fails immediately
# instead of after the first (expensive) embedding + pipeline run.
eval_folder = os.path.join(data_folder, "evaluation_files")
qrel_path = os.path.join(eval_folder, "qrels.trec")

qrel = {}
with open(qrel_path, "r", encoding="utf-8") as f:
    for line in f:
        parts = line.strip().split()
        if len(parts) >= 4:
            # First token is the query id, last token the relevance label.
            # Everything between the second token and the label is re-joined,
            # so passage uids that contain spaces survive the split.
            qid, _, uid, rel = parts[0], parts[1], " ".join(parts[2:-1]), parts[-1]
            qrel.setdefault(qid, {})[uid] = int(rel)

# Number of first-stage candidates per retriever; capped by the corpus size.
top_k_initial = min(K_INITIAL, len(all_passage_uids))

# Outer loop: iterate through each fine-tuned embedding model (retriever)
for retriever_name, retriever_path in embedding_models_to_evaluate.items():
    print("\n" + "="*80)
    print(f"--- Preparing Retriever: {retriever_name} ---")
    print("="*80)

    # --- 1. Generate Embeddings with the current Retriever ---
    print(f"Loading fine-tuned embedding model from: {retriever_path}")
    embedding_model = SentenceTransformer(retriever_path, device=device)

    # Passage embeddings depend only on the retriever, so they are computed
    # once here and shared by all re-rankers in the inner loop.
    print("Generating new passage embeddings...")
    embeddings = embedding_model.encode(corpus, show_progress_bar=True, batch_size=16)
    embeddings_tensor = torch.tensor(embeddings).to(device)

    # Inner loop: iterate through each fine-tuned cross-encoder model (re-ranker)
    for reranker_name, reranker_path in cross_encoder_models_to_evaluate.items():
        print("\n" + "-"*80)
        print(f"--- Evaluating Pipeline: [Retriever: {retriever_name}] + [Re-ranker: {reranker_name}] ---")
        print("-" * 80)

        # --- Load the current Re-ranker ---
        print(f"Loading Cross-Encoder model from: {reranker_path}")
        cross_encoder_tokenizer = AutoTokenizer.from_pretrained(reranker_path)
        cross_encoder_model = AutoModelForSequenceClassification.from_pretrained(reranker_path).to(device)
        cross_encoder_model.eval()

        # --- 2. Run the Full Hybrid Pipeline ---
        all_retrieved_results = {}
        print(f"Running hybrid pipeline for {len(test_data)} questions...")
        for q in tqdm(test_data, desc=f"Processing Queries for {retriever_name} + {reranker_name}"):
            # The qrels keys are strings parsed from the file; cast the
            # question id so the run keys are guaranteed to be strings too.
            qid = str(q["QuestionID"])
            question = q["Question"]

            # Stage 1: Hybrid Retrieval
            # Dense side: cosine similarity of the query against every passage.
            query_embedding = embedding_model.encode(question, convert_to_tensor=True)
            cos_scores = util.pytorch_cos_sim(query_embedding, embeddings_tensor)[0]
            semantic_results = torch.topk(cos_scores, k=top_k_initial)
            semantic_uids = [all_passage_uids[i] for i in semantic_results.indices.tolist()]

            # Sparse side: BM25 with the same single-space tokenisation as the index.
            tokenized_query = question.split(" ")
            bm25_scores = bm25.get_scores(tokenized_query)
            top_bm25_indices = np.argsort(bm25_scores)[::-1][:top_k_initial]
            bm25_uids = [all_passage_uids[i] for i in top_bm25_indices]

            fused_uids = reciprocal_rank_fusion([semantic_uids, bm25_uids])

            uid_to_initial_score = dict(zip(semantic_uids, semantic_results.values.tolist()))

            # NOTE(review): candidates found only by BM25 get initial_score 0,
            # while dense candidates carry their cosine score. Because Stage 2
            # sorts by initial_score + graph_bonus, the RRF order is not used
            # for ranking and BM25-only candidates can reach the cross-encoder
            # only via graph bonuses. Confirm this is intended; changing it
            # alters the reported metrics, so it is left as-is here.
            initial_candidates = [
                {
                    "internal_uid": uid,
                    "initial_score": uid_to_initial_score.get(uid, 0),
                    "graph_bonus": 0.0,
                }
                for uid in fused_uids
            ]

            retrieved_uids_set = {cand["internal_uid"] for cand in initial_candidates}

            # Stage 2: Graph-Based Re-ranking
            for cand in initial_candidates:
                current_uid = cand["internal_uid"]
                # Parent bonus: only the first PARENT_OF predecessor is
                # inspected (the loop stops at it whether or not it was
                # retrieved).
                for parent_uid in G.predecessors(current_uid):
                    if G.get_edge_data(parent_uid, current_uid, {}).get("type") == "PARENT_OF":
                        if parent_uid in retrieved_uids_set:
                            cand["graph_bonus"] += PARENT_BONUS
                        break
                # Citation bonus: accumulated for every CITED_BY predecessor
                # that is itself among the retrieved candidates.
                for predecessor_uid in G.predecessors(current_uid):
                    if G.get_edge_data(predecessor_uid, current_uid, {}).get("type") == "CITED_BY":
                        if predecessor_uid in retrieved_uids_set:
                            cand["graph_bonus"] += CITATION_BONUS

            for cand in initial_candidates:
                cand["graph_score"] = cand["initial_score"] + cand["graph_bonus"]

            graph_reranked = sorted(initial_candidates, key=lambda x: x["graph_score"], reverse=True)

            # Stage 3: Cross-Encoder Re-ranking
            cross_encoder_candidates = graph_reranked[:K_GRAPH]

            ce_input_pairs = [
                [question, G.nodes[cand["internal_uid"]].get("text", "")]
                for cand in cross_encoder_candidates
            ]

            with torch.no_grad():
                # max_length is set explicitly to prevent a runtime error with certain models.
                inputs = cross_encoder_tokenizer(
                    ce_input_pairs,
                    padding=True,
                    truncation=True,
                    max_length=512,
                    return_tensors="pt",
                ).to(device)
                outputs = cross_encoder_model(**inputs)
                # Uses the softmax probability at label index 1 as the score.
                # NOTE(review): assumes each model has at least two output
                # labels with index 1 = relevant — confirm against training.
                scores = torch.softmax(outputs.logits, dim=1)[:, 1].cpu().numpy()

            for cand, score in zip(cross_encoder_candidates, scores):
                cand["final_score"] = score

            final_reranked = sorted(cross_encoder_candidates, key=lambda x: x["final_score"], reverse=True)

            # Convert internal node ids to the "document_id|||passage_id" form
            # that is used as the passage uid in the run.
            final_results = []
            for cand in final_reranked[:K_FINAL]:
                node = G.nodes[cand["internal_uid"]]
                final_results.append({
                    "uid": f"{node.get('document_id', '')}|||{node.get('passage_id', '')}",
                    "score": float(cand["final_score"])
                })
            all_retrieved_results[qid] = final_results

        # --- 3. Evaluate the Results for the Current Combination ---
        evaluator = pytrec_eval.RelevanceEvaluator(qrel, {"recall", "map_cut", "ndcg_cut"})

        run = {qid: {p["uid"]: p["score"] for p in passages} for qid, passages in all_retrieved_results.items()}
        results = evaluator.evaluate(run)

        num_queries = len(results)
        if num_queries == 0:
            # Previously this surfaced as a bare ZeroDivisionError.
            raise ValueError(
                f"No queries were evaluated for {retriever_name} + {reranker_name}: "
                f"check that the question ids in {test_set_path} match those in {qrel_path}."
            )

        # Macro-average each metric over the evaluated queries.
        R_10 = sum(results[qid].get("recall_10", 0.0) for qid in results)
        MAP_10 = sum(results[qid].get("map_cut_10", 0.0) for qid in results)
        NDCG_10 = sum(results[qid].get("ndcg_cut_10", 0.0) for qid in results)

        all_evaluation_results.append({
            "Model": f"Retriever: {retriever_name} | Re-ranker: {reranker_name}",
            "Recall@10": R_10 / num_queries,
            "MAP@10": MAP_10 / num_queries,
            "nDCG@10": NDCG_10 / num_queries
        })

        # Release the re-ranker before the next one is loaded so two
        # cross-encoders are never held in memory at the same time.
        del cross_encoder_model, cross_encoder_tokenizer
        if device == "cuda":
            torch.cuda.empty_cache()

    # Release the retriever and its passage embeddings before the next retriever.
    del embedding_model, embeddings_tensor, embeddings
    if device == "cuda":
        torch.cuda.empty_cache()




Using device: cuda
Loading graph and test data...
Building BM25 index...
All static components loaded successfully.

--- Preparing Retriever: BGE_Large_FT ---
Loading fine-tuned embedding model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/finetuned_embedding_models_hard_neg/BGE_Large_FT
Generating new passage embeddings...


Batches:   0%|          | 0/859 [00:00<?, ?it/s]


--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: BGE_Large_FT] + [Re-ranker: MiniLM_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MiniLM_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for BGE_Large_FT + MiniLM_CrossEncoder: 100%|██████████| 447/447 [01:20<00:00,  5.58it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: BGE_Large_FT] + [Re-ranker: MPNet_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MPNet_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for BGE_Large_FT + MPNet_CrossEncoder: 100%|██████████| 447/447 [03:11<00:00,  2.33it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: BGE_Large_FT] + [Re-ranker: MSMarco_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MSMarco_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for BGE_Large_FT + MSMarco_CrossEncoder: 100%|██████████| 447/447 [01:35<00:00,  4.69it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: BGE_Large_FT] + [Re-ranker: BERT_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/BERT_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for BGE_Large_FT + BERT_CrossEncoder: 100%|██████████| 447/447 [02:42<00:00,  2.75it/s]



--- Preparing Retriever: MPNet_FT ---
Loading fine-tuned embedding model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/finetuned_embedding_models_hard_neg/MPNet_FT
Generating new passage embeddings...


Batches:   0%|          | 0/859 [00:00<?, ?it/s]


--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: MPNet_FT] + [Re-ranker: MiniLM_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MiniLM_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for MPNet_FT + MiniLM_CrossEncoder: 100%|██████████| 447/447 [01:16<00:00,  5.81it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: MPNet_FT] + [Re-ranker: MPNet_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MPNet_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for MPNet_FT + MPNet_CrossEncoder: 100%|██████████| 447/447 [03:15<00:00,  2.29it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: MPNet_FT] + [Re-ranker: MSMarco_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MSMarco_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for MPNet_FT + MSMarco_CrossEncoder: 100%|██████████| 447/447 [01:32<00:00,  4.82it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: MPNet_FT] + [Re-ranker: BERT_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/BERT_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for MPNet_FT + BERT_CrossEncoder: 100%|██████████| 447/447 [02:45<00:00,  2.71it/s]



--- Preparing Retriever: E5_Large_FT ---
Loading fine-tuned embedding model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/finetuned_embedding_models_hard_neg/E5_Large_FT
Generating new passage embeddings...


Batches:   0%|          | 0/859 [00:00<?, ?it/s]


--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: E5_Large_FT] + [Re-ranker: MiniLM_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MiniLM_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for E5_Large_FT + MiniLM_CrossEncoder: 100%|██████████| 447/447 [01:19<00:00,  5.60it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: E5_Large_FT] + [Re-ranker: MPNet_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MPNet_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for E5_Large_FT + MPNet_CrossEncoder: 100%|██████████| 447/447 [03:09<00:00,  2.36it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: E5_Large_FT] + [Re-ranker: MSMarco_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/MSMarco_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for E5_Large_FT + MSMarco_CrossEncoder: 100%|██████████| 447/447 [01:34<00:00,  4.74it/s]



--------------------------------------------------------------------------------
--- Evaluating Pipeline: [Retriever: E5_Large_FT] + [Re-ranker: BERT_CrossEncoder] ---
--------------------------------------------------------------------------------
Loading Cross-Encoder model from: /content/drive/MyDrive/Colab Notebooks/modeldatalocation/fine_tuned_models_hard_neg/BERT_CrossEncoder
Running hybrid pipeline for 447 questions...


Processing Queries for E5_Large_FT + BERT_CrossEncoder: 100%|██████████| 447/447 [02:40<00:00,  2.79it/s]


In [None]:
# --- Final Comparison ---
# Turn the per-combination metric rows into a table ordered by MAP@10
# (best first), print it, and persist it as a CSV next to the data.
separator = "=" * 80
print("\n" + separator)
print("--- FINAL MODEL PERFORMANCE COMPARISON ---")
print(separator)

df_results = pd.DataFrame(all_evaluation_results).sort_values(by="MAP@10", ascending=False)

# Plain-text rendering with four decimals and without the index column.
leaderboard_text = df_results.to_string(index=False, float_format="%.4f")
print(leaderboard_text)

final_comparison_path = os.path.join(data_folder, "final_model_combination_comparison.csv")
df_results.to_csv(final_comparison_path, index=False)
print(f"\n📁 Final comparison saved to: {final_comparison_path}")


--- FINAL MODEL PERFORMANCE COMPARISON ---
                                                    Model  Recall@10  MAP@10  nDCG@10
   Retriever: E5_Large_FT | Re-ranker: MPNet_CrossEncoder     0.4488  0.3453   0.4317
      Retriever: MPNet_FT | Re-ranker: MPNet_CrossEncoder     0.4363  0.3377   0.4217
  Retriever: E5_Large_FT | Re-ranker: MiniLM_CrossEncoder     0.4457  0.3374   0.4241
    Retriever: E5_Large_FT | Re-ranker: BERT_CrossEncoder     0.4560  0.3351   0.4236
  Retriever: BGE_Large_FT | Re-ranker: MPNet_CrossEncoder     0.4416  0.3345   0.4197
     Retriever: MPNet_FT | Re-ranker: MiniLM_CrossEncoder     0.4336  0.3312   0.4165
 Retriever: BGE_Large_FT | Re-ranker: MiniLM_CrossEncoder     0.4333  0.3266   0.4119
   Retriever: BGE_Large_FT | Re-ranker: BERT_CrossEncoder     0.4421  0.3230   0.4103
       Retriever: MPNet_FT | Re-ranker: BERT_CrossEncoder     0.4347  0.3194   0.4066
 Retriever: E5_Large_FT | Re-ranker: MSMarco_CrossEncoder     0.4306  0.2752   0.3649
Retriever:

In [None]:
## Import sound alert dependencies
from IPython.display import Audio, display

def allDone(url='https://sound.peal.io/ps/audios/000/000/537/original/woo_vu_luvub_dub_dub.wav'):
  """Play a notification sound in the notebook output.

  Args:
      url: Address of the audio file to play. Defaults to the clip that was
          previously hard-coded, so calling ``allDone()`` behaves as before.
  """
  display(Audio(url=url, autoplay=True))
## Insert whatever audio file you want above

In [None]:
# Play the alert sound; as the last cell, it fires after the preceding cells
# have finished when the notebook is run top to bottom.
allDone()