In [1]:
from dotenv import load_dotenv
from datasets import load_dataset
from src.embedder.dense import Embedder
from src.embedder.sparse import SparseEmbedder
from src.datasource.hybrid import HybridDatasource
from src.utils import evaluate_model, load_test_data

# Pull settings from a local .env file into the process environment before
# anything else in the notebook runs.
load_dotenv()

# Hub repository id shared by the three dataset configurations loaded below.
COSQA_REPO = "CoIR-Retrieval/cosqa"

queries_dataset = load_dataset(COSQA_REPO, "queries")["queries"]
corpus_dataset = load_dataset(COSQA_REPO, "corpus")["corpus"]
default_dataset = load_dataset(COSQA_REPO, "default")


def _texts_in_partition(dataset, wanted):
    """Return the ``text`` values of rows whose ``partition`` equals ``wanted``.

    The two columns are walked in lockstep, so the original row order is kept.
    """
    return [
        text
        for part, text in zip(dataset["partition"], dataset["text"])
        if part == wanted
    ]


# Only the rows labelled "test" are used by the evaluation further down.
test_corpus = _texts_in_partition(corpus_dataset, "test")
test_queries = _texts_in_partition(queries_dataset, "test")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Every run below loads into, and is evaluated against, this one collection name.
COLLECTION = "code-test-hybrid"

# A single sparse embedder is shared by all hybrid configurations.
sparse = SparseEmbedder("Qdrant/bm25")

# (model id, second positional argument, extra keyword arguments) per dense model.
# NOTE(review): the integer is presumably the embedding dimension -- confirm
# against src.embedder.dense.Embedder.
dense_specs = (
    ("Qwen/Qwen3-Embedding-0.6B", 1024, {}),
    ("sentence-transformers/all-MiniLM-L6-v2", 384, {}),
    ("sentence-transformers/all-MiniLM-L6-v2", 384, {"load_tuned": True}),
)
dense_models = [Embedder(model_id, size, **options) for model_id, size, options in dense_specs]

for dense_model in dense_models:
    db = HybridDatasource(sparse, dense_model)
    # The positional True is forwarded as-is; its meaning is defined by
    # load_test_data (presumably a reset/recreate flag -- TODO confirm).
    load_test_data(db, COLLECTION, test_corpus, True)

    # Header line: dense model name, optional "Tuned" marker, then the sparse model.
    header = (
        dense_model.model_name,
        "Tuned" if dense_model.is_tuned else "",
        "+",
        sparse.model_name,
    )
    print(*header)

    recall, mrr, ndcg = evaluate_model(db, COLLECTION, test_queries, test_corpus)
    for label, score in (("Recall:", recall), ("MRR:", mrr), ("NDCG:", ndcg)):
        print(label, score)

Fetching 18 files: 100%|██████████| 18/18 [00:01<00:00, 15.53it/s]


Qwen/Qwen3-Embedding-0.6B  + Qdrant/bm25
Recall: 1.0
MRR: 0.7600920634920635
NDCG: 0.8167281377289535
sentence-transformers/all-MiniLM-L6-v2  + Qdrant/bm25
Recall: 1.0
MRR: 0.7307460317460317
NDCG: 0.7887414005769483
sentence-transformers/all-MiniLM-L6-v2 Tuned + Qdrant/bm25
Recall: 1.0
MRR: 0.7626404761904761
NDCG: 0.8179986274384728
