In [1]:
import pandas as pd
from tqdm.auto import tqdm

### Retrieval evaluation                                                                                                                                                              
The retrieval evaluation is about how good the search is performing

In [2]:
df_question = pd.read_csv('data/ground-truth-retrieval.csv')

In [3]:
ground_truth = df_question.to_dict(orient='records')

In [4]:
ground_truth[0]

{'id': 0,
 'question': "What do I need to do after clicking the 'Sign Up' button?"}

In [5]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [6]:
from qdrant_client import QdrantClient, models

In [7]:
qdrant_client = QdrantClient("http://localhost:6333")

In [8]:
def qdrant_search(query, limit=5):

    collection_name="project"
    model_handle = "jinaai/jina-embeddings-v2-small-en"
    
    result_points = qdrant_client.query_points(
        collection_name=collection_name,
        query=models.Document( #embed the query text locally with "jinaai/jina-embeddings-v2-small-en"
            text=query,
            model=model_handle 
        ),
        limit=limit, # top closest matches
        with_payload=True #to get metadata in the results
    )

    results = [point.payload for point in result_points.points]
    
    return results

In [32]:
def rrf_search(query: str, limit: int = 1) -> list[models.ScoredPoint]:
    result_points = qdrant_client.query_points(
        collection_name="project-sparse-and-dense",
        prefetch=[
            models.Prefetch(
                query=models.Document(
                    text=query,
                    model="jinaai/jina-embeddings-v2-small-en",
                ),
                using="jina-small",
                limit=(3 * limit),
            ),
            models.Prefetch(
                query=models.Document(
                    text=query,
                    model="Qdrant/bm25",
                ),
                using="bm25",
                limit=(3 * limit),
            ),
        ],
        # Fusion query enables fusion on the prefetched results
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        with_payload=True,
    )


    results = [point.payload for point in result_points.points]

    return results

In [23]:
relevance_total = []
doc_id = q['id']
results = qdrant_search(q['question'])
relevance = [d['id'] == doc_id for d in results]
relevance_total.append(relevance)

In [30]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
        results = search_function(q['question'])
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [33]:
evaluate(ground_truth, rrf_search)

  0%|          | 0/395 [00:00<?, ?it/s]

{'hit_rate': 0.9063291139240506, 'mrr': 0.7727004219409283}