In [5]:
def _rrf(result_sets_with_rankings, weights, field, k=60, normalize=False, INSPECT=False):
    combined_results = {}

    # Process each result set and apply reciprocal rank formula
    for result_set, results in result_sets_with_rankings.items():
        weight = weights[result_set]
        for rank, result in enumerate(results, 1):
            _id = result['_id']
            if _id in combined_results:
                combined_results[_id]["score"] += weight / (k + rank)
            else:
                combined_results[_id] = {
                    "score": weight / (k + rank),
                    "text": result["_source"].get(field, '')
                }

    # Sort results by score in descending order
    sorted_results = sorted(combined_results.items(), key=lambda item: item[1]['score'], reverse=True)

    # Optionally normalize scores
    if normalize:
        min_score = min(result["score"] for _, result in sorted_results)
        max_score = max(result["score"] for _, result in sorted_results)
        for _, result in sorted_results:
            result["normalized_score"] = (result["score"] - min_score) / (max_score - min_score) if max_score > min_score else 0

    # Optionally inspect final results
    if INSPECT:
        print("FINAL")
        for _id, res in sorted_results:
            print(f"ID: {_id}, Score: {res['score']:.6f}, Snippet: {res['text'][:100]}...")

    return sorted_results

In [6]:
# Hits from the lexical (match) query, best first.
match_results = [
    {'_id': doc_id, '_source': {'title': title}}
    for doc_id, title in (
        ('doc1', 'Introduction to RRF'),
        ('doc2', 'RRF in Practice'),
        ('doc3', 'Advanced RRF Techniques'),
    )
]

# Hits from the vector (kNN) query, best first.
knn_results = [
    {'_id': doc_id, '_source': {'title': title}}
    for doc_id, title in (
        ('doc3', 'Advanced RRF Techniques'),
        ('doc1', 'Introduction to RRF'),
        ('doc4', 'RRF Case Studies'),
    )
]

# Both retrievers contribute equally in this example.
weights = dict(match=1.0, knn=1.0)

# Result-set name -> ranked hits; the names must line up with `weights`.
result_sets = dict(match=match_results, knn=knn_results)

# `_source` key whose value is carried along as each document's text.
field = 'title'

In [8]:
# Fuse the two rankings and show the result with min-max normalised scores.
_rrf(
    result_sets_with_rankings=result_sets,
    weights=weights,
    field=field,
    normalize=True,
)

[('doc1',
  {'score': 0.03252247488101534,
   'text': 'Introduction to RRF',
   'normalized_score': 1.0}),
 ('doc3',
  {'score': 0.032266458495966696,
   'text': 'Advanced RRF Techniques',
   'normalized_score': 0.9846231409125283}),
 ('doc2',
  {'score': 0.016129032258064516,
   'text': 'RRF in Practice',
   'normalized_score': 0.015376859087471663}),
 ('doc4',
  {'score': 0.015873015873015872,
   'text': 'RRF Case Studies',
   'normalized_score': 0.0})]