![image.png](attachment:image.png)

In [1]:
!pip install -q rank_bm25

In [2]:
from rank_bm25 import BM25Okapi
from pprint import pprint

In [3]:
corpus = [
    "EURI is an AI assistant for developers.",
    "BM25 is a ranking function based on term frequency and document length.",
    "Retrieval-augmented generation uses a retriever and a reranker.",
    "EURI helps in question answering using context from documents."
]


In [4]:
tokenized_corpus = [doc.lower().split() for doc in corpus]

In [5]:
tokenized_corpus

[['euri', 'is', 'an', 'ai', 'assistant', 'for', 'developers.'],
 ['bm25',
  'is',
  'a',
  'ranking',
  'function',
  'based',
  'on',
  'term',
  'frequency',
  'and',
  'document',
  'length.'],
 ['retrieval-augmented',
  'generation',
  'uses',
  'a',
  'retriever',
  'and',
  'a',
  'reranker.'],
 ['euri',
  'helps',
  'in',
  'question',
  'answering',
  'using',
  'context',
  'from',
  'documents.']]

In [6]:
bm25 = BM25Okapi(tokenized_corpus)

In [7]:
query = "EURI helps in question answering using context "

In [8]:
tokenized_query = query.lower().split()

In [9]:
tokenized_query

['euri', 'helps', 'in', 'question', 'answering', 'using', 'context']

In [10]:
scores = bm25.get_scores(tokenized_query)

In [11]:
scores

array([0.        , 0.        , 0.        , 5.08378716])

In [12]:
ranked = sorted(zip(corpus, scores), key=lambda x: x[1], reverse=True)

In [13]:
ranked

[('EURI helps in question answering using context from documents.',
  np.float64(5.083787162323222)),
 ('EURI is an AI assistant for developers.', np.float64(0.0)),
 ('BM25 is a ranking function based on term frequency and document length.',
  np.float64(0.0)),
 ('Retrieval-augmented generation uses a retriever and a reranker.',
  np.float64(0.0))]

In [14]:
print("🔍 Ranked Results (Highest to Lowest BM25 Score):\n")
for rank, (doc, score) in enumerate(ranked, 1):
    print(f"{rank}. [Score: {score:.2f}] {doc}")

🔍 Ranked Results (Highest to Lowest BM25 Score):

1. [Score: 5.08] EURI helps in question answering using context from documents.
2. [Score: 0.00] EURI is an AI assistant for developers.
3. [Score: 0.00] BM25 is a ranking function based on term frequency and document length.
4. [Score: 0.00] Retrieval-augmented generation uses a retriever and a reranker.
