In [1]:
import pandas as pd
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
from dataclasses import dataclass
from typing import Tuple, List, Optional, Mapping, Any

torch.manual_seed(101)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Loading bm-25 ranked documents

df = pd.read_json("../data/bm_25_ranking.json")
df.head()

Unnamed: 0,rel_rank,query,doc
0,1,foods and supplements to lower blood sugar,A healthy diet is essential to reversing predi...
1,2,foods and supplements to lower blood sugar,"Ohio State University, researchers saw insulin..."
2,3,foods and supplements to lower blood sugar,Food And Supplements That Lower Blood Sugar Le...
3,4,foods and supplements to lower blood sugar,But studies show that eating certain foods may...
4,5,foods and supplements to lower blood sugar,7 easy tips to reduce insulin and lose belly f...


In [3]:
# Re-rank documents using fine-tuned model


class Reranker():

    def __init__(self, model_name):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def rank_documents(self,
                       query: str,
                       documents: List[str],
                       top_k: int = 10) -> List[Tuple[str, float]]:
        
        scores = []
        for doc in documents:
            input_text = f"Query: {query} Document: {doc} Relevant: "
            inputs = self.tokenizer(
                input_text,
                return_tensors="pt",
                max_length=512,
                truncation=True
            ).to(self.device)
        
            with torch.no_grad():
                outputs = self.model(**inputs, labels=inputs["input_ids"])
                score = outputs.loss.item()
            scores.append(score)

        ranked_docs = list(zip(documents, scores))
        ranked_docs.sort(key=lambda x: x[1], reverse=True)

        return ranked_docs[:top_k]

    def print_ranking(self,
                      query: str,
                      documents: List[str]) -> None:
        ranked_results = self.rank_documents(query, documents)
        print(f"\n {query} \n")
        for rank, (doc, score) in enumerate(ranked_results, 1):
            print(f"{rank}. Document: {doc[:100]}...\n")

In [4]:
# Example of a re-rank on 1000 documents 

query = df["query"].unique()[0]
relevant_documents = df[df["query"] == query]["doc"].values

In [5]:
model_1 = "pratham4521/T5-base-masmarco-finetuned"
ranker = Reranker(model_name = model_1)
ranker.print_ranking(query, relevant_documents)

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.



 foods and supplements to lower blood sugar 

1. Document: Choosing healthful meal and snack options can help you avoid heart disease and its complications. Be...

2. Document: (That's just from supplements; it's okay if your daily magnesium intake is higher than 350 mg if you...

3. Document: Whether youâve been diagnosed with diabetes or simply monitor your blood sugar levels to track you...

4. Document: Before you treat for hypoglycemia, and if you are able, you should check your blood sugar level to m...

5. Document: There are many approaches you can take when dealing with your diabetes treatment plan and how to sta...

6. Document: GINGER TEA LOWERS BLOOD SUGAR. My balanced carb and protein diet is stabilized at 25gm fat, 100gm ca...

7. Document: Side effects to some ingredients of high-dose pyridoxine hydrochloride (which is injected into the v...

8. Document: So white bread is classified as having a high glycemic index. Brown rice, in contrast, is digested m...

9. Docume