In [9]:
from langchain_community.embeddings import DeepInfraEmbeddings
from dotenv import load_dotenv
import os
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_chroma import Chroma
from langchain_community.embeddings import DeepInfraEmbeddings

load_dotenv()

query = "What is solvency II?"
k=10

embedding_model = DeepInfraEmbeddings(model_id="Qwen/Qwen3-Embedding-8B", deepinfra_api_token=os.environ["DEEPINFRA_API_KEY"])

vectorstore = Chroma("QWEN_SOLVENCY_II_V1", embedding_model, os.path.join("data", "vector_stores", "solvency-II-files"))
retriever = vectorstore.as_retriever(search_kwargs={'k': k})

# Qwen3 Reranker

In [None]:
import requests
import json
from langchain_core.documents.base import Document
from langchain_core.documents import BaseDocumentCompressor
from typing import Any, Dict, List, Optional, Sequence, Union
from langchain_core.callbacks.manager import Callbacks
from copy import deepcopy
from langchain_core.utils import secret_from_env
from pydantic import ConfigDict, Field, SecretStr, model_validator



class Qwen3Reranker(BaseDocumentCompressor):
    model: Optional[str] = None
    """Model to use for reranking. Mandatory to specify the model name."""

    top_n: Optional[int] = 3
    """Number of documents to return."""

    api_key: Optional[SecretStr] = Field(
        default_factory=secret_from_env("COHERE_API_KEY", default=None)
    )
    """DeepInfra API key. Must be specified directly"""

    def __init__(self, model, api_key):
        super().__init__()
        self.model = model
        self.api_key = api_key

    """
    Example output: [{'index': 0, 'relevance_score': 0.86413884},
    {'index': 2, 'relevance_score': 0.15784983},
    {'index': 1, 'relevance_score': 0.01999476}]
    """
    def rerank(self, documents: Sequence[Union[str, Document, dict]],
        query: str,
        top_n: Optional[int] = None,) -> List[Dict[str, Any]]:
        # max chunks size of 32,768 tokens

        # Define the URL
        url = f"https://api.deepinfra.com/v1/inference/Qwen/{self.model}"

        # Define the headers
        headers = {
            "Authorization": f"bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        # query = "What is the capital of United States of America?"
        # documents = ["The capital of USA is Washington DC.", "Pototoes are a type of vegetable.", "The capital of France is Paris."]
        queries = [query] * len(documents)

        page_contents = []

        print(f"Documents: {documents}")
        for document in documents:
            if isinstance(document, Document):
                page_contents.append(document.page_content)
            elif isinstance(document, str):
                page_contents.append(document)
            else:
                raise Exception("Not Implemented: should be of type langchain Document")

        # Define the data payload
        data = {
            "queries": queries,
            "documents": page_contents,
        }

        # Make the POST request
        response = requests.post(url, headers=headers, data=json.dumps(data))

        # Print the response from the server
        # print(response.json())

        reranked_format = []

        scores = response.json()['scores']


        top_n = top_n if (top_n is None or top_n > 0) else self.top_n

        indexed_scores = list(enumerate(scores))

        sorted_scores = sorted(indexed_scores, key=lambda item: item[1], reverse=True)

        top_n_slice = sorted_scores[:top_n]

        reranked_format = [
            {
                "index": original_index,
                "relevance_score": score
            }
            for original_index, score in top_n_slice
        ]
        return reranked_format
    
    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using Deepinfra Qwen-3's rerank API.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents.
        """
        compressed = []
        for res in self.rerank(documents, query):
            doc = documents[res["index"]]
            doc_copy = Document(doc.page_content, metadata=deepcopy(doc.metadata))
            doc_copy.metadata["relevance_score"] = res["relevance_score"]
            compressed.append(doc_copy)
        return compressed
    

reranker = Qwen3Reranker(model="Qwen3-Reranker-8B", api_key=os.environ["DEEPINFRA_API_KEY"])

# reranker.rerank(query=query, documents=retriever.get_relevant_documents(query=query))

compressor = ContextualCompressionRetriever(base_compressor=reranker, 
                                            base_retriever=retriever
)

compressor.invoke(
    query
)

Documents: [Document(id='cb3c9f6b-0b41-4958-bc9a-87d4741a0673', metadata={'total_pages': 14, 'title': '', 'creator': 'Microsoft® Word 2010', 'format': 'PDF 1.5', 'author': 'Beatriz Ferriz', 'subject': '', 'creationDate': "D:20150130154103+01'00'", 'source': 'data\\raw\\solvency-II-files\\guidelines-level 3-v0.1 - TRUNCATED\\Guidelines on supervisory review process.pdf', 'moddate': '2015-01-30T15:41:03+01:00', 'file_path': 'data\\raw\\solvency-II-files\\guidelines-level 3-v0.1 - TRUNCATED\\Guidelines on supervisory review process.pdf', 'trapped': '', 'modDate': "D:20150130154103+01'00'", 'producer': 'Microsoft® Word 2010', 'keywords': '', 'Header 2': '\uf0b7 “College” refers to the college of supervisors as defined in Article', 'creationdate': '2015-01-30T15:41:03+01:00'}, page_content='212(1)(e) of the Solvency II Directive;'), Document(id='fa763a05-d731-440b-b4a4-5e69b00cd6c2', metadata={'trapped': '', 'producer': 'Microsoft® Word 2010', 'title': '', 'author': 'Katalin Almasi', 'subje

[Document(metadata={'total_pages': 14, 'title': '', 'creator': 'Microsoft® Word 2010', 'format': 'PDF 1.5', 'author': 'Beatriz Ferriz', 'subject': '', 'creationDate': "D:20150130154103+01'00'", 'source': 'data\\raw\\solvency-II-files\\guidelines-level 3-v0.1 - TRUNCATED\\Guidelines on supervisory review process.pdf', 'moddate': '2015-01-30T15:41:03+01:00', 'file_path': 'data\\raw\\solvency-II-files\\guidelines-level 3-v0.1 - TRUNCATED\\Guidelines on supervisory review process.pdf', 'trapped': '', 'modDate': "D:20150130154103+01'00'", 'producer': 'Microsoft® Word 2010', 'keywords': '', 'Header 2': '\uf0b7 “College” refers to the college of supervisors as defined in Article', 'creationdate': '2015-01-30T15:41:03+01:00', 'relevance_score': 0.31742626428604126}, page_content='212(1)(e) of the Solvency II Directive;'),
 Document(metadata={'trapped': '', 'producer': 'Microsoft® Word 2010', 'title': '', 'author': 'Katalin Almasi', 'subject': '', 'creationDate': "D:20150709105650+02'00'", 'cre

In [34]:
# test reranker
# test queries and documents
test_query="What is the capital of United States of America?"
test_documents=[
    "Pototoes are a type of vegetable.",
    "The capital of USA is Washington DC.",
    "The capital of France is Paris."
]

reranker = Qwen3Reranker(model="Qwen3-Reranker-8B", api_key=os.environ["DEEPINFRA_API_KEY"])

reranker.rerank(documents=test_documents, query=test_query)

# compressor = ContextualCompressionRetriever(base_compressor=reranker, 
#                                             base_retriever=Chroma.from_documents(test_docuemnts, embedding_model)
# )

Documents: ['Pototoes are a type of vegetable.', 'The capital of USA is Washington DC.', 'The capital of France is Paris.']


[{'index': 0, 'relevance_score': 1.9947297005273867e-06},
 {'index': 1, 'relevance_score': 0.961533784866333},
 {'index': 2, 'relevance_score': 0.0002959570847451687}]

# AWS Bedrock - do not have permission on account

In [24]:
from langchain_aws import BedrockRerank


model_arn = f"arn:aws:bedrock:us-east-1:{os.environ["AWS_ACCOUNT_ID"]}:inference-profile/us.cohere.rerank-v3-5:0"

reranker = BedrockRerank(
    model_arn=model_arn,
    # aws_bearer_token=os.environ["AWS_BEARER_TOKEN_BEDROCK"],
)

reranker.rerank(
    query="What is the capital of United States of America?",
    documents=[
        "The capital of USA is Washington DC.",
        "Pototoes are a type of vegetable.",
        "The capital of France is Paris."
    ])

AccessDeniedException: An error occurred (AccessDeniedException) when calling the Rerank operation: You are not authorized to invoke the Rerank operation.

# Cohere

In [17]:
from langchain_cohere import CohereRerank

# test reranker
# test queries and documents
test_query="What is the capital of United States of America?"
test_documents=[
    "The capital of USA is Washington DC.",
    "Pototoes are a type of vegetable.",
    "The capital of France is Paris."
]

reranker = CohereRerank(
    model="rerank-v3.5"
)

responses = reranker.rerank(
    query=test_query,
    documents=test_documents
)

In [19]:
for response in responses:
    print(test_documents[response["index"]])

The capital of USA is Washington DC.
The capital of France is Paris.
Pototoes are a type of vegetable.


In [20]:
responses

[{'index': 0, 'relevance_score': 0.86413884},
 {'index': 2, 'relevance_score': 0.15784983},
 {'index': 1, 'relevance_score': 0.01999476}]

In [30]:
from langchain_cohere import CohereRerank
# https://docs.cohere.com/docs/reranking-best-practices

# max chunks size of 4096 tokens



reranker = CohereRerank(
    model="rerank-v3.5"
)


compressor = ContextualCompressionRetriever(base_compressor=reranker, 
                                            base_retriever=retriever
)

compressor.invoke(
    query
)

[Document(metadata={'total_pages': 8, 'subject': '', 'title': '', 'file_path': 'data\\raw\\solvency-II-files\\guidelines-level 3-v0.1 - TRUNCATED\\Guidelines on Own Risk Solvency Assessment .pdf', 'trapped': '', 'creationDate': "D:20150909140814+02'00'", 'author': '', 'Header 2': '**Guidelines on own risk and solvency assessment **', 'modDate': "D:20150909140814+02'00'", 'keywords': '', 'format': 'PDF 1.5', 'producer': 'Microsoft® Word 2010', 'creator': 'Microsoft® Word 2010', 'creationdate': '2015-09-09T14:08:14+02:00', 'source': 'data\\raw\\solvency-II-files\\guidelines-level 3-v0.1 - TRUNCATED\\Guidelines on Own Risk Solvency Assessment .pdf', 'moddate': '2015-09-09T14:08:14+02:00', 'relevance_score': 0.7443038}, page_content='**1.** **Introduction**\n\n1.1. According to Article 16 of Regulation (EU) No. 1094/2010 of the European\nParliament and of the Council of 24 November 2010 establishing a European\nSupervisory Authority (hereinafter “EIOPA Regulation”) [1,] EIOPA issues these\