In [1]:
from typing import Dict, List, Union

import pandas as pd

from sp_rag.core.retrievers import RetrievalStrategy, RetrieverFactory
from sp_rag.settings import settings
from sp_rag.vectordb import QdrantVectorDB

In [2]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# NOTE(review): override=True presumably lets .env values replace variables
# that are already set in the environment — confirm against python-dotenv docs.
# NOTE(review): this runs after `sp_rag.settings` was imported in the first
# cell; if that module reads the environment at import time it would not see
# these values — confirm the intended ordering.
load_dotenv(override=True)

True

In [3]:
# Connection settings for the vector database used throughout the notebook.
COLLECTION_NAME = "papers"
QDRANT_URL = "http://localhost:6333"  # NOTE(review): assumes a Qdrant instance is reachable at this address — confirm

# Build the project's Qdrant wrapper and keep a handle on its vector store.
# `vectorstore` is read as a global by the retriever helper functions below.
qdrant_db = QdrantVectorDB(url=QDRANT_URL, collection=COLLECTION_NAME)
vectorstore = qdrant_db.vector_store

  return torch._C._cuda_getDeviceCount() > 0


In [4]:

# Example question passed to every retrieval strategy below. It combines a
# topic, a date constraint ("after 2022") and a result limit ("10 results").
user_query = (
    "What are the latest transformer architectures in NLP published after 2022? Limit it to 10 results."
)

In [5]:

def get_simple_query_result(query: str) -> Dict[str, str]:
    """Return the baseline view of a query: the query itself, unmodified.

    Args:
        query: The user's question.

    Returns:
        A one-entry mapping whose key ``"Query"`` holds ``query`` exactly as given.
    """
    # No rewriting or expansion happens for the simple strategy.
    passthrough: Dict[str, str] = dict(Query=query)
    return passthrough


def get_multi_query_results(query: str) -> Dict[str, List[str]]:
    """Ask the multi-query retriever's LLM chain for reformulations of ``query``.

    Only the retriever's ``llm_chain`` is invoked here; the retriever's own
    document lookup is not called by this function.

    Args:
        query: The user's question, passed to the chain under the key
            ``"question"``.

    Returns:
        A mapping with the single key ``"Queries"`` holding whatever the chain
        returned (annotated as a list of strings).
    """
    build_retriever = RetrieverFactory.get_retriever
    # `vectorstore` is the notebook-level global created in the setup cell.
    multi_retriever = build_retriever(RetrievalStrategy.MULTI_QUERY, vectorstore)
    chain_input = {"question": query}
    return {"Queries": multi_retriever.llm_chain.invoke(chain_input)}


def get_self_query_results(query: str) -> Dict[str, Union[str, Dict]]:
    """Run the self-query retriever's query constructor on ``query``.

    Only ``query_constructor`` is invoked; the retriever's own document lookup
    is not called by this function. The structured query object is printed as
    a side effect before its parts are returned.

    Args:
        query: The user's natural-language question.

    Returns:
        A mapping with ``"Rewritten Query"`` (the object's ``query`` attribute)
        and ``"Filter"`` (its ``filter`` attribute).
        NOTE(review): the recorded output shows the filter as a ``Comparison``
        object rather than a dict, so the ``Dict`` in the return annotation
        looks too narrow — confirm before tightening it.
    """
    build_retriever = RetrieverFactory.get_retriever
    # `vectorstore` is the notebook-level global created in the setup cell.
    self_retriever = build_retriever(RetrievalStrategy.SELF_QUERY, vectorstore)
    structured = self_retriever.query_constructor.invoke(query)
    # The printed repr also shows fields this function does not return
    # (the recorded output includes `limit`).
    print(structured)
    rewritten, metadata_filter = structured.query, structured.filter
    return {"Rewritten Query": rewritten, "Filter": metadata_filter}

In [6]:
# Simple
simple_data = get_simple_query_result(user_query)
df_simple = pd.DataFrame([simple_data])
df_simple


Unnamed: 0,Query
0,What are the latest transformer architectures ...


In [7]:
# Multi-query
multi_data = get_multi_query_results(user_query)
df_multi = pd.DataFrame(multi_data["Queries"], columns=["Generated Queries"])
df_multi

Unnamed: 0,Generated Queries
0,What are the most recent transformer models in...
1,Can you list the latest advancements in transf...
2,What new transformer-based architectures in NL...


In [8]:
# Self-query
self_data = get_self_query_results(user_query)
df_self = pd.DataFrame(
    [
        {
            "Rewritten Query": self_data["Rewritten Query"],
            "Filter (metadata constraints)": str(self_data["Filter"]),
        }
    ]
)
df_self

query='transformer architectures in NLP' filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='published_date', value={'date': '2022-12-31', 'type': 'date'}) limit=None


Unnamed: 0,Rewritten Query,Filter (metadata constraints)
0,transformer architectures in NLP,comparator=<Comparator.GT: 'gt'> attribute='pu...


In [9]:
# Show the three strategies' tables one after another, each under its own heading.
sections = (
    ("## SimpleRetriever Result", df_simple),
    ("\n## MultiQueryRetriever Generated Queries", df_multi),
    ("\n## SelfQueryRetriever Structured Output", df_self),
)
for heading, frame in sections:
    print(heading)
    display(frame)

## SimpleRetriever Result


Unnamed: 0,Query
0,What are the latest transformer architectures ...



## MultiQueryRetriever Generated Queries


Unnamed: 0,Generated Queries
0,What are the most recent transformer models in...
1,Can you list the latest advancements in transf...
2,What new transformer-based architectures in NL...



## SelfQueryRetriever Structured Output


Unnamed: 0,Rewritten Query,Filter (metadata constraints)
0,transformer architectures in NLP,comparator=<Comparator.GT: 'gt'> attribute='pu...
