In [1]:
sql_query = f"""
SELECT 
    chunk_id, 
    doc_id, 
    text, 
    metadata, 
    -- 1 minus distance gives us a similarity score (closer to 1.0 is better)
    (1 - (embedding <=> %s::vector))::FLOAT AS similarity_score
FROM chunks
WHERE status = 'active'
-- Order by similarity (closest vectors first)
ORDER BY embedding <=> %s::vector
-- Limit to top matches
LIMIT %s;
"""

In [2]:
keyword_query = f"""
SELECT 
    chunk_id, 
    doc_id, 
    text, 
    metadata,
    -- ts_rank_cd calculates a relevancy score based on word frequency and proximity
    ts_rank_cd(body_search, plainto_tsquery('english', %s::vector)) AS keyword_score
FROM chunks
WHERE body_search @@ plainto_tsquery('english', %s::vetor)
  AND status = 'active'
ORDER BY keyword_score DESC
LIMIT %s;
"""

In [3]:
from agents.db_hooks import get_db_connection

In [4]:
def retrieve_similar_chunks(query_embedding, top_k=10):
    
    with get_db_connection() as conn:
        with conn.cursor() as cur:
            cur.execute(sql_query, (query_embedding, query_embedding, top_k))
            results_semantic = cur.fetchall()
            cur.execute(keyword_query, (query_embedding, query_embedding, top_k))
            results_keyword = cur.fetchall()
            return results_semantic, results_keyword

In [5]:
from utils.get_embedd_model import embedding_model
model = embedding_model()

  from .autonotebook import tqdm as notebook_tqdm


Embedding model initialized


In [6]:
from utils.get_llm import get_llm
llm = get_llm()

In [7]:
from pydantic import BaseModel

In [8]:
class RefinedQuery(BaseModel):
    refined_query: str

In [9]:
class RetrievalPipeline:
    def __init__(self, embedding_model, llm, top_k=5):
        self.embedding_model = embedding_model
        self.llm = llm
        self.top_k = top_k
    def query_refiner_agent(user_raw_query: str, chat_history: list = None) -> str:
        """
        Refines the user query for better Vector and Keyword search performance.
        """
        prompt = f"""
        You are an expert search optimizer for a Banking RAG system.
        Your goal is to rewrite the user's query to be more descriptive for a search engine.
        
        Original Query: {user_raw_query}
        Chat History: {chat_history if chat_history else "None"}
        
        Instructions:
        1. Expand banking abbreviations.
        2. Add synonyms (e.g., if user says 'steps', add 'procedure').
        3. Keep it concise but information-dense.
        4. Return ONLY the refined query string.
        5. Do not add any explanations or extra text, just the refined query.
        """
        
        # Call your LLM here
        refined_query = llm.with_structured_output(RefinedQuery).invoke(prompt)
        return refined_query.refined_query

    def retrieve(self, query):
        query_embedding = self.embedding_model.embed_query(query)
        return retrieve_similar_chunks(query_embedding, self.top_k)

    def generate_response(self, query):
        retrieved_chunks_sem,retrieved_chunks_key = self.retrieve(query)
        # For simplicity, we concatenate the retrieved chunks' text
        context_sem = "\n".join([chunk[2] for chunk in retrieved_chunks_sem])  # chunk[2] is the text
        context_key = "\n".join([chunk[2] for chunk in retrieved_chunks_key])
        prompt = f"""
        System: You are a helpful assistant that answers questions based on provided context.
        Task: Answer the question based on the retrieved context. 
              If the answer is not present in the context, say you don't know.
              Answer strictly in less than 2 lines.\n
        Context:\n{context_sem}\n
        Question: {query}\n
        Answer:"""
        response_sem = self.llm.invoke(prompt)
        prompt = f"""
        System: You are a helpful assistant that answers questions based on provided context.
        Task: Answer the question based on the retrieved context. 
              If the answer is not present in the context, say you don't know.
              Answer strictly in less than 2 lines.\n
        Context:\n{context_key}\n
        Question: {query}\n
        Answer:"""
        response_key = self.llm.invoke(prompt)
        return response_sem,response_key

In [10]:
ret = RetrievalPipeline(model, llm)

In [11]:
answer_sem,answer_key = ret.generate_response("when does Mr. Ravi Subbaiah Pagadala resigned?")
    

ðŸš€ Connection Pool Initialized


UndefinedFunction: function plainto_tsquery(unknown, vector) does not exist
LINE 8:     ts_rank_cd(body_search, plainto_tsquery('english', ARRAY...
                                    ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.


In [20]:
res = answer.content
# \u202f is the Narrow No-Break Space
# \u00a0 is the standard Non-Breaking Space
clean_answer = res.replace('\u202f', ' ').replace('\u00a0', ' ')
print(clean_answer)

Mr. Ravi Subbaiah Pagadala resigned effective 02 November 2022.
