In [8]:
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from utils import available_pdfs

In [9]:
# Load variables from a local .env file into the process environment
# (presumably where the Groq API key is kept — TODO confirm).
load_dotenv()

True

In [10]:
# Chat model used to route user queries. temperature=0 keeps the
# summarization-vs-rag decision reproducible for the same query;
# the model name and max_tokens are unchanged.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
    max_tokens=16000,
)

In [11]:
from pydantic import BaseModel, Field
from typing import Literal

class Parser(BaseModel):
    """Routing decision for a user query about the available PDFs.

    Choose 'summarization' only when the user wants a summary of an entire
    PDF; choose 'rag' for every other kind of question. Also list the PDF
    filenames the query refers to.
    """

    # Which pipeline should handle the query; restricted to the two literals.
    query: Literal["summarization", "rag"] = Field(
        ...,
        description="'summarization' for a whole-PDF summary, otherwise 'rag'",
    )
    # Defaults to an empty list so a reply that omits the field (no PDF
    # mentioned) still validates instead of raising a missing-field error.
    pdfs: list[str] = Field(
        default_factory=list,
        description="pdfs mentioned in the user query, if any",
    )

In [12]:
# Bind the Parser schema to the model so its reply is returned as a Parser object.
structured_llm = llm.with_structured_output(schema=Parser)

In [38]:
# Routing prompt. The instructions name the same fields as the Parser schema
# (`query`, `pdfs`) rather than asking for ad-hoc <tags>, so they do not
# compete with the structured output that the chain requests from the model.
# Template variables: {input} is the user's question, {pdfs} the candidate files.
prompt = PromptTemplate(
    template="""
You are a decision-making agent. Based on the user's query, fill in two fields:

1. query — the kind of request:
   - "summarization": use this **only if** the user asks for a summary of the **entire PDF**.
   - "rag": use this for **any other type of query**, such as questions about specific sections, topics, paragraphs, or details from the PDF (this includes a summary of a single topic).
2. pdfs — the **relevant PDF filenames**, copied exactly from the provided list, that best match the user query. If none are relevant, return an **empty list**.

User Query: {input}

Available PDFs: {pdfs}
""",
    input_variables=["input", "pdfs"],
)

In [39]:
# Compose the pipeline: render the prompt, then obtain the structured decision.
chain = prompt.pipe(structured_llm)

In [None]:
# Route a sample question, passing along whatever available_pdfs() returns
# as the candidate file list.
pdfs = available_pdfs()
response = chain.invoke(
    dict(
        input="what is summary of the topic machine learning in the ai engineering pdf",
        pdfs=pdfs,
    )
)

'rag'

In [41]:
# Show the full structured decision returned by the chain.
response

Parser(query='rag', pdfs=['AI Engineering.pdf'])

In [43]:
# Selected PDF filenames; as the cell's last expression the notebook renders
# the value directly, so no print() wrapper is needed.
response.pdfs

['AI Engineering.pdf']
