In [46]:
# 1. Load environment variables
# load_dotenv() is called before the lookup so that a key supplied through a
# .env file is visible in the process environment when it is read below.
import os
from dotenv import load_dotenv

load_dotenv()

# None when GROQ_API_KEY is not defined anywhere.
groq_api_key = os.environ.get("GROQ_API_KEY")

In [None]:
# 2. Import Dependencies
from langchain.chat_models import init_chat_model
from langchain_community.document_loaders import PyPDFLoader 
from langchain_text_splitters import RecursiveCharacterTextSplitter 
from langchain.tools import tool
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq 
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain.agents import create_agent



In [48]:
# 3. Define document categories and the files that belong to each one
# Source PDFs grouped by category; insertion order fixes the processing order.
_files_by_category = {
    "CaseStudy": ["CaseStudy_1.pdf", "CaseStudy_2.pdf", "CaseStudy_3.pdf"],
    "faq": ["faqs_1.pdf", "faqs_2.pdf"],
    "Newsletter": ["Newsletter_1.pdf", "Newsletter_2.pdf", "Newsletter_3.pdf"],
}

# Flat list of (file path, category) pairs consumed by the loading loop.
files = [
    (pdf_name, label)
    for label, pdf_names in _files_by_category.items()
    for pdf_name in pdf_names
]

In [49]:
# 4. Prepare the Chroma root directory and the embedding model
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Root folder under which the per-category persist directories are placed.
base_dir = "./chroma_agentic"
os.makedirs(base_dir, exist_ok=True)

# Single embedding model instance handed to every vector store.
embedding_model = HuggingFaceEmbeddings(model=EMBEDDING_MODEL_NAME)

In [50]:
# 5. Load each PDF, tag its pages, split into chunks and group by category
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# One chunk list per category; keys must match the categories used in `files`.
category_docs = {"CaseStudy": [], "faq": [], "Newsletter": []}

for file_path, category in files:
    print(f"Processing: {file_path}")
    pages = PyPDFLoader(file_path).load()

    # Tag pages before splitting (the chunks are expected to carry the page
    # metadata, which the sources printout reads back later).
    for page in pages:
        page.metadata["category"] = category

    category_docs[category] += splitter.split_documents(pages)

Processing: CaseStudy_1.pdf
Processing: CaseStudy_2.pdf
Processing: CaseStudy_3.pdf
Processing: faqs_1.pdf
Processing: faqs_2.pdf
Processing: Newsletter_1.pdf
Processing: Newsletter_2.pdf
Processing: Newsletter_3.pdf


In [51]:
# 6. Build (first run) or reopen (later runs) one Chroma store per category
vectorstores = {}
for category, docs in category_docs.items():
    persist_dir = os.path.join(base_dir, category.lower())
    os.makedirs(persist_dir, exist_ok=True)

    # A non-empty persist directory is treated as an already-built store.
    already_persisted = bool(os.listdir(persist_dir))

    if already_persisted:
        print(f"üîÅ Loading existing vector store for {category}...")
        vs = Chroma(persist_directory=persist_dir, embedding_function=embedding_model)
    else:
        print(f"üß† Creating new vector store for {category}...")
        vs = Chroma.from_documents(
            documents=docs,
            embedding=embedding_model,
            persist_directory=persist_dir,
        )

    vectorstores[category] = vs

print("\n‚úÖ Vector stores ready for all categories!\n")

üîÅ Loading existing vector store for CaseStudy...
üîÅ Loading existing vector store for faq...
üîÅ Loading existing vector store for Newsletter...

‚úÖ Vector stores ready for all categories!



In [52]:
# 7. Agentic category selector
# Prompt text asking the LLM to name the single best-matching category for
# the question; the reply is interpreted by choose_category().
_CATEGORY_TEMPLATE = """
You are an intelligent assistant. Based on the user's question, decide which category of company documents
the answer is most likely found in. 

Choose only one category from: 
1. faq 
2. CaseStudy 
3. Newsletter

Question: {question}
Category:
"""

category_prompt = PromptTemplate(
    template=_CATEGORY_TEMPLATE,
    input_variables=["question"],
)

def choose_category(question):
    """Ask the LLM to classify query into a document category"""
    # NOTE(review): this function is also handed to create_agent as a tool, so
    # the docstring above presumably doubles as the tool description — keep it
    # in sync with what the agent should see.
    reply = llm.invoke(category_prompt.format(question=question))
    normalized = reply.content.strip().lower()

    # Keywords are tested in this order; the first one found in the reply wins.
    keyword_to_category = (
        ("case", "CaseStudy"),
        ("faq", "faq"),
        ("news", "Newsletter"),
    )
    for keyword, label in keyword_to_category:
        if keyword in normalized:
            return label

    return "faq"  # Default fallback

In [53]:
# 8. Prompt for the question-answering step
# Takes the retrieved text as {context} and the user's query as {question}.
_QA_TEMPLATE = """
You are a company assistant. Use the provided context to answer the user's question clearly and concisely.

<context>
{context}
</context>

Question: {question}
Answer:
"""

qa_prompt = PromptTemplate(
    template=_QA_TEMPLATE,
    input_variables=["context", "question"],
)

In [54]:
def get_rag_response(question):
    """Complete RAG pipeline with dynamic category routing.

    Picks a document category for ``question`` with ``choose_category``,
    retrieves the 3 most similar chunks from that category's vector store,
    asks the LLM to answer using ``qa_prompt`` with those chunks as context,
    and prints the chosen category, the answer and the source of each chunk.

    Args:
        question: The user's question as a plain string.

    Returns:
        None. All results are printed.

    Raises:
        KeyError: If the chosen category has no entry in ``vectorstores``.
    """
    category = choose_category(question)
    print(f"\nü§ñ Chosen Category: {category}")

    vectorstore = vectorstores[category]
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

    # A retriever is invoked with the raw query string and yields a list of
    # documents; it does not generate an answer or return a result dict.
    source_documents = retriever.invoke(question)
    context = "\n\n".join(doc.page_content for doc in source_documents)

    # Generation step: fill the QA prompt with the retrieved context and let
    # the chat model produce the answer.
    answer = llm.invoke(qa_prompt.format(context=context, question=question))
    print("\n Response:\n", answer.content)

    print("\nüìö Sources:")
    for doc in source_documents:
        print(f" - {doc.metadata.get('source')} | Category: {doc.metadata.get('category')}")

In [55]:
# Chat model used by choose_category() and passed to create_agent().
llm = init_chat_model(model="llama-3.3-70b-versatile", model_provider="groq")

In [56]:
# Agent built on the chat model with the category classifier as its only tool.
agent_executor = create_agent(model=llm, tools=[choose_category])