In [None]:
import os
import json
import logging
from pathlib import Path
import pypdf
from docx import Document as DocxDocument
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.memory import ConversationBufferMemory
from agno.agent import Agent
from agno.tools.reasoning import ReasoningTools
from agno.memory.v2.memory import Memory
from agno.memory.v2.db.sqlite import SqliteMemoryDb

# Configure the root logger (timestamp - level - message at INFO and above),
# then create the module-level logger used throughout this file.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)



In [None]:
# Azure OpenAI Configuration
# The key and endpoint are read from the environment so that secrets do not
# have to be written into this file. When the variables are unset the values
# fall back to empty strings, which is what the literals used to be.
AZURE_CONFIG = {
    "api_key": os.getenv("AZURE_OPENAI_API_KEY", ""),
    "endpoint": os.getenv("AZURE_OPENAI_ENDPOINT", ""),
    "api_version": "2024-12-01-preview",
    "embedding_deployment": "text-embedding-ada-002",
    "gpt_deployment": "gpt-4o"
}

# Initialize Azure OpenAI clients
# Chat model shared by the summary tool and the multi-agent system.
llm = AzureChatOpenAI(
    openai_api_key=AZURE_CONFIG["api_key"],
    azure_endpoint=AZURE_CONFIG["endpoint"],
    api_version=AZURE_CONFIG["api_version"],
    deployment_name=AZURE_CONFIG["gpt_deployment"],
    temperature=0.7
)

# Embedding model used when writing to and querying the Chroma vector store.
embeddings = AzureOpenAIEmbeddings(
    openai_api_key=AZURE_CONFIG["api_key"],
    azure_endpoint=AZURE_CONFIG["endpoint"],
    api_version=AZURE_CONFIG["api_version"],
    deployment=AZURE_CONFIG["embedding_deployment"]
)

In [None]:


# Custom tool for document summarization
class DocumentSummaryTool:
    """Tool object that asks the module-level ``llm`` for a short summary."""

    name = "document_summary"
    description = "Summarizes a document chunk for concise understanding."

    def run(self, document_text: str) -> str:
        """Return a 2-3 sentence summary of *document_text*.

        Any exception raised while building the prompt or calling the model
        is logged and replaced by a fixed fallback message, so this method
        never raises.
        """
        try:
            request = (
                "Summarize the following document text in 2-3 sentences:\n"
                f"{document_text}"
            )
            reply = llm.invoke(request)
            condensed = reply.content
            logger.info("Generated document summary")
        except Exception as exc:
            logger.error(f"Error summarizing document: {exc}")
            return "Unable to summarize document."
        return condensed

# InsuranceRAGSystem: Manages document ingestion, embedding, and retrieval
class InsuranceRAGSystem:
    """Ingest local insurance documents into ChromaDB and retrieve them.

    Constructing an instance creates the data directory (seeding it with
    sample files when it is missing or empty), splits every supported
    document into chunks and embeds them with the module-level
    ``embeddings`` client into a Chroma store persisted under ``db_dir``.
    """

    # File extensions that read_file() has a parser for.
    SUPPORTED_SUFFIXES = (".pdf", ".txt", ".docx")

    def __init__(self, data_dir="data", db_dir="chroma_db"):
        """Set up paths and the splitter, then ingest the data directory.

        Args:
            data_dir: Directory scanned for source documents.
            db_dir: Directory where the Chroma store is persisted.

        Raises:
            Exception: Whatever ``embed_documents`` re-raises on failure.
        """
        self.data_dir = Path(data_dir)
        self.db_dir = Path(db_dir)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.vectorstore = None
        logger.info("Initializing InsuranceRAGSystem")
        # The data directory must exist before documents can be embedded.
        self.initialize_data_dir()
        self.embed_documents()

    def initialize_data_dir(self):
        """Create the data directory, seeding sample documents if it is empty."""
        if not self.data_dir.exists():
            self.data_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created data directory at {self.data_dir}")
            self.create_sample_document()
        elif not any(self.data_dir.iterdir()):
            self.create_sample_document()
        logger.info(f"Data directory initialized at {self.data_dir}")

    def create_sample_document(self):
        """Write the sample insurance files that do not exist yet.

        Existing files are never overwritten.
        """
        sample_content = """
        # Insurance Basics
        ## Auto Insurance Types
        - **Liability Coverage**: Covers damages to others if you're at fault in an accident.
        - **Collision Coverage**: Covers damage to your car from a collision.
        - **Comprehensive Coverage**: Covers non-collision damage (e.g., theft, natural disasters).
        - **Personal Injury Protection (PIP)**: Covers medical expenses for you and your passengers.
        ## Home Insurance Claims
        - **Step 1**: Contact your insurance provider immediately.
        - **Step 2**: Document the damage with photos and videos.
        - **Step 3**: Submit a claim form with detailed descriptions.
        """
        files_to_create = {
            "sample_insurance_info.txt": sample_content,
            "auto_insurance.txt": "Details about auto insurance policies...",
            "home_insurance.txt": "Information on home insurance claims and coverage...",
            "life_insurance.txt": "Understanding life insurance options..."
        }
        for filename, content in files_to_create.items():
            filepath = self.data_dir / filename
            if filepath.exists():
                continue
            # Write as UTF-8 explicitly: read_file() decodes .txt files as
            # UTF-8, so the platform default encoding must not be used here.
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(content)
            logger.info(f"Created sample document: {filename}")

    def load_documents(self):
        """Read every supported file in the data directory.

        Returns:
            A list of ``Document`` objects, one per non-empty file, each
            carrying the file path under the ``"source"`` metadata key.
        """
        documents = []
        # Sorted so the ingestion order does not depend on the filesystem.
        for file_path in sorted(self.data_dir.iterdir()):
            if not file_path.is_file():
                continue
            if file_path.suffix.lower() not in self.SUPPORTED_SUFFIXES:
                continue
            content = self.read_file(file_path)
            if content:
                documents.append(Document(page_content=content, metadata={"source": str(file_path)}))
                logger.info(f"Loaded document: {file_path}")
        return documents

    def read_file(self, file_path):
        """Return the text of a PDF, plain-text or Word file.

        Args:
            file_path: ``Path`` of the file to read.

        Returns:
            The extracted text, or ``""`` for an unsupported extension or
            when reading fails (the error is logged, not raised).
        """
        suffix = file_path.suffix.lower()
        try:
            if suffix == ".pdf":
                with open(file_path, "rb") as f:
                    pdf = pypdf.PdfReader(f)
                    # Extract each page once; the join consumes the generator
                    # while the file is still open.
                    page_texts = (page.extract_text() for page in pdf.pages)
                    return "".join(text for text in page_texts if text)
            if suffix == ".txt":
                with open(file_path, "r", encoding="utf-8") as f:
                    return f.read()
            if suffix == ".docx":
                doc = DocxDocument(file_path)
                return "\n".join(paragraph.text for paragraph in doc.paragraphs if paragraph.text)
            return ""
        except Exception as e:
            logger.error(f"Error reading {file_path}: {e}")
            return ""

    @staticmethod
    def _chunk_ids(chunks):
        """Return one deterministic id per chunk: ``<source>#chunk-<n>``.

        ``n`` counts chunks separately for each source, so the ids depend
        only on a chunk's source and its position within that source.
        """
        next_index = {}
        ids = []
        for chunk in chunks:
            source = str(chunk.metadata.get("source", ""))
            index = next_index.get(source, 0)
            next_index[source] = index + 1
            ids.append(f"{source}#chunk-{index}")
        return ids

    def embed_documents(self):
        """Split all documents into chunks and store them in ChromaDB.

        Chunks are written with deterministic ids so that embedding the same
        files again is intended to overwrite the earlier entries instead of
        adding duplicates.
        NOTE(review): this relies on the LangChain Chroma wrapper upserting
        by id - confirm against the installed version. Entries written
        earlier under other ids, and trailing chunks of a file that has
        since shrunk, stay in the store until ``reset`` is called.

        Raises:
            Exception: Any error from loading, splitting or embedding is
                logged and re-raised.
        """
        try:
            documents = self.load_documents()
            if not documents:
                logger.warning("No documents found in data directory. Attempting to create sample document.")
                self.create_sample_document()
                documents = self.load_documents()
                if not documents:
                    logger.error("Still no documents found after attempting to create sample. Aborting embedding.")
                    return

            chunks = self.text_splitter.split_documents(documents)
            if not chunks:
                logger.warning("No chunks to embed after splitting documents.")
                return

            # Ensure the persist directory exists
            self.db_dir.mkdir(parents=True, exist_ok=True)

            self.vectorstore = Chroma.from_documents(
                documents=chunks,
                embedding=embeddings,
                ids=self._chunk_ids(chunks),
                persist_directory=str(self.db_dir)
            )
            # Only call persist() when the installed wrapper still has it.
            persist = getattr(self.vectorstore, "persist", None)
            if callable(persist):
                persist()
            logger.info(f"Embedded {len(chunks)} document chunks into {self.db_dir}")
        except Exception as e:
            logger.error(f"Error embedding documents: {e}")
            raise

    def _has_persisted_store(self):
        """Return True when the persist directory exists and is not empty."""
        return self.db_dir.exists() and any(self.db_dir.iterdir())

    def _open_persisted_store(self):
        """Open the Chroma store saved under ``db_dir``."""
        return Chroma(persist_directory=str(self.db_dir), embedding_function=embeddings)

    def search(self, query):
        """Return up to three chunks most similar to *query*.

        If no store is loaded, an existing persisted store is opened first.

        Returns:
            A list of documents; ``[]`` when no store is available or the
            search fails (the problem is logged, not raised).
        """
        if self.vectorstore is None:
            if not self._has_persisted_store():
                logger.warning("Vectorstore not initialized and no existing store found. Please load/embed documents.")
                return []
            try:
                self.vectorstore = self._open_persisted_store()
                logger.info(f"Loaded existing vectorstore from {self.db_dir}")
            except Exception as e:
                logger.error(f"Error loading existing vectorstore: {e}. Please reload/reset.")
                return []
        try:
            docs = self.vectorstore.similarity_search(query, k=3)
            logger.info(f"Retrieved {len(docs)} documents for query: {query}")
            return docs
        except Exception as e:
            logger.error(f"Error searching documents: {e}")
            return []

    def stats(self):
        """Return a one-line, human-readable count of stored entries.

        The count comes from the underlying collection, so it is the number
        of stored entries (chunks), reported under the label "documents".
        """
        if self.vectorstore is None:
            if not self._has_persisted_store():
                return "No documents loaded."
            try:
                self.vectorstore = self._open_persisted_store()
            except Exception as e:
                logger.error(f"Could not load vectorstore for stats: {e}")
                return "No documents loaded (failed to load existing)."
        try:
            # The code reaches into the private ``_collection`` attribute to
            # get the count.
            count = self.vectorstore._collection.count()
            logger.info(f"Vectorstore stats: {count} documents")
            return f"Number of documents: {count}"
        except Exception as e:
            logger.error(f"Error getting vectorstore stats: {e}")
            return "Error retrieving stats."

    def reset(self):
        """Delete the persisted store and rebuild it from the data directory.

        Errors are logged and swallowed, so callers are not told about a
        failed reset.
        """
        try:
            # Drop our reference before the files underneath it are removed.
            self.vectorstore = None
            if self.db_dir.exists():
                import shutil
                shutil.rmtree(self.db_dir)
                logger.info(f"Removed existing ChromaDB directory: {self.db_dir}")
            self.db_dir.mkdir(parents=True, exist_ok=True)
            self.embed_documents()
            logger.info("Vectorstore reset and re-embedded")
        except Exception as e:
            logger.error(f"Error resetting vectorstore: {e}")

# InsuranceMultiAgentSystem: Manages collaborative agents with enhanced Agno features
class InsuranceMultiAgentSystem:
    """Answer insurance questions by combining several role-specific prompts.

    Five Agno ``Agent`` objects are created, but ``answer_question`` reads
    only the first instruction line of each and sends every prompt straight
    to ``llm_client``; the agents themselves are not run.
    """

    def __init__(self, rag_system, llm_client):
        """Store collaborators, set up both memories and build the agents.

        Args:
            rag_system: Object whose ``search(question)`` returns documents.
            llm_client: Chat model whose ``invoke(prompt)`` result exposes
                ``.content``.

        Raises:
            Exception: Re-raised when the SQLite memory cannot be created.
        """
        self.rag_system = rag_system
        self.llm_client = llm_client
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True
        )

        # The SQLite file lives under ./tmp, which must exist first.
        Path("tmp").mkdir(exist_ok=True)
        try:
            self.sqlite_memory_db = SqliteMemoryDb(table_name="insurance_memories", db_file="tmp/agent.db")
            self.sqlite_memory = Memory(db=self.sqlite_memory_db)
            logger.info("Initialized SQLite memory at tmp/agent.db")
        except Exception as e:
            logger.error(f"Error initializing SQLite memory: {e}")
            raise
        self.initialize_agents()

    def _build_agent(self, name, instructions, description, extra_tools=(), **extra):
        """Create an Agent with the settings every team member shares."""
        return Agent(
            name=name,
            model=self.llm_client,
            instructions=instructions,
            tools=[ReasoningTools(add_instructions=True), *extra_tools],
            memory=self.sqlite_memory,
            enable_agentic_memory=True,
            description=description,
            markdown=True,
            **extra,
        )

    def initialize_agents(self):
        """Create the four specialist agents and the lead agent."""
        knowledge_retriever_instructions = [
            "You are a KnowledgeRetriever. Use the provided context to answer the user's question.",
            "Focus on retrieving accurate information from insurance documents.",
            "Use reasoning to ensure relevance and accuracy.",
            "Summarize document content if it is lengthy."
        ]
        claims_specialist_instructions = [
            "You are a ClaimsSpecialist. Provide detailed guidance on handling insurance claims.",
            "Explain steps clearly, including any documentation or processes required.",
            "Use reasoning to address edge cases and potential user concerns."
        ]
        policy_advisor_instructions = [
            "You are a PolicyAdvisor. Offer expert advice on insurance policy options.",
            "Explain policy types, coverage details, and considerations for choosing them.",
            "Use reasoning to tailor advice to the user's query."
        ]
        customer_service_instructions = [
            "You are a CustomerService agent. Greet the user warmly and coordinate responses.",
            "Ensure the user's question is clearly understood before passing it to other agents.",
            "Use memory to personalize responses based on past interactions."
        ]
        lead_agent_instructions = [
            "You are a LeadAgent coordinating a team of specialized insurance agents.",
            "Collect responses from the KnowledgeRetriever, ClaimsSpecialist, PolicyAdvisor, and CustomerService agents.",
            "Use reasoning to summarize and refine their inputs into a clear, concise, and accurate response.",
            "Return the response in JSON format with fields: 'answer' (string), 'sources' (list of strings), and 'confidence' (float between 0 and 1)."
        ]

        self.knowledge_retriever = self._build_agent(
            "KnowledgeRetriever",
            knowledge_retriever_instructions,
            "Retrieves and summarizes information from insurance documents.",
            extra_tools=[DocumentSummaryTool()],
        )
        self.claims_specialist = self._build_agent(
            "ClaimsSpecialist",
            claims_specialist_instructions,
            "Specializes in insurance claim processes.",
        )
        self.policy_advisor = self._build_agent(
            "PolicyAdvisor",
            policy_advisor_instructions,
            "Advises on insurance policy options.",
        )
        self.customer_service = self._build_agent(
            "CustomerService",
            customer_service_instructions,
            "Coordinates responses and greets users.",
        )
        self.lead_agent = self._build_agent(
            "LeadAgent",
            lead_agent_instructions,
            "Coordinates the insurance agent team and provides JSON responses.",
            team=[self.knowledge_retriever, self.claims_specialist, self.policy_advisor, self.customer_service],
        )
        logger.info("Agents initialized with ReasoningTools, DocumentSummaryTool, and SQLite memory")

    def _recent_memory_context(self):
        """Return the last three stored exchanges as prompt text, or ``""``.

        NOTE(review): whether the memory db object exposes ``get_messages``
        is not visible here; when it does not, a warning is logged and no
        history is used - confirm against the Agno memory API.
        """
        try:
            memory_db = getattr(self.sqlite_memory, "db", None)
            if memory_db is None or not hasattr(memory_db, "get_messages"):
                logger.warning("SQLite memory object or its 'db' attribute does not have 'get_messages' method.")
                return ""
            past_memories = memory_db.get_messages()[-3:]
            memory_context = "\n".join(
                f"Past Q: {m.get('message', '')}\nPast A: {m.get('response', '')}"
                for m in past_memories
            )
            if memory_context:
                logger.info("Retrieved recent past memories for personalization")
            return memory_context
        except Exception as e:
            logger.warning(f"Error retrieving memory messages: {e}")
            return ""

    def _ask(self, prompt):
        """Send *prompt* to the chat model and return the reply content."""
        return self.llm_client.invoke(prompt).content

    @staticmethod
    def _parse_lead_response(raw, sources):
        """Turn the lead agent's reply into the response dictionary.

        The reply is parsed as JSON. If that fails, the text between the
        first ``{`` and the last ``}`` is tried, so replies wrapped in a
        Markdown code fence or surrounded by prose are still understood.

        Args:
            raw: Reply content from the model.
            sources: Source paths from retrieval, used when the reply names
                none.

        Returns:
            A dict with ``answer``, ``sources`` and ``confidence``. The
            confidence is a float clamped to [0, 1]. Fallbacks use the raw
            text as the answer with confidence 0.6 (no JSON found) or 0.7
            (JSON without an ``answer`` field); a parsed reply without a
            usable confidence gets 0.85.
        """
        text = raw if isinstance(raw, str) else str(raw)
        candidates = [text.strip()]
        start, end = text.find("{"), text.rfind("}")
        if start != -1 and end > start:
            candidates.append(text[start:end + 1])

        parsed = None
        decoded = False
        for candidate in candidates:
            try:
                value = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            parsed, decoded = value, True
            if isinstance(parsed, dict) and 'answer' in parsed:
                break

        if not decoded:
            logger.warning(f"Failed to parse JSON from lead agent: {text}")
            return {'answer': text, 'sources': sources, 'confidence': 0.6}
        if not isinstance(parsed, dict) or 'answer' not in parsed:
            logger.warning(f"Lead agent output not in expected JSON dict format. Output: {text}")
            return {'answer': text, 'sources': sources, 'confidence': 0.7}

        # Use the retrieval sources when the model did not list any.
        if not parsed.get('sources'):
            parsed['sources'] = sources
        try:
            confidence = float(parsed.get('confidence', 0.85))
        except (TypeError, ValueError):
            confidence = 0.85
        parsed['confidence'] = min(1.0, max(0.0, confidence))
        return parsed

    def _remember(self, question, answer):
        """Record the exchange in the conversation buffer and SQLite memory.

        NOTE(review): whether the memory db object exposes ``add_message``
        is not visible here; when it does not, an error is logged and only
        the in-process buffer is updated.
        """
        self.memory.save_context({"input": question}, {"output": answer})
        try:
            memory_db = getattr(self.sqlite_memory, "db", None)
            if memory_db is not None and hasattr(memory_db, "add_message"):
                memory_db.add_message({"message": question, "response": answer})
            else:
                logger.error("SQLite memory object or its 'db' attribute does not have 'add_message' method.")
        except Exception as e:
            logger.error(f"Error adding message to SQLite memory: {e}")

    def answer_question(self, question):
        """Answer *question* using retrieval plus one prompt per role.

        Returns:
            A ``(response_dict, docs)`` tuple: the dictionary produced by
            ``_parse_lead_response`` and the retrieved documents.
        """
        memory_context = self._recent_memory_context()

        docs = self.rag_system.search(question)
        context = "\n".join([doc.page_content for doc in docs])
        sources = [doc.metadata.get('source', 'Unknown source') for doc in docs]

        cs_prompt = (
            f"Greetings! I'm here to help with your insurance question: {question}\n"
            f"Past interactions:\n{memory_context}\n"
            f"Instructions: {self.customer_service.instructions[0]}"
        )
        cs_response = self._ask(cs_prompt)

        # Summarise only when retrieval returned some text.
        doc_summary = DocumentSummaryTool().run(context) if context else "No relevant documents found."
        kr_prompt = (
            f"Question: {question}\n"
            f"Document Summary: {doc_summary}\n"
            f"Raw Context: {context}\n"
            f"Instructions: {self.knowledge_retriever.instructions[0]}"
        )
        kr_response = self._ask(kr_prompt)

        cspecial_prompt = f"Question: {question}\nInstructions: {self.claims_specialist.instructions[0]}"
        cspecial_response = self._ask(cspecial_prompt)

        pa_prompt = f"Question: {question}\nInstructions: {self.policy_advisor.instructions[0]}"
        pa_response = self._ask(pa_prompt)

        inputs = (
            f"CustomerService: {cs_response}\n"
            f"KnowledgeRetriever: {kr_response}\n"
            f"ClaimsSpecialist: {cspecial_response}\n"
            f"PolicyAdvisor: {pa_response}"
        )
        # Only the first lead-agent instruction is included below, and it
        # does not mention JSON, so the format request is spelled out.
        lead_agent_prompt = (
            f"Question: {question}\nInputs from team:\n{inputs}\n"
            f"Instructions: {self.lead_agent.instructions[0]}\n"
            "Return the response in JSON format with fields: 'answer' (string), 'sources' (list of strings), and 'confidence' (float)."
        )

        response_dict = self._parse_lead_response(self._ask(lead_agent_prompt), sources)
        self._remember(question, response_dict['answer'])

        logger.info(f"Processed question: {question}")
        return response_dict, docs


# Main program
def _print_documents(docs, empty_message):
    """Print one truncated line per document, or *empty_message* if none."""
    if not docs:
        print(empty_message)
        return
    for doc_item in docs:
        print(f"- {doc_item.metadata.get('source', 'Unknown')}: {doc_item.page_content[:100]}...")


def _print_answer(response, docs):
    """Print the answer, sources and confidence, then the retrieved documents."""
    print(f"Answer: {response.get('answer', 'N/A')}")
    print(f"Sources: {response.get('sources', [])}")
    print(f"Confidence: {response.get('confidence', 0.0)}")
    print("Relevant Documents:")
    _print_documents(docs, "No relevant documents found.")


def main():
    """Run the sample questions, then an interactive question/command loop.

    Commands: ``stats``, ``search [query]``, ``reload``, ``reset`` and
    ``quit``; any other non-empty input is answered as a question. Blank
    input is ignored. End-of-input or Ctrl-C at the prompt exits the loop.
    A failure while reloading or answering is reported and the loop
    continues.
    """
    rag_system = InsuranceRAGSystem()
    # The module-level llm client is shared with the multi-agent system.
    multi_agent_system = InsuranceMultiAgentSystem(rag_system, llm)

    sample_questions = [
        "What types of auto insurance should I consider?",
        "How do I file a home insurance claim?"
    ]

    print("Testing with sample questions:")
    for question in sample_questions:
        print(f"\nQuestion: {question}")
        response, docs = multi_agent_system.answer_question(question)
        _print_answer(response, docs)

    print("\nInteractive Mode - Type your question or use commands: stats, search [query], reload, reset, quit")
    while True:
        try:
            user_input = input("\nYour input: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave the loop without a traceback.
            print()
            break
        if not user_input:
            # Blank input is not a question; do not send it to the model.
            continue

        command = user_input.lower()
        if command == "quit":
            break
        elif command == "stats":
            print(rag_system.stats())
        elif command == "search":
            print("Usage: search [query]")
        elif command.startswith("search "):
            query = user_input[len("search "):].strip()
            retrieved_docs = rag_system.search(query)
            print("Search Results:")
            _print_documents(retrieved_docs, "No documents found for your search query.")
        elif command == "reload":
            print("Reloading documents...")
            try:
                rag_system.embed_documents()
            except Exception as e:
                # embed_documents logs the error before re-raising it.
                print(f"Reload failed: {e}")
            else:
                print("Documents reloaded.")
        elif command == "reset":
            print("Resetting vectorstore...")
            rag_system.reset()
            print("Vectorstore reset.")
        else:
            try:
                response, retrieved_docs = multi_agent_system.answer_question(user_input)
            except Exception as e:
                # One failed request should not end the whole session.
                logger.exception("Failed to answer question")
                print(f"Sorry, the question could not be answered: {e}")
                continue
            _print_answer(response, retrieved_docs)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()