<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Luna_Tax_Copilot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Luna Tax Copilot Implementation**

In [10]:
!pip install -qU langchain langchain-openai langchain-community langgraph openai chromadb neo4j python-dotenv pandas numpy pypdf docx2txt openpyxl
!pip install -q langchain_neo4j
!pip install -qU crawl4ai
!playwright install

╔══════════════════════════════════════════════════════╗
║ Host system is missing dependencies to run browsers. ║
║ Missing libraries:                                   ║
║     libwoff2dec.so.1.0.2                             ║
║     libgstgl-1.0.so.0                                ║
║     libgstcodecparsers-1.0.so.0                      ║
║     libavif.so.13                                    ║
║     libharfbuzz-icu.so.0                             ║
║     libenchant-2.so.2                                ║
║     libsecret-1.so.0                                 ║
║     libhyphen.so.0                                   ║
║     libmanette-0.2.so.0                              ║
╚══════════════════════════════════════════════════════╝
    at validateDependenciesLinux (/usr/local/lib/python3.11/dist-packages/playwright/driver/package/lib/server/registry/dependencies.js:216:9)
    at process.processTicksAndRejections (node:internal/process/task_queues:105:5)
    at async Registry._validateHo

In [11]:
import os
from typing import Dict, List, Any, Tuple
from datetime import datetime

# LangChain imports
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_neo4j import GraphCypherQAChain, Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# LangGraph imports for agent orchestration
from langgraph.graph import END, StateGraph

# Required environment variables
# os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
# os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
# os.environ["NEO4J_USERNAME"] = "neo4j"
# os.environ["NEO4J_PASSWORD"] = "password"

In [14]:
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy

async def crawl_ato_legal_database(max_pages=100):
    """
    Deep-crawl the ATO legal database and save every crawled page to disk.

    Runs a breadth-first crawl (depth 3, internal links only) from the legal
    database landing page. Each page is written to ``page_<n>.html`` in the
    current working directory, where ``n`` is the 1-based crawl order.

    Args:
        max_pages: Number of pages after which the crawl is stopped

    Returns:
        List of crawl results collected so far (partial if the crawl
        failed part-way through; the error is printed, not raised)
    """
    config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(
            max_depth=3,
            include_external=False
        ),
        scraping_strategy=LXMLWebScrapingStrategy(),
        # Stream results so each page can be saved (and the crawl stopped at
        # max_pages) as it arrives rather than after the whole crawl ends.
        stream=True,
        verbose=True
    )

    start_url = "https://www.ato.gov.au/single-page-applications/legaldatabase#Law"

    results = []
    async with AsyncWebCrawler() as crawler:
        try:
            # AsyncWebCrawler has no ``acrawl``; ``arun`` with a streaming
            # config returns an async generator of crawl results.
            async for page in await crawler.arun(start_url, config=config):
                # Crawl results expose the raw markup as ``html``.
                html = page.html or ""
                results.append(page)
                print(f"\nCrawled Page {len(results)}: {page.url}")
                print(f"Sample Content (First 300 chars):\n{html[:300]}...\n")

                # Save page content
                with open(f"page_{len(results)}.html", "w", encoding="utf-8") as f:
                    f.write(html)

                if len(results) >= max_pages:
                    break

        except Exception as e:
            # Best-effort crawl: report the failure and keep what was collected.
            print(f"Error during crawling: {str(e)}")

    return results

# Top-level ``await`` is only valid inside an IPython/Jupyter cell; in a plain
# script this call would need to be wrapped, e.g. with ``asyncio.run(...)``.
await crawl_ato_legal_database()

TargetClosedError: BrowserType.launch: Target page, context or browser has been closed

In [None]:
# Initialize the language model
def get_llm(model_name="gpt-4o", temperature=0):
    """
    Build the chat model shared by the pipeline components.

    Args:
        model_name: Name of the chat model to request
        temperature: Sampling temperature handed to the model

    Returns:
        ChatOpenAI instance configured with the given settings
    """
    llm_settings = {"model": model_name, "temperature": temperature}
    return ChatOpenAI(**llm_settings)

In [None]:
# ----- DATA PROCESSING COMPONENTS -----

def init_data_crawler(ato_db_connection_string=None):
    """
    Build the crawler callable used to fetch documents from the ATO database.

    Args:
        ato_db_connection_string: Connection string to the ATO database
            (accepted but not used by this simulated implementation)

    Returns:
        Function that takes a query string and returns a list of documents
    """
    def crawl_data(query: str) -> List[Dict[str, Any]]:
        """
        Return the documents matching the query.

        No database is contacted: the same three sample records are
        returned for every query.

        Args:
            query: Query to fetch data from the ATO database

        Returns:
            List of dicts with "id", "title" and "content" keys
        """
        print(f"Crawling data with query: {query}")

        # Placeholder rows standing in for a real database lookup.
        sample_rows = (
            ("1", "Income Tax Assessment", "Guidelines for assessing taxable income..."),
            ("2", "GST Regulations", "Goods and Services Tax regulations and exemptions..."),
            ("3", "Tax Deductions", "Eligible deductions for various business expenses..."),
        )

        return [
            {"id": doc_id, "title": title, "content": content}
            for doc_id, title, content in sample_rows
        ]

    return crawl_data

def init_data_cleaner():
    """
    Build the cleaner callable that preprocesses crawled records.

    Returns:
        Function that maps a list of raw records to a list of cleaned records
    """
    def _clean_record(record: Dict[str, Any]) -> Dict[str, Any]:
        # Keep only the known fields, strip "..." from the content, and stamp
        # the record with the (local, naive) time at which it was cleaned.
        return {
            "id": record["id"],
            "title": record["title"],
            "content": record["content"].replace("...", ""),
            "cleaned_timestamp": datetime.now().isoformat(),
        }

    def clean_data(raw_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Produce a cleaned copy of every raw record.

        Args:
            raw_data: Raw records from the crawler; each needs "id",
                "title" and "content" keys

        Returns:
            New list of cleaned records, in the same order as the input
        """
        return [_clean_record(record) for record in raw_data]

    return clean_data

def init_data_chunker():
    """
    Build the chunker callable that splits cleaned records into documents.

    Returns:
        Function that maps cleaned records to a flat list of document chunks
    """
    def chunk_data(cleaned_data: List[Dict[str, Any]]) -> List[Document]:
        """
        Wrap each cleaned record in a Document and split it into chunks.

        Args:
            cleaned_data: Cleaned records from the cleaner

        Returns:
            Chunks of all records, in record order; every chunk carries the
            id, title, source and timestamp metadata of its record
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )

        whole_documents = [
            Document(
                page_content=record["content"],
                metadata={
                    "id": record["id"],
                    "title": record["title"],
                    "source": "ATO Database",
                    "timestamp": record.get("cleaned_timestamp"),
                },
            )
            for record in cleaned_data
        ]

        # The splitter handles documents one after another, so a single call
        # yields the chunks in the same order as splitting them individually.
        return splitter.split_documents(whole_documents)

    return chunk_data

def init_data_encoder():
    """
    Build the encoder callable that embeds document chunks.

    Returns:
        Function that maps document chunks to (chunks, embedding vectors)
    """
    def encode_data(document_chunks: List[Document]) -> Tuple[List[Document], List[List[float]]]:
        """
        Embed the text of every chunk.

        Args:
            document_chunks: Document chunks from the chunker

        Returns:
            The chunks as passed in, paired with one embedding per chunk
        """
        embedder = OpenAIEmbeddings()
        chunk_texts = [chunk.page_content for chunk in document_chunks]
        vectors = embedder.embed_documents(chunk_texts)
        return document_chunks, vectors

    return encode_data

In [None]:
# ----- DATABASE COMPONENTS -----

def init_vector_database():
    """
    Initialize the vector database for storing embeddings

    Returns:
        Dict with the Chroma instance under "vector_db" and two helper
        functions under "store" and "search"
    """
    # Named differently from the ``embeddings`` argument of the store helper
    # below so the model is not shadowed by the list of vectors.
    embedding_model = OpenAIEmbeddings()
    vector_db = Chroma(embedding_function=embedding_model, collection_name="tax_regulations")

    def store_in_vector_db(documents: List[Document], embeddings: List[List[float]]) -> None:
        """
        Store documents in the vector database.

        Args:
            documents: Document chunks to store
            embeddings: Precomputed vectors for the chunks. Kept for
                interface compatibility only: ``Chroma.add_texts`` has no
                parameter for precomputed vectors and embeds the texts
                itself with the store's embedding function.
        """
        if not documents:
            # Nothing to write; skip the call so no empty batch is sent.
            print("No documents to store in the vector database")
            return

        texts = [doc.page_content for doc in documents]
        metadatas = [doc.metadata for doc in documents]

        # Do not forward ``embeddings``: add_texts swallows unknown keyword
        # arguments, so passing it only hid the fact that it was ignored.
        vector_db.add_texts(texts=texts, metadatas=metadatas)
        print(f"Stored {len(documents)} documents in the vector database")

    def search_vector_db(query: str, top_k: int = 5) -> List[Document]:
        """
        Search the vector database for relevant documents.

        Args:
            query: Text to search for
            top_k: Maximum number of documents to return

        Returns:
            Documents returned by the similarity search
        """
        return vector_db.similarity_search(query, k=top_k)

    return {
        "vector_db": vector_db,
        "store": store_in_vector_db,
        "search": search_vector_db
    }

def init_relationship_database():
    """
    Initialize the graph database for storing relationships

    Connection settings are read from the NEO4J_URI, NEO4J_USERNAME and
    NEO4J_PASSWORD environment variables, with local defaults.

    Returns:
        Dict with the graph instance under "graph_db" and helper functions
        under "store_relationship", "store_node" and "query"
    """
    import re

    # The class imported from langchain_neo4j is Neo4jGraph; the previous
    # name ``Neo4j`` was never defined and raised NameError here.
    graph_db = Neo4jGraph(
        url=os.environ.get("NEO4J_URI", "neo4j://localhost:7687"),
        username=os.environ.get("NEO4J_USERNAME", "neo4j"),
        password=os.environ.get("NEO4J_PASSWORD", "password")
    )

    # Relationship types cannot be passed as Cypher parameters, so they are
    # interpolated into the query text and must be restricted to identifiers.
    relationship_type_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

    def store_relationship(source_id: str, target_id: str, relationship_type: str, properties: Dict = None) -> None:
        """
        Store a relationship between two existing nodes.

        Args:
            source_id: ``id`` property of the source node
            target_id: ``id`` property of the target node
            relationship_type: Relationship type; letters, digits and
                underscores only, not starting with a digit
            properties: Optional properties to set on the relationship

        Raises:
            ValueError: If relationship_type is not a plain identifier
        """
        if not isinstance(relationship_type, str) or not relationship_type_pattern.fullmatch(relationship_type):
            raise ValueError(f"Invalid relationship type: {relationship_type!r}")

        if properties is None:
            properties = {}

        # Create Cypher query to add relationship
        query = f"""
        MATCH (source) WHERE source.id = $source_id
        MATCH (target) WHERE target.id = $target_id
        CREATE (source)-[r:{relationship_type} $properties]->(target)
        RETURN source, r, target
        """

        params = {
            "source_id": source_id,
            "target_id": target_id,
            "properties": properties
        }

        graph_db.query(query, params)

    def store_document_node(document: Document) -> None:
        """
        Store a document as a ``Document`` node.

        A new node is created on every call; no existing node is matched.

        Args:
            document: Document whose content and id/title/source metadata
                are written to the node
        """
        # Create Cypher query to add node
        query = """
        CREATE (d:Document {
            id: $id,
            title: $title,
            content: $content,
            source: $source
        })
        """

        params = {
            "id": document.metadata.get("id"),
            "title": document.metadata.get("title"),
            "content": document.page_content,
            "source": document.metadata.get("source")
        }

        graph_db.query(query, params)

    def query_graph(cypher_query: str, params: Dict = None) -> List[Dict]:
        """
        Run a Cypher query against the graph database.

        Args:
            cypher_query: Cypher text to execute
            params: Optional query parameters

        Returns:
            Result rows as returned by the graph client
        """
        if params is None:
            params = {}

        return graph_db.query(cypher_query, params)

    return {
        "graph_db": graph_db,
        "store_relationship": store_relationship,
        "store_node": store_document_node,
        "query": query_graph
    }

In [None]:
# ----- AGENT COMPONENTS -----

def init_context_engine(vector_db_search):
    """
    Build the context engine that gathers context for a user prompt.

    Args:
        vector_db_search: Callable ``(query, top_k=...)`` returning documents
            that have ``page_content`` and ``metadata`` attributes

    Returns:
        Function that turns a prompt (and optional goal) into context info
    """
    def process_prompt(prompt: str, goal: str = None) -> Dict:
        """
        Look up the three most relevant documents and package them as context.

        Args:
            prompt: User prompt
            goal: Optional goal, passed through unchanged

        Returns:
            Dict with "original_prompt", "goal", "retrieved_context" (the
            documents' text, numbered from 1) and "document_ids"
        """
        hits = vector_db_search(prompt, top_k=3)

        # One "Document N:" section per hit, separated by blank lines.
        sections = []
        doc_ids = []
        for position, hit in enumerate(hits, start=1):
            sections.append(f"Document {position}:\n{hit.page_content}")
            doc_ids.append(hit.metadata.get("id"))

        return {
            "original_prompt": prompt,
            "goal": goal,
            "retrieved_context": "\n\n".join(sections),
            "document_ids": doc_ids,
        }

    return process_prompt

def init_prompt_optimizer(llm):
    """
    Build the prompt optimizer that rewrites a prompt using retrieved context.

    Args:
        llm: Language model that performs the rewrite

    Returns:
        Function that maps context info to original and optimized prompts
    """
    # Template -> model -> plain string
    rewrite_chain = PromptTemplate.from_template(
        """You are a Tax Copilot that helps tax professionals.

        Original prompt: {original_prompt}
        Goal: {goal}

        Relevant context from tax regulations:
        {retrieved_context}

        Rewrite the prompt to be more specific and include relevant tax regulations from the context.
        Modified prompt:
        """
    ) | llm | StrOutputParser()

    def optimize_prompt(context_info: Dict) -> Dict:
        """
        Rewrite the prompt held in the context info.

        Args:
            context_info: Context information from the context engine; it is
                passed to the chain as-is and supplies the template variables

        Returns:
            Dict with "original_prompt", "optimized_prompt",
            "retrieved_context" and "document_ids"
        """
        rewritten = rewrite_chain.invoke(context_info)

        outcome = {
            "original_prompt": context_info["original_prompt"],
            "optimized_prompt": rewritten,
        }
        # Carry the retrieval details through unchanged.
        for carried_key in ("retrieved_context", "document_ids"):
            outcome[carried_key] = context_info[carried_key]
        return outcome

    return optimize_prompt

def init_retriever(vector_db_search, graph_db_query):
    """
    Initialize the retriever that fetches information from databases

    Args:
        vector_db_search: Function to search the vector database
        graph_db_query: Function to query the graph database

    Returns:
        Function that retrieves information
    """
    def retrieve_information(query: str) -> Dict:
        """
        Retrieve information from both vector and graph databases

        Args:
            query: Query to search for

        Returns:
            Retrieved information from both databases
        """
        # Retrieve from vector DB
        vector_results = vector_db_search(query, top_k=5)

        # Retrieve from graph DB
        # Look for related tax regulations via graph relationships
        graph_query = """
        MATCH (d:Document)-[r]-(related)
        WHERE d.title CONTAINS $keyword OR d.content CONTAINS $keyword
        RETURN related.title, related.content, type(r) as relationship
        LIMIT 5
        """
        graph_results = graph_db_query(graph_query, {"keyword": query})

        # Combine results
        return {
            "vector_results": vector_results,
            "graph_results": graph_results,
            "query": query
        }

    return retrieve_information

def init_reranker(llm):
    """
    Build the reranker that asks the model to order retrieved documents.

    Args:
        llm: Language model that performs the ranking

    Returns:
        Function that maps retrieval results to reranked results
    """
    # Template -> model -> plain string
    ranking_chain = PromptTemplate.from_template(
        """You are a tax expert assistant.

        You need to rank the following documents based on their relevance to the query: {query}

        Documents:
        {documents}

        Return a JSON list of document indices in order of relevance (most relevant first),
        with a brief explanation for each ranking.
        """
    ) | llm | StrOutputParser()

    def rerank_results(retrieval_results: Dict) -> Dict:
        """
        Ask the model to rank the vector-store hits for the query.

        Only "vector_results" are ranked; documents are numbered from 0 in
        the prompt. The model's answer is returned as raw text.

        Args:
            retrieval_results: Results from the retriever

        Returns:
            Dict with "original_results" (the input) and "reranked_results"
        """
        numbered_docs = []
        for index, hit in enumerate(retrieval_results["vector_results"]):
            numbered_docs.append(f"Document {index}: {hit.page_content}")
        documents_text = "\n\n".join(numbered_docs)

        ranking = ranking_chain.invoke({
            "query": retrieval_results["query"],
            "documents": documents_text,
        })

        return {"original_results": retrieval_results, "reranked_results": ranking}

    return rerank_results

def init_reasoning_and_planning(llm):
    """
    Build the planner that drafts a workflow plan for a tax query.

    Args:
        llm: Language model that writes the plan

    Returns:
        Function that maps reranked results to a workflow plan
    """
    # Template -> model -> plain string
    planning_chain = PromptTemplate.from_template(
        """You are a tax expert assistant.

        Based on the following query and available information, create a workflow plan
        to help the tax professional.

        Query: {query}

        Relevant information:
        {context}

        Create a step-by-step workflow plan to address this tax query. Include:
        1. Key tax regulations to consider
        2. Analysis steps
        3. Documentation needed
        4. Potential advice or recommendations

        Workflow plan:
        """
    ) | llm | StrOutputParser()

    def create_workflow(reranked_results: Dict) -> Dict:
        """
        Draft a workflow plan from the query and the retrieved documents.

        The context is the text of the original vector-store hits in their
        retrieval order; the reranker's output is not consulted here.

        Args:
            reranked_results: Reranked results from the reranker

        Returns:
            Dict with "query", "workflow_plan" and "context"
        """
        retrieval = reranked_results["original_results"]
        query = retrieval["query"]

        passages = [hit.page_content for hit in retrieval["vector_results"]]
        context = "\n\n".join(passages)

        plan = planning_chain.invoke({"query": query, "context": context})

        return {"query": query, "workflow_plan": plan, "context": context}

    return create_workflow

def init_workflow_execution(llm, graph_db_store_relationship):
    """
    Build the executor that turns a workflow plan into a response.

    Args:
        llm: Language model that writes the response
        graph_db_store_relationship: Callable
            ``(source_id, target_id, relationship_type, properties)`` used to
            record the execution in the graph database

    Returns:
        Function that maps workflow info to an execution result
    """
    # Template -> model -> plain string
    response_chain = PromptTemplate.from_template(
        """You are a tax expert assistant.

        Execute the following workflow plan to provide a detailed response to the tax query:

        Query: {query}

        Workflow plan:
        {workflow_plan}

        Context information:
        {context}

        Generate a comprehensive response that follows the workflow steps and addresses the query.
        Include specific tax regulations, guidelines, and actionable advice.

        Response:
        """
    ) | llm | StrOutputParser()

    def _record_execution() -> None:
        # Best-effort bookkeeping with placeholder node ids; a graph failure
        # is reported as a warning and never interrupts the response.
        try:
            graph_db_store_relationship(
                "query_node",
                "response_node",
                "GENERATED",
                {"timestamp": datetime.now().isoformat()},
            )
        except Exception as e:
            print(f"Warning: Could not store relationship in graph DB: {e}")

    def execute_workflow(workflow_info: Dict) -> Dict:
        """
        Generate a response by following the workflow plan.

        Args:
            workflow_info: Dict with "query", "workflow_plan" and "context"

        Returns:
            Dict with "query" and the generated "response"
        """
        chain_inputs = {
            "query": workflow_info["query"],
            "workflow_plan": workflow_info["workflow_plan"],
            "context": workflow_info["context"],
        }
        answer = response_chain.invoke(chain_inputs)

        _record_execution()

        return {"query": workflow_info["query"], "response": answer}

    return execute_workflow

def init_generator(llm):
    """
    Build the generator that formats the executed response for delivery.

    Args:
        llm: Language model that performs the formatting

    Returns:
        Function that maps an execution result to the formatted response text
    """
    # Template -> model -> plain string
    formatting_chain = PromptTemplate.from_template(
        """You are a tax expert assistant.

        Format the following response to be clear, professional, and actionable for a tax professional:

        Query: {query}

        Response content:
        {response}

        Format this as a professional tax advisory response with proper headings, bullet points where appropriate,
        and clear actionable steps.

        Final response:
        """
    ) | llm | StrOutputParser()

    def generate_response(execution_result: Dict) -> str:
        """
        Format the response held in the execution result.

        Args:
            execution_result: Dict with "query" and "response" from
                workflow execution

        Returns:
            Formatted response text
        """
        chain_inputs = {
            "query": execution_result["query"],
            "response": execution_result["response"],
        }
        return formatting_chain.invoke(chain_inputs)

    return generate_response

def init_guardrails(llm):
    """
    Build the guardrail step that has the model review a generated response.

    Args:
        llm: Language model that performs the review

    Returns:
        Function that maps a response to its reviewed version
    """
    # Template -> model -> plain string
    review_chain = PromptTemplate.from_template(
        """You are a tax compliance expert.

        Review the following response to ensure it:
        1. Only provides factually accurate tax information
        2. Doesn't give absolute tax advice without caveats
        3. Recommends consulting a tax professional for specific situations
        4. Doesn't contain outdated tax regulations
        5. Makes clear distinctions between federal and state/local tax matters

        Response to review:
        {response}

        If the response meets all criteria, return it unchanged.
        If it needs revision, provide the corrected version that addresses the issues.

        Reviewed response:
        """
    ) | llm | StrOutputParser()

    def apply_guardrails(response: str) -> str:
        """
        Have the model review the response against the guardrail criteria.

        Args:
            response: Generated response

        Returns:
            The model's reviewed (possibly revised) response text
        """
        reviewed = review_chain.invoke({"response": response})
        return reviewed

    return apply_guardrails

In [None]:
# ----- LANGGRAPH STATE AND WORKFLOW -----

def create_tax_copilot_graph():
    """
    Create the LangGraph for the Tax Copilot pipeline

    The graph is a linear chain: context_engine -> prompt_optimizer ->
    retriever -> reranker -> reasoning_and_planning -> workflow_execution ->
    generator -> guardrails. Each node returns a partial state update
    holding only the field it produces.

    Returns:
        Compiled StateGraph for the Tax Copilot
    """
    from dataclasses import dataclass

    # Initialize the language model
    llm = get_llm()

    # Initialize databases
    vector_db = init_vector_database()
    graph_db = init_relationship_database()

    # Initialize agent components
    context_engine = init_context_engine(vector_db["search"])
    prompt_optimizer = init_prompt_optimizer(llm)
    retriever = init_retriever(vector_db["search"], graph_db["query"])
    reranker = init_reranker(llm)
    reasoning_and_planning = init_reasoning_and_planning(llm)
    workflow_execution = init_workflow_execution(llm, graph_db["store_relationship"])
    generator = init_generator(llm)
    guardrails = init_guardrails(llm)

    # The state schema must be constructible from the input mapping and is
    # read by attribute in the nodes below. A plain annotated class has no
    # __init__ accepting fields, so a dataclass is used instead.
    @dataclass
    class State:
        """State for the Tax Copilot pipeline"""
        prompt: str
        goal: str = None
        context_info: Dict = None
        optimized_prompt: Dict = None
        retrieval_results: Dict = None
        reranked_results: Dict = None
        workflow_plan: Dict = None
        execution_result: Dict = None
        generated_response: str = None
        final_response: str = None

    # Create the graph
    workflow = StateGraph(State)

    # Pipeline stages in execution order: (node name, node function)
    stages = [
        ("context_engine", lambda state: {"context_info": context_engine(state.prompt, state.goal)}),
        ("prompt_optimizer", lambda state: {"optimized_prompt": prompt_optimizer(state.context_info)}),
        ("retriever", lambda state: {"retrieval_results": retriever(state.optimized_prompt["optimized_prompt"])}),
        ("reranker", lambda state: {"reranked_results": reranker(state.retrieval_results)}),
        ("reasoning_and_planning", lambda state: {"workflow_plan": reasoning_and_planning(state.reranked_results)}),
        ("workflow_execution", lambda state: {"execution_result": workflow_execution(state.workflow_plan)}),
        ("generator", lambda state: {"generated_response": generator(state.execution_result)}),
        ("guardrails", lambda state: {"final_response": guardrails(state.generated_response)}),
    ]

    # Add nodes to the graph
    for stage_name, stage_fn in stages:
        workflow.add_node(stage_name, stage_fn)

    # Add edges to the graph: each stage feeds the next, the last one ends
    stage_names = [stage_name for stage_name, _ in stages]
    for upstream, downstream in zip(stage_names, stage_names[1:]):
        workflow.add_edge(upstream, downstream)
    workflow.add_edge(stage_names[-1], END)

    # Set the entry point
    workflow.set_entry_point(stage_names[0])

    # Compile the graph
    return workflow.compile()

In [None]:
# ----- MAIN FUNCTION TO INITIALIZE THE SYSTEM -----

def setup_tax_copilot_system():
    """
    Set up the Tax Copilot system with data processing and agent components

    Runs the ingestion pipeline once (crawl -> clean -> chunk -> encode),
    stores the result in the vector and graph databases, then builds the
    agent graph. Graph-database write failures are reported as warnings and
    do not abort setup.

    Returns:
        Compiled Tax Copilot graph
    """
    # No language model is created here: ingestion does not use one, and the
    # graph factory called at the end creates its own.

    # Initialize databases
    vector_db = init_vector_database()
    graph_db = init_relationship_database()

    # Set up data processing pipeline
    data_crawler = init_data_crawler()
    data_cleaner = init_data_cleaner()
    data_chunker = init_data_chunker()
    data_encoder = init_data_encoder()

    # Process initial data (in a real system, this would fetch from ATO database)
    raw_data = data_crawler("initial tax regulations")
    cleaned_data = data_cleaner(raw_data)
    document_chunks = data_chunker(cleaned_data)
    documents, embeddings = data_encoder(document_chunks)

    # Store processed data in databases
    vector_db["store"](documents, embeddings)

    # Store documents in graph database; one failed node must not stop the rest
    for doc in documents:
        try:
            graph_db["store_node"](doc)
        except Exception as e:
            print(f"Warning: Could not store document in graph DB: {e}")

    # Create sample relationships (in a real system, these would be derived from the data)
    try:
        graph_db["store_relationship"]("1", "2", "RELATES_TO", {"type": "reference"})
        graph_db["store_relationship"]("2", "3", "SUPPLEMENTS", {"section": "deductions"})
    except Exception as e:
        print(f"Warning: Could not store relationships in graph DB: {e}")

    # Create the LangGraph for the Tax Copilot
    return create_tax_copilot_graph()

def run_tax_copilot(graph, query, goal=None):
    """
    Answer a single user query with the Tax Copilot graph.

    Args:
        graph: Compiled StateGraph for the Tax Copilot
        query: User query, used as the initial "prompt"
        goal: Optional goal to guide the context engine

    Returns:
        The "final_response" entry of the graph's result
    """
    final_state = graph.invoke(dict(prompt=query, goal=goal))
    return final_state["final_response"]

# ----- EXAMPLE USAGE -----

def main():
    """Set up the Tax Copilot and run it on a few example queries."""
    print("Setting up Tax Copilot system...")
    copilot_graph = setup_tax_copilot_system()

    # Example queries, answered one at a time in this order
    example_queries = (
        "What are the deduction limits for business expenses?",
        "How do I report foreign income on my tax return?",
        "Can I claim home office expenses if I'm working remotely?",
    )

    for query in example_queries:
        print(f"\nProcessing query: {query}")
        answer = run_tax_copilot(copilot_graph, query)
        print(f"\nResponse: {answer}")

if __name__ == "__main__":
    main()