In [None]:
# ------------------------------------------------------------------------------
# DOCUMENT GENERATION WITH GUIDELINES
# ------------------------------------------------------------------------------
def generate_document_with_guidelines(
    user_query: str,
    guidelines: List[str],
    vector_store: VectorStore,
    k: int = 3,
    max_new_tokens: int = 512,
    temperature: float = 0.7
) -> str:
    """
    Produce one document made of a separately generated section per guideline.

    The store is searched once with ``user_query``; every returned hit is
    rendered (content, metadata, similarity score) into a single context
    block that is reused in every prompt. Each guideline then gets its own
    ``get_completion`` call, and the stripped replies are returned under
    ``### GUIDELINE #n`` headings separated by blank lines.

    Args:
        user_query: Question used for retrieval and repeated in each prompt.
        guidelines: One section is generated per entry, in order.
        vector_store: Object whose ``search(query, k=...)`` yields
            ``(document, similarity)`` pairs.
        k: Number of hits requested from the store.
        max_new_tokens: Passed through to ``get_completion``.
        temperature: Passed through to ``get_completion``.

    Returns:
        The joined sections; an empty string when ``guidelines`` is empty.
    """
    def render_hit(doc, score):
        # Metadata becomes "key: value" lines; the slot stays blank when the
        # document carries no (or empty) metadata.
        metadata_block = ""
        if doc.metadata:
            metadata_block = "\n".join(
                f"{key}: {val}" for key, val in doc.metadata.items()
            )
        return "\n".join([
            "---",
            "Content:",
            f"{doc.content}",
            "Metadata:",
            metadata_block,
            f"Similarity Score: {score:.4f}",
            "---",
        ])

    # Retrieval happens once; the rendered context is shared by all prompts.
    hits = vector_store.search(user_query, k=k)
    shared_context = "\n\n".join(render_hit(doc, score) for doc, score in hits)

    def build_prompt(number, guideline):
        # Spelled out with explicit newlines so the exact prompt text
        # (including the trailing spaces on two lines) is visible.
        return (
            "\n"
            "You have the following user query:\n"
            f"{user_query}\n"
            "\n"
            "Context from relevant documents (with metadata):\n"
            f"{shared_context}\n"
            "\n"
            f"Guideline #{number}: {guideline}\n"
            "\n"
            "Based on the user query and the above context, create a concise \n"
            "section of a final document that follows this guideline. \n"
            "Use only the provided context if needed.\n"
        )

    # One model call per guideline, numbered from 1.
    sections = []
    for number, guideline in enumerate(guidelines, start=1):
        reply = get_completion(
            build_prompt(number, guideline),
            max_new_tokens=max_new_tokens,
            temperature=temperature
        )
        sections.append(f"### GUIDELINE #{number}: {guideline}\n{reply.strip()}\n")

    return "\n\n".join(sections)


In [None]:
from typing import List
from LatestHelp import VectorStore, Document, get_completion

# ------------------------------------------------------------------------------
# DOCUMENT GENERATION WITH GUIDELINES (Separate Snippets)
# ------------------------------------------------------------------------------
def generate_document_with_guidelines(
    user_query: str,
    guidelines: List[str],
    vector_store: VectorStore,
    k: int = 3,
    max_new_tokens: int = 512,
    temperature: float = 0.7
) -> str:
    """
    Generate a guideline-by-guideline document grounded in retrieved chunks.

    A single ``vector_store.search(user_query, k=k)`` call supplies the
    context. Each hit is written out as its content, its metadata (one
    ``key: value`` per line, blank if there is none) and its similarity
    score to four decimals. That context is embedded in one prompt per
    guideline; the model's stripped answer is placed under a
    ``### GUIDELINE #n: <guideline>`` heading.

    Returns the sections joined by blank lines (``""`` for no guidelines).
    """
    retrieved = vector_store.search(user_query, k=k)

    # Render every (document, similarity) pair into a delimited text block.
    context_blocks = []
    for document, similarity in retrieved:
        metadata_text = (
            "\n".join(
                f"{name}: {value}" for name, value in document.metadata.items()
            )
            if document.metadata
            else ""
        )
        context_blocks.append(
            "---\n"
            "Content:\n"
            f"{document.content}\n"
            "Metadata:\n"
            f"{metadata_text}\n"
            f"Similarity Score: {similarity:.4f}\n"
            "---"
        )
    context = "\n\n".join(context_blocks)

    # Only the "Guideline #n" line differs between prompts, so the text
    # around it is assembled once.
    prompt_intro = (
        "\nYou have the following user query:\n"
        f"{user_query}\n\n"
        "Context from relevant documents (with metadata):\n"
        f"{context}\n\n"
    )
    prompt_outro = (
        "\n\nBased on the user query and the above context, create a concise \n"
        "section of a final document that follows this guideline. \n"
        "Use only the provided context if needed.\n"
    )

    def write_section(position, guideline):
        # Ask the model for this guideline's section and add its heading.
        completion = get_completion(
            f"{prompt_intro}Guideline #{position}: {guideline}{prompt_outro}",
            max_new_tokens=max_new_tokens,
            temperature=temperature
        )
        return f"### GUIDELINE #{position}: {guideline}\n{completion.strip()}\n"

    return "\n\n".join(
        write_section(position, guideline)
        for position, guideline in enumerate(guidelines, start=1)
    )


# ------------------------------------------------------------------------------
# ALTERNATIVE: SINGLE CONSOLIDATED REPORT
# ------------------------------------------------------------------------------
def generate_consolidated_report(
    guidelines: List[str],
    vector_store: VectorStore,
    user_query: str = "",
    k: int = 3,
    max_new_tokens: int = 512,
    temperature: float = 0.7,
    max_passage_chars: int = 500
) -> str:
    """
    Build ONE consolidated report that addresses every guideline.

    For each guideline the vector store is searched with the guideline text
    itself, and the contents of the returned documents are listed under that
    guideline in a single prompt. The prompt is then sent to
    ``get_completion`` once and its result is returned unchanged.

    Args:
        guidelines: Guidelines to cover; each one is also used as a search
            query.
        vector_store: Object whose ``search(query, k=...)`` yields
            ``(document, score)`` pairs.
        user_query: Optional extra context; when non-empty it is added to
            the prompt as a ``User Query:`` line.
        k: Number of passages requested per guideline.
        max_new_tokens: Passed through to ``get_completion``.
        temperature: Passed through to ``get_completion``.
        max_passage_chars: Maximum characters of each passage placed in the
            prompt. Longer passages are cut and marked with ``...``; shorter
            ones are included verbatim without the marker.

    Returns:
        Whatever ``get_completion`` returns for the assembled prompt.

    Raises:
        ValueError: If ``max_passage_chars`` is negative.
    """
    if max_passage_chars < 0:
        # A negative bound would silently slice from the end of the passage.
        raise ValueError("max_passage_chars must be non-negative")

    # Collect prompt fragments and join once at the end instead of growing a
    # string with repeated += inside nested loops.
    prompt_parts = ["You are an expert tasked with creating ONE cohesive report.\n"]
    if user_query:
        prompt_parts.append(f"User Query: {user_query}\n")
    prompt_parts.append(
        "\nBelow are multiple guidelines, each followed by top retrieved passages.\n"
        "Use them to synthesize one final, cohesive document:\n\n"
    )

    for i, guideline in enumerate(guidelines, start=1):
        # Retrieve with the guideline text as the query: [(Document, score), ...]
        search_results = vector_store.search(guideline, k=k)

        prompt_parts.append(f"GUIDELINE {i}: {guideline}\n")
        prompt_parts.append("Top Matching Passages:\n")
        for j, (doc, _score) in enumerate(search_results, start=1):
            # assumes doc.content is a str (it is sliced and measured) — TODO confirm
            passage = doc.content
            if len(passage) > max_passage_chars:
                # Only flag passages that were actually shortened.
                passage = passage[:max_passage_chars] + "..."
            prompt_parts.append(f"  [{j}] {passage}\n")
        prompt_parts.append("\n")

    prompt_parts.append(
        "Please produce a SINGLE consolidated document that addresses all guidelines. "
        "Focus on clarity, cohesion, and completeness.\n\n"
        "Begin your final report now:\n\n"
    )

    # A single completion produces the whole report.
    return get_completion(
        prompt="".join(prompt_parts),
        max_new_tokens=max_new_tokens,
        temperature=temperature
    )


# ------------------------------------------------------------------------------
# Example Usage
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    # Compliance topics used as the guidelines for both generators
    guidelines_list = [
        "Guideline 1: Handling user data under GDPR",
        "Guideline 2: Best practices for data retention",
        "Guideline 3: Encryption requirements at rest and in transit",
    ]

    # Question forwarded to both generators
    user_query = "What do we need to do to ensure compliance with user data regulations?"

    # Point a VectorStore at the persisted index directory and try to load it
    store = VectorStore(persist_directory="rag_index")
    loaded = store.load_local_index()
    if not loaded:
        # load_local_index() reported nothing loaded (e.g. the index doesn't
        # exist yet); this is where it would be created from documents:
        #   docs = [Document(...), Document(...), ...]
        #   store.create_index(docs, force_recreate=True)
        pass

    # Retrieval / generation settings shared by both examples below
    generation_settings = {"k": 3, "max_new_tokens": 512, "temperature": 0.7}

    # Example 1: one generated snippet per guideline
    final_doc_snippets = generate_document_with_guidelines(
        user_query=user_query,
        guidelines=guidelines_list,
        vector_store=store,
        **generation_settings
    )
    print("=== SEPARATE SNIPPETS ===", final_doc_snippets, sep="\n")

    # Example 2: a single report covering all guidelines at once
    consolidated_report = generate_consolidated_report(
        guidelines=guidelines_list,
        vector_store=store,
        user_query=user_query,
        **generation_settings
    )
    print("\n=== SINGLE CONSOLIDATED REPORT ===", consolidated_report, sep="\n")


In [None]:
# ------------------------------------------------------------------------------
# EXAMPLE USAGE (Comment out if you only want library code)
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    # 1) Load a sample file (PDF, TXT, or CSV) and split it into chunks
    file_path = "sample.txt"  # adjust to your actual file
    doc_chunks = DocumentProcessor.load_and_chunk_file(file_path)

    # 2) Index the chunks in a vector store, rebuilding any existing index
    vs = VectorStore(persist_directory="rag_index")
    vs.create_index(doc_chunks, force_recreate=True)

    # 3) Guidelines the generated document should follow
    my_guidelines = [
        "Provide a step-by-step approach.",
        "Use plain language for a broad audience."
    ]

    # 4) Generate one section per guideline for the query below
    user_query = "How can I plan my personal finances effectively?"
    generation_settings = {"k": 3, "max_new_tokens": 512, "temperature": 0.7}
    final_doc = generate_document_with_guidelines(
        user_query=user_query,
        guidelines=my_guidelines,
        vector_store=vs,
        **generation_settings
    )

    # Banner and document are printed one per line, as two print calls would
    print("\n===== FINAL DOCUMENT =====\n", final_doc, sep="\n")