In [None]:
def query_with_guidelines(
    query: str,
    guidelines: List[str],
    vector_store: VectorStore,
    k: int = 3,
    max_new_tokens: int = 512,
    temperature: float = 0.7
) -> str:
    """
    Answer a single query once per guideline, reusing one retrieved context.

    The vector store is searched exactly once. The `.content` of every
    returned document is joined (blank-line separated) into a context string
    that is embedded in each prompt. One LLM call is made per guideline, in
    list order, and each reply is wrapped in a Markdown-style section.

    Parameters:
      - query (str): The user's question; used for retrieval and in every prompt.
      - guidelines (List[str]): Instructions to apply, one LLM call each.
      - vector_store (VectorStore): Object whose .search(query, k=k) yields
        2-item results; the first item must expose a `.content` attribute.
      - k (int): Forwarded to vector_store.search as the number of results.
      - max_new_tokens (int): Forwarded to get_completion for each call.
      - temperature (float): Forwarded to get_completion for each call.

    Returns:
      - The formatted sections joined with newlines; an empty string when
        `guidelines` is empty.
    """

    # Retrieval happens once, up front: the context does not depend on the
    # guideline, so every prompt shares the same text.
    hits = vector_store.search(query, k=k)
    # The second element of each result is ignored
    # (presumably a relevance score -- confirm against VectorStore.search).
    context_block = "\n\n".join(chunk.content for chunk, _ in hits)

    def _section_for(position, rule):
        """Prompt the LLM for one guideline and wrap its reply in a section."""
        # Adjacent literals spell out the prompt line by line, including the
        # leading newline and the blank separator lines.
        prompt_text = (
            "\n"
            "You are given the following user query:\n"
            f"{query}\n"
            "\n"
            "Here is some relevant context from our documents:\n"
            f"{context_block}\n"
            "\n"
            "Now apply the following guideline:\n"
            f"{rule}\n"
            "\n"
            "Please generate a concise response that follows this guideline.\n"
        )

        # get_completion is defined elsewhere in the notebook; its result is
        # assumed to be a str, since .strip() is called on it below.
        reply = get_completion(
            prompt_text,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )

        heading = f"### Guideline {position}: {rule}\n\n"
        answer = f"**Response**:\n{reply.strip()}\n"
        divider = "----------------------------------------\n"
        return heading + answer + divider

    # Guidelines are numbered from 1 in the section headings.
    return "\n".join(
        _section_for(number, rule)
        for number, rule in enumerate(guidelines, start=1)
    )


In [None]:
# Example run: each guideline below yields its own section in the output.
my_guidelines = [
    "Use formal language.",
    "Focus on the financial aspects.",
    "Provide a step-by-step plan.",
]

# `existing_vector_store` must already be defined by an earlier cell.
response_text = query_with_guidelines(
    "How should I allocate my budget?",
    my_guidelines,
    vector_store=existing_vector_store,
    k=3,  # retrieve the 3 best-matching chunks
    max_new_tokens=512,
    temperature=0.7,
)

print(response_text)
