In [None]:
# ------------------------------------------------------------------------------
# DOCUMENT GENERATION WITH GUIDELINES
# ------------------------------------------------------------------------------
def generate_document_with_guidelines(
    user_query: str,
    guidelines: List[str],
    vector_store: VectorStore,
    k: int = 3,
    max_new_tokens: int = 512,
    temperature: float = 0.7
) -> str:
    """
    Generate one document section per guideline, grounded in retrieved context.

    The top-k search results for ``user_query`` are rendered once into a single
    context string. That same context is then sent to ``get_completion``
    together with each guideline in turn, and the answers are joined into one
    document.

    Parameters:
      user_query (str): Question used for retrieval and repeated in every prompt.
      guidelines (List[str]): Instructions; each one produces one section.
      vector_store (VectorStore): Must provide ``search(query, k=...)`` returning
        an iterable of ``(doc, similarity_score)`` pairs, where ``doc`` exposes
        ``content`` and ``metadata`` (a mapping, or a falsy value when absent).
      k (int): Number of search results to request.
      max_new_tokens (int): Forwarded unchanged to ``get_completion``.
      temperature (float): Forwarded unchanged to ``get_completion``.

    Returns:
      str: One block per guideline, each starting with
        "### GUIDELINE #<n>: <guideline>" followed by the stripped completion,
        separated by blank lines. Returns "" when ``guidelines`` is empty.
    """
    # Nothing to generate: do not spend a vector search on an empty request.
    if not guidelines:
        return ""

    # 1) Retrieve top-k relevant documents and render them once.
    search_results = vector_store.search(user_query, k=k)
    combined_context = _build_combined_context(search_results)

    # 2) The query/context part of the prompt is the same for every guideline,
    #    so it is built outside the loop.
    prompt_head = (
        "\n"
        "You have the following user query:\n"
        f"{user_query}\n"
        "\n"
        "Context from relevant documents (with metadata):\n"
        f"{combined_context}\n"
        "\n"
    )
    # The single space before "\n" on the first two lines is part of the prompt
    # text; it is spelled out here so editors cannot strip it silently.
    prompt_tail = (
        "\n"
        "Based on the user query and the above context, create a concise \n"
        "section of a final document that follows this guideline. \n"
        "Use only the provided context if needed.\n"
    )

    # 3) Generate one formatted snippet per guideline (numbered from 1).
    sections = []
    for idx, guideline in enumerate(guidelines, start=1):
        prompt = f"{prompt_head}Guideline #{idx}: {guideline}\n{prompt_tail}"
        snippet = get_completion(
            prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        sections.append(f"### GUIDELINE #{idx}: {guideline}\n{snippet.strip()}\n")

    # 4) Combine all guideline-based snippets.
    return "\n\n".join(sections)


def _build_combined_context(search_results) -> str:
    """Render every ``(doc, similarity_score)`` pair and join them with blank lines."""
    return "\n\n".join(
        _format_context_entry(doc, sim_score) for doc, sim_score in search_results
    )


def _format_context_entry(doc, sim_score) -> str:
    """Render one retrieved document as a delimited content/metadata/score block."""
    meta_str = ""
    if doc.metadata:
        # One "key: value" line per metadata item.
        meta_str = "\n".join(f"{key}: {val}" for key, val in doc.metadata.items())
    return (
        f"---\nContent:\n{doc.content}\n"
        f"Metadata:\n{meta_str}\n"
        f"Similarity Score: {sim_score:.4f}\n---"
    )


In [None]:
def run_react_chain(user_query: str, combined_context: str, guideline: str, idx: int,
                    max_new_tokens: int = 1024, temperature: float = 0.7) -> str:
    """
    Builds a single ReAct-style prompt (Thought / Action / Observation / Final Answer)
    for one guideline and returns the completion produced by get_completion
    (the LLaMA helper from LatestHelp.py).

    The prompt consists of, in order: the assistant's role and grounding rules,
    the generation instructions, the ReAct response template, and finally the
    user query, the context and the numbered guideline.

    Parameters:
      user_query (str): The model validation question.
      combined_context (str): The extracted context from MDD subsections containing model information.
      guideline (str): The specific guideline instruction.
      idx (int): The guideline index number shown in the prompt.
      max_new_tokens (int): Forwarded to get_completion as the output token limit.
      temperature (float): Forwarded to get_completion as the sampling temperature.

    Returns:
      str: Whatever get_completion returns for the constructed prompt.
    """
    # The prompt speaks directly to the model about its task; it must contain
    # only instructions meant for the model, not notes about how this code
    # should be written.
    prompt = f"""
Your task is to assist a Quantitative Model Validator working in the Model Risk Management team of a bank, to find answers to policy questions about Model Development Document (MDD) based on the provided context.
Contents of the subsection(s) of MDD is used as the only input context to answer the Model Validation policy questions. You are a highly accurate assistant who strictly answers only based on the information in the provided context.

Strictly follow these Generation Instructions:
- Your response should be accurate, coherent, detailed, and descriptive by including all the important statistics, tables, terminologies, and definitions.
- Your response should be relevant to the question being asked.
- Your response should be honest, focused, and grounded in the provided context.
- Do not change or assume any definition, terminology, statistical data, numerical information, or table information.
- Always respond with "Not found" when you cannot find relevant information in the context.
- Always respond with "Not found" if any information asked is not explicitly mentioned.
- Use the important keywords and phrases from the context to frame your response.
- Use bullet points only when required.
- Only use the information provided under the specific product, business segment or aspect when answering questions. If the context includes details about multiple products, ensure your response is limited to the product specified in the query. Do not include information from other products, businesses, or other aspects.

### Thought
[Your initial analysis of the problem]
### Action: Step 1
[First concrete action to take/solve]
### Observation
[What are the results from this action?]
### Action: Step 2
[Next action based on previous observation]
### Observation
[Results from second action]

...(repeat actions/observations as needed)

### Final Answer
[Your conclusive answer based on the chain of reasoning]

You have the following user query:
{user_query}

Context from relevant documents (with metadata):
{combined_context}

Guideline #{idx}: {guideline}

Based on the user query and the above context, create a concise section of a final document that follows this guideline.
Focus on using the available context effectively.
"""
    # Call the LLaMA model's get_completion function (from LatestHelp.py) to generate the answer.
    return get_completion(prompt, max_new_tokens=max_new_tokens, temperature=temperature)


def run_react_chain_loop(user_query: str, combined_context: str, guidelines: list,
                         max_new_tokens: int = 1024, temperature: float = 0.7) -> dict:
    """
    Runs run_react_chain once per guideline and collects the answers.

    Guidelines are numbered from 1 in the order given. That number is passed to
    run_react_chain as idx and is also the key under which the answer is stored.

    Parameters:
      user_query (str): The model validation question, passed to every call.
      combined_context (str): The extracted context, passed to every call.
      guidelines (list): Guideline strings, one per section of the final document.
      max_new_tokens (int): Forwarded to run_react_chain for each guideline.
      temperature (float): Forwarded to run_react_chain for each guideline.

    Returns:
      dict: {guideline number (int, starting at 1): generated answer}; empty when
        there are no guidelines.
    """
    return {
        number: run_react_chain(
            user_query,
            combined_context,
            text,
            number,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        for number, text in enumerate(guidelines, start=1)
    }


# Example usage: runs every guideline below against a small hard-coded context
# and prints each generated answer.
if __name__ == "__main__":
    # The user query and combined_context should be replaced with actual model validation details and context.
    sample_query = "Describe the growth of the portfolio over the past 5, 10 years."
    sample_context = (
        "Section X: The portfolio has shown consistent growth in balance sheet contributions "
        "over the past 10 years with a 5% annual increase. Additional details about products "
        "and market conditions are stored in subsequent sections."
    )
    
    # 11 guidelines, one per section of the final document. Adjacent string
    # literals with no comma between them are concatenated by Python, so the two
    # multi-line entries at the end ("Include all the changes ..." and
    # "Include a table ...") are each a single guideline.
    guidelines = [
        "Briefly describe the business portfolio to which the model applies",
        "Include the products and business segments offered by the business line",
        "Describe the products of the LOB and portfolio to which this model applies.",
        "Describe any current or planned changes in the products, channels, policies, programs, organization, or marketing practices that may impact the model under consideration.",
        "Assess how close the current customer base to the target customer profile is.",
        "Consider whether the customer base is likely to shift over the lifetime of the model.",
        "Specify the current and possible future market conditions and the impact they may have on the portfolio and the model.",
        "Describe the growth of the portfolio over the past 5, 10, X years, both in size and in significance to the balance sheet.",
        "If this is a revalidation or the model replaces an existing model, highlight the key relevant changes on the business between the previous model developments and model validations, and this validation.",
        "Include all the changes related to modeling, such as model framework and theory, variables, data sources, and programs; "
        "business changes, such as policy or strategy; environmental changes, such as competitor actions, economic changes, and political or regulatory changes; "
        "and any other changes that impact the model, its implementation, evaluation, and usage.",
        "Include a table or summary of the portfolio, product, or business metrics of the business in the most recent and past periods. "
        "These can include metrics such as balances, losses, recoveries, number of accounts, average account size, credit limits, etc."
    ]
    
    # outputs maps each 1-based guideline number to its generated answer.
    outputs = run_react_chain_loop(sample_query, sample_context, guidelines)
    for idx, answer in outputs.items():
        print(f"Guideline #{idx} Answer:")
        print(answer)
        print("----------------------------------------------------")
