In [None]:
def load_file(file_path: str) -> List[Document]:
    """
    Read a PDF, TXT, or CSV file and split its text into Document chunks.

    The loader is picked from the lower-cased file extension and the
    resulting text is handed to ``DocumentProcessor.chunk_text``.
    Any text cleaning is presumably done inside the ``DocumentProcessor``
    helpers; nothing is cleaned in this function itself.

    Raises:
      ValueError: if the extension is not ``.pdf``, ``.txt``, or ``.csv``.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    # Map each supported extension to the DocumentProcessor loader name; the
    # attribute is only looked up for the extension that actually matched.
    loader_names = {
        ".pdf": "load_pdf",
        ".txt": "load_txt",
        ".csv": "load_csv",
    }
    loader_name = loader_names.get(suffix)
    if loader_name is None:
        raise ValueError(f"Unsupported file format: {suffix}")

    raw_text = getattr(DocumentProcessor, loader_name)(file_path)
    return DocumentProcessor.chunk_text(raw_text)


def load_documents(file_path):
    """
    Chunk a PDF, TXT, or CSV file and index the chunks in a fresh VectorStore.

    Prints how many chunks were processed and returns the populated store.
    The index is always rebuilt (``force_recreate=True``).
    """
    chunks = load_file(file_path)

    index = VectorStore()  # or HybridSearcher() if you want both vector + keyword
    index.create_index(chunks, force_recreate=True)

    print(f"Processed {len(chunks)} chunks from file: {file_path}")
    return index


# Build the index when this cell runs; the demo cell further down passes this
# module-level `vector_store` to generate_document_with_guidelines.
# NOTE(review): the path is hard-coded to one user's share — confirm it exists
# in the environment where the notebook is executed.
vector_store = load_documents("/commons/corpra_share/k152356/ReAct_Testing/metatext.txt")


In [None]:
# ------------------------------------------------------------------------------
# DOCUMENT GENERATION WITH GUIDELINES
# ------------------------------------------------------------------------------
def generate_document_with_guidelines(
    user_query: str,
    guidelines: List[str],
    vector_store: VectorStore,
    k: int = 3,
    max_new_tokens: int = 512,
    temperature: float = 0.7
) -> str:
    """
    Produce one generated section per guideline, grounded in retrieved chunks.

    The top-k search hits for ``user_query`` are rendered (content, metadata,
    similarity score) into a single context string. That same context is then
    embedded in a separate prompt for every guideline, and the completions are
    joined into one document with a ``### GUIDELINE #n`` heading per section.

    Note: a later cell redefines this function under the same name.
    """
    def render_hit(chunk, score) -> str:
        # Metadata is listed one "key: value" pair per line, or left empty.
        metadata_lines = (
            "\n".join(f"{name}: {value}" for name, value in chunk.metadata.items())
            if chunk.metadata
            else ""
        )
        return (
            f"---\nContent:\n{chunk.content}\n"
            f"Metadata:\n{metadata_lines}\n"
            f"Similarity Score: {score:.4f}\n---"
        )

    # Retrieval happens once; every guideline shares the same context.
    hits = vector_store.search(user_query, k=k)
    combined_context = "\n\n".join(render_hit(chunk, score) for chunk, score in hits)

    sections = []
    for number, rule in enumerate(guidelines, start=1):
        prompt = f"""
You have the following user query:
{user_query}

Context from relevant documents (with metadata):
{combined_context}

Guideline #{number}: {rule}

Based on the user query and the above context, create a concise 
section of a final document that follows this guideline. 
Use only the provided context if needed.
"""
        completion = get_completion(
            prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        sections.append(f"### GUIDELINE #{number}: {rule}\n{completion.strip()}\n")

    return "\n\n".join(sections)


In [None]:
def generate_document_with_guidelines(
    user_query: str,
    guidelines: list,
    vector_store: VectorStore,
    k: int = 3,
    max_new_tokens: int = 512,
    temperature: float = 0.7
) -> str:
    """
    Searches the VectorStore for relevant document chunks based on user_query,
    then iterates over each guideline to produce a separate snippet.
    Returns one combined "document" with all guideline-based responses.

    Retrieval runs once; the same rendered context (content, metadata and
    similarity score of each hit) is embedded in every guideline prompt.

    Parameters:
      user_query (str): The model validation question.
      guidelines (list): A list of guideline strings that define sections of the final document.
      vector_store (VectorStore): An instance of the VectorStore used for retrieving context.
      k (int): The number of top documents to retrieve from the vector store.
      max_new_tokens (int): Maximum tokens for each generated snippet.
      temperature (float): Sampling temperature for generation.

    Returns:
      str: The final combined document based on the generated guideline sections.
           Each section starts with a "### GUIDELINE #n: <guideline>" heading;
           an empty `guidelines` list yields an empty string.
    """
    # 1) Retrieve top-k relevant documents
    search_results = vector_store.search(user_query, k=k)

    # 2) Build a combined context from the top documents
    combined_context_parts = []
    for doc, sim_score in search_results:
        meta_str = ""
        if doc.metadata:
            meta_str = "\n".join(f"{key}: {val}" for key, val in doc.metadata.items())
        combined_context_parts.append(
            f"---\nContent:\n{doc.content}\n"
            f"Metadata:\n{meta_str}\n"
            f"Similarity Score: {sim_score:.4f}\n---"
        )
    combined_context = "\n\n".join(combined_context_parts)

    # 3) Iterate over each guideline and generate the associated snippet.
    # The prompt used to open with a pasted authoring request ("Using this RAG
    # function, I want you to implement a hardcoded version of the ReAct
    # framework ..."). That sentence asked the model to write a framework
    # instead of answering, so it has been removed; the task description and
    # generation instructions are otherwise unchanged.
    final_snippets = []
    for idx, guideline in enumerate(guidelines, start=1):
        prompt = f"""
Your task is to assist a Quantitative Model Validator working in the Model Risk Management team of a bank, to find answers to policy questions about Model Development Document (MDD) based on the provided context.
Contents of the subsection(s) of MDD is used as the only input context to answer the Model Validation policy questions. You are a highly accurate assistant who strictly answers only based on the information in the provided context.

Strictly follow these Generation Instructions:
- Your response should be accurate, coherent, detailed, and descriptive by including all the important statistics, tables, terminologies, and definitions.
- Your response should be relevant to the question being asked.
- Your response should be honest, focused, and grounded in the provided context.
- Do not change or assume any definition, terminology, statistical data, numerical information, or table information.
- Always respond with "Not found" when you cannot find relevant information in the context.
- Always respond with "Not found" if any information asked is not explicitly mentioned.
- Use the important keywords and phrases from the context to frame your response.
- Use bullet points only when required.
- Only use the information provided under the specific product, business segment or aspect when answering questions. If the context includes details about multiple products, ensure your response is limited to the product specified in the query. Do not include information from other products, businesses, or other aspects.

You have the following user query:
{user_query}

Context from relevant documents (with metadata):
{combined_context}

Guideline #{idx}: {guideline}

Based on the user query and the above context, create a concise section of a final document that follows this guideline.
Focus on using the available context effectively.
"""
        # Generate snippet using the LLaMA model's get_completion function
        snippet = get_completion(prompt, max_new_tokens=max_new_tokens, temperature=temperature)
        final_snippets.append(f"### GUIDELINE #{idx}: {guideline}\n{snippet.strip()}\n")

    # 4) Combine all guideline-based snippets into one document
    return "\n\n".join(final_snippets)


In [None]:
# Demo driver: generates one section per guideline for a sample query and
# prints the combined document.
if __name__ == "__main__":
    # Define a sample user query and a list of guidelines for generating the document sections.
    # The query is used for retrieval; each guideline below becomes its own prompt.
    user_query = "Describe the growth of the portfolio over the past 5, 10 years."
    guidelines = [
        "Briefly describe the business portfolio to which the model applies",
        "Include the products and business segments offered by the business line",
        "Describe the products of the LOB and portfolio to which this model applies.",
        "Describe any current or planned changes in the products, channels, policies, programs, organization, or marketing practices that may impact the model under consideration.",
        "Assess how close the current customer base to the target customer profile is.",
        "Consider whether the customer base is likely to shift over the lifetime of the model.",
        "Specify the current and possible future market conditions and the impact they may have on the portfolio and the model.",
        "Describe the growth of the portfolio over the past 5, 10, X years, both in size and in significance to the balance sheet.",
        "If this is a revalidation or the model replaces an existing model, highlight the key relevant changes on the business between the previous model developments and model validations, and this validation.",
        "Include all the changes related to modeling, such as model framework and theory, variables, data sources, and programs; business changes, such as policy or strategy; environmental changes, such as competitor actions, economic changes, and political or regulatory changes; and any other changes that impact the model, its implementation, evaluation, and usage.",
        "Include a table or summary of the portfolio, product, or business metrics of the business in the most recent and past periods. These can include metrics such as balances, losses, recoveries, number of accounts, average account size, credit limits, etc."
    ]

    # Generate the final document by integrating the context with guideline responses.
    # The keyword values below repeat the function's own defaults (k=3, 512 tokens, 0.7).
    final_document = generate_document_with_guidelines(
        user_query=user_query,
        guidelines=guidelines,
        vector_store=vector_store,  # Assuming vector_store is defined and initialized
        k=3,
        max_new_tokens=512,
        temperature=0.7
    )

    print("Final Document:")
    print(final_document)


In [None]:
import os
from LatestHelp import (
    load_documents,
    get_completion,  # This is used for generating text completions.
    Document,        # The Document data structure.
)

# Dummy implementation for generate_chat_template.
# Replace this with your actual generation function as required.
def generate_chat_template(model, tokenizer, messages, temperature, max_new_tokens):
    """
    Placeholder chat generator: flatten the messages and call get_completion.

    Only each message's "content" is used; roles are discarded, and the
    ``model`` and ``tokenizer`` arguments are accepted but not used here.
    Swap in a conversation-aware implementation when one is available.
    """
    contents = [message["content"] for message in messages]
    flattened_prompt = "\n".join(contents)
    return get_completion(
        flattened_prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )

def rag_react_generate_mdd_documentation(file_path, model_metadata, sub_section_name, guidelines, model, tokenizer):
    """
    Generates a Model Development Document (MDD) subsection report, one guideline at a time.

    The input file is loaded, chunked and indexed through load_documents. All
    chunks of the resulting store are then concatenated into a single context
    string (no per-guideline retrieval is performed). That context, together
    with the model metadata and subsection title, is placed in the system
    messages that open the conversation.

    Each guideline is appended to the conversation as a user message and
    answered with the full conversation so far, so the model always sees the
    system instructions and the context. After all guidelines are answered, a
    final user message asks for a single continuous report that merges the
    individual answers.

    Every individual answer and the final report are also printed.

    Parameters:
      - file_path (str): Path to the file (PDF, TXT, or CSV) containing the MDD information.
      - model_metadata (str): Metadata describing the model.
      - sub_section_name (str): Title of the MDD subsection.
      - guidelines (list): A list of guideline questions/statements for the individual answers.
      - model: The language model (e.g., a Hugging Face model); forwarded to generate_chat_template.
      - tokenizer: The tokenizer corresponding to the model; forwarded to generate_chat_template.

    Returns:
      - outputs (list): The individual guideline responses in guideline order,
        followed by the final comprehensive report as the last element.
    """
    # Use the RAG implementation from LatestHelp.py to load documents and create a vector store.
    vector_store = load_documents(file_path)

    # Combine all document chunks into one context string.
    # (Alternatively, you could perform per-guideline retrieval, but here we combine for simplicity.)
    context = "\n".join(doc.content for doc in vector_store.documents)

    # Define the system prompt for individual responses.
    system_prompt_individual_answers = (
        "Your task is to assist a Quantitative Model Validator working in the Model Risk Management team of a bank, to find answers to policy "
        "questions about Model Development Document (MDD) based on the provided context. \n"
        "Contents of the subsection(s) of MDD is used as the only input context to answer the Model Validation policy questions. You are a "
        "highly accurate assistant who strictly answers only based on the information in the provided context.\n\n"
        "Strictly follow these Generation Instructions:\n"
        "- Your response should be accurate, coherent, detailed, and descriptive by including all the important statistics, tables, terminologies, and definitions.\n"
        "- Your response should be relevant to the question being asked.\n"
        "- Your response should be honest, focused, and grounded in the provided context.\n"
        "- Do not change or assume any definition, terminology, statistical data, numerical information, or table information.\n"
        "- Always respond with \"Not found\" when you cannot find relevant information in the context.\n"
        "- Always respond with \"Not found\" if any information asked is not explicitly mentioned.\n"
        "- Use the important keywords and phrases from the context to frame your response.\n"
        "- Use bullet points only when required.\n"
        "- Only use the information provided under the specific product, business segment or aspect when answering questions. If the context includes details about multiple products, ensure your response is limited to the product specified in the query. Do not include information from other products, businesses, or other aspects."
    )

    # Define the main prompt that embeds the model metadata, subsection name, and the context.
    prompt = f"""
Consider model with metadata:
{model_metadata}

You are provided with long and detailed contents of subsection of MDD titled {sub_section_name} to be used as the only context.

Context:
{context}

DO NOT USE ANY OTHER INFORMATION OR YOUR OWN KNOWLEDGE APART FROM THE CONTEXT.
Based on the above context information, provide a detailed, coherent and complete answer to the following question (treat a statement as a direct question and answer accordingly)
"""

    # Initialize the conversation with the two system prompts.
    messages = [
        {"role": "system", "content": system_prompt_individual_answers},
        {"role": "system", "content": prompt}
    ]

    outputs = []

    # Loop through each guideline to obtain individual responses.
    for guideline in guidelines:
        # Record the guideline in the conversation *before* generating, and send
        # the whole conversation. Previously only the bare guideline was sent, so
        # the model never received the system instructions or the context.
        messages.append({"role": "user", "content": guideline})
        output = generate_chat_template(
            model=model,
            tokenizer=tokenizer,
            messages=messages,
            temperature=0.2,
            max_new_tokens=8000
        )
        messages.append({"role": "assistant", "content": output})

        print(guideline)
        print("#" * 15)
        print(output)
        print("\n")

        outputs.append(output)

    # Build the final summarization prompt to merge all responses.
    system_prompt_summarize_answers = f"""
All the above answers to policy questions are required to create subsection titled {sub_section_name} for the Model Validation Document (MVD) report.
Combine all the previous answers from the assistant in the same order and create a comprehensive and continuous report while following these instructions:
- Final response should be continuous, accurate, coherent, detailed and descriptive
- Final response should be grounded in the provided input context information, policy questions and their generated answers
- Including all the statistics, tables, terminologies and definitions from the individual answers
"""
    # The history is kept intact here. The earlier `messages[:-2]` slice removed
    # the last two answers from the material to be summarized (and removed both
    # system prompts when `guidelines` was empty), contradicting the instruction
    # above to combine *all* previous answers.
    messages.append({"role": "user", "content": system_prompt_summarize_answers})

    final_output = generate_chat_template(
        model=model,
        tokenizer=tokenizer,
        messages=messages,
        temperature=0.2,
        max_new_tokens=8000
    )

    print("#" * 10)
    print(final_output)

    outputs.append(final_output)

    return outputs

# ----------------------------------------------------------------------------
# Example usage:
# Example driver for rag_react_generate_mdd_documentation: builds sample inputs,
# runs the generation, and prints every returned output.
if __name__ == "__main__":
    # Define parameters for the MDD subsection.
    file_path = "/commons/corpra_share/k152356/ReAct_Testing/metatext.txt"
    model_metadata = "Example model metadata details, including version, type, and performance metrics."
    sub_section_name = "Business Portfolio Overview"
    # Adjacent string literals below are concatenated by Python, so the last
    # two multi-line entries are single guidelines (11 guidelines in total).
    guidelines = [
        "Briefly describe the business portfolio to which the model applies",
        "Include the products and business segments offered by the business line",
        "Describe the products of the LOB and portfolio to which this model applies.",
        "Describe any current or planned changes in the products, channels, policies, programs, organization, or marketing practices that may impact the model under consideration.",
        "Assess how close the current customer base to the target customer profile is.",
        "Consider whether the customer base is likely to shift over the lifetime of the model.",
        "Specify the current and possible future market conditions and the impact they may have on the portfolio and the model.",
        "Describe the growth of the portfolio over the past 5, 10, X years, both in size and in significance to the balance sheet.",
        "If this is a revalidation or the model replaces an existing model, highlight the key relevant changes on the business between the previous model developments and model validations, and this validation.",
        "Include all the changes related to modeling, such as model framework and theory, variables, data sources, and programs; "
        "business changes, such as policy or strategy; environmental changes, such as competitor actions, economic changes, and political or regulatory changes; "
        "and any other changes that impact the model, its implementation, evaluation, and usage.",
        "Include a table or summary of the portfolio, product, or business metrics of the business in the most recent and past periods. "
        "These can include metrics such as balances, losses, recoveries, number of accounts, average account size, credit limits, etc."
    ]
    
    # Assume model and tokenizer are already initialized as in LatestHelp.py.
    # Both are placeholders here; the dummy generate_chat_template in this cell
    # does not use them, so None is accepted.
    model = None     # Replace with the actual model instance.
    tokenizer = None # Replace with the actual tokenizer instance.
    
    outputs = rag_react_generate_mdd_documentation(file_path, model_metadata, sub_section_name, guidelines, model, tokenizer)
    
    # The function already prints each answer as it is produced; this loop
    # prints the collected list again, numbered from 1.
    print("Final generated outputs:")
    for idx, output in enumerate(outputs):
        print(f"Output {idx+1}:\n{output}\n")


In [None]:
# ------------------------------------------------------------------------------
# System prompt for individual MDD answers
# ------------------------------------------------------------------------------
# Module-level system prompt read by answer_guidelines_rag_chat as the first
# message of every conversation. The same text is also defined locally inside
# rag_react_generate_mdd_documentation; keep the two in sync when editing.
system_prompt_individual_answers = (
    "Your task is to assist a Quantitative Model Validator working in the Model Risk Management team of a bank, "
    "to find answers to policy questions about Model Development Document (MDD) based on the provided context. \n"
    "Contents of the subsection(s) of MDD is used as the only input context to answer the Model Validation policy questions. You are a "
    "highly accurate assistant who strictly answers only based on the information in the provided context.\n\n"
    "Strictly follow these Generation Instructions:\n"
    "- Your response should be accurate, coherent, detailed, and descriptive by including all the important statistics, tables, terminologies, and definitions.\n"
    "- Your response should be relevant to the question being asked.\n"
    "- Your response should be honest, focused, and grounded in the provided context.\n"
    "- Do not change or assume any definition, terminology, statistical data, numerical information, or table information.\n"
    "- Always respond with \"Not found\" when you cannot find relevant information in the context.\n"
    "- Always respond with \"Not found\" if any information asked is not explicitly mentioned.\n"
    "- Use the important keywords and phrases from the context to frame your response.\n"
    "- Use bullet points only when required.\n"
    "- Only use the information provided under the specific product, business segment or aspect when answering questions. If the context includes details about multiple products, ensure your response is limited to the product specified in the query. Do not include information from other products, businesses, or other aspects."
)

# ------------------------------------------------------------------------------
# List of guidelines for which to generate answers
# ------------------------------------------------------------------------------
# Each entry is sent to the model as one user message, in this order.
# Adjacent string literals are concatenated by Python, so the two multi-line
# entries at the end are single guidelines (11 guidelines in total).
guidelines = [
    "Briefly describe the business portfolio to which the model applies",
    "Include the products and business segments offered by the business line",
    "Describe the products of the LOB and portfolio to which this model applies.",
    "Describe any current or planned changes in the products, channels, policies, programs, organization, or marketing practices that may impact the model under consideration.",
    "Assess how close the current customer base to the target customer profile is.",
    "Consider whether the customer base is likely to shift over the lifetime of the model.",
    "Specify the current and possible future market conditions and the impact they may have on the portfolio and the model.",
    "Describe the growth of the portfolio over the past 5, 10, X years, both in size and in significance to the balance sheet.",
    "If this is a revalidation or the model replaces an existing model, highlight the key relevant changes on the business between the previous model developments and model validations, and this validation.",
    "Include all the changes related to modeling, such as model framework and theory, variables, data sources, and programs; "
    "business changes, such as policy or strategy; environmental changes, such as competitor actions, economic changes, and political or regulatory changes; "
    "and any other changes that impact the model, its implementation, evaluation, and usage.",
    "Include a table or summary of the portfolio, product, or business metrics of the business in the most recent and past periods. "
    "These can include metrics such as balances, losses, recoveries, number of accounts, average account size, credit limits, etc."
]

# ------------------------------------------------------------------------------
# Function to answer guidelines using RAG with role declarations.
# ------------------------------------------------------------------------------
def answer_guidelines_rag_chat(context: str, guidelines: List[str], max_new_tokens: int = 512, temperature: float = 0.2) -> List[str]:
    """
    Answer each guideline in turn within one growing chat conversation.

    The conversation opens with two system messages: the module-level
    ``system_prompt_individual_answers`` and the supplied ``context``. Every
    guideline is added as a user turn, answered via
    ``get_completion_from_messages`` using the full history so far, and the
    reply is added back as an assistant turn. Each guideline and its answer
    are printed as they are produced.

    Returns:
      The answers, in the same order as ``guidelines``.
    """
    history = [
        {"role": "system", "content": system_prompt_individual_answers},
        {"role": "system", "content": f"Context:\n{context}"},
    ]
    answers = []

    for question in guidelines:
        history.append({"role": "user", "content": question})
        reply = get_completion_from_messages(
            history,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        history.append({"role": "assistant", "content": reply})

        # One print call emitting the same lines as the per-line prints: label,
        # guideline, separator, label, answer, then a "\n" item (two blank lines).
        print("Guideline:", question, "#" * 30, "Answer:", reply, "\n", sep="\n")

        answers.append(reply)

    return answers

# ------------------------------------------------------------------------------
# Main execution: Load documents, build vector store, and run guidelines using RAG
# ------------------------------------------------------------------------------
# Driver: index the document file, join every chunk into one context string,
# and answer all module-level `guidelines` against it.
# NOTE: answer_guidelines_rag_chat calls get_completion_from_messages, which is
# defined in the cell after this one — that cell must be executed first.
if __name__ == "__main__":
    # Load and index your document file.
    vector_store = load_documents("/commons/corpra_share/k152356/ReAct_Testing/metatext.txt")
    
    # For each guideline, you could also retrieve context from the vector store. For demonstration,
    # we'll assume you wish to use the full context from the file.
    # You might retrieve context for a specific guideline as follows:
    # results = vector_store.search(guideline, k=3)
    # context = "\n\n".join([doc.content for doc, _ in results])
    #
    # Here, we assume 'context' is an aggregated extracted MDD text. You can retrieve and pass the specific context you desire.
    # No search is performed below: every stored chunk is included.
    aggregated_context = "\n\n".join([doc.content for doc in vector_store.documents])
    
    # Answer all guidelines using the chat-based role declaration approach.
    guideline_answers = answer_guidelines_rag_chat(aggregated_context, guidelines)

In [None]:
# ------------------------------------------------------------------------------
# Chat-based get_completion implementation with role declarations.
# ------------------------------------------------------------------------------
def get_completion_from_messages(messages: List[Dict[str, str]], max_new_tokens: int = 512, temperature: float = 0.7) -> str:
    """
    Generate a reply for a chat-style message list with the global completion model.

    The messages are flattened into one prompt, one line group per message in
    the form "<Role>: <content>", tokenized with the module-level `tokenizer`,
    and passed to `completion_model.generate` with sampling enabled.

    Parameters:
      messages: Dicts with "role" and "content" keys, oldest first.
      max_new_tokens: Upper bound on the number of generated tokens.
      temperature: Sampling temperature forwarded to `generate`.

    Returns:
      The decoded completion only. For decoder-only models the prompt tokens
      that `generate` echoes at the start of its output are removed, so callers
      that append the reply to the history do not re-insert the whole prompt
      on every turn.
    """
    # Combine the messages into a single prompt string with clear role markers.
    prompt = "\n".join(f"{msg['role'].capitalize()}: {msg['content']}" for msg in messages)
    # NOTE(review): the prompt is capped at 1024 tokens. With the tokenizer's
    # usual right-side truncation this would drop the *end* of a long prompt,
    # i.e. the most recent question — confirm the tokenizer's truncation_side
    # and whether 1024 is large enough for the contexts passed in.
    input_data = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=1024,
    ).to(device)
    with torch.no_grad():
        output = completion_model.generate(
            **input_data,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.2
        )

    generated_ids = output[0]
    # Decoder-only models return prompt + continuation in one sequence; keep
    # only the continuation. Encoder-decoder models return just the new tokens,
    # so their output is left untouched.
    model_config = getattr(completion_model, "config", None)
    if not getattr(model_config, "is_encoder_decoder", False):
        prompt_length = input_data["input_ids"].shape[-1]
        generated_ids = generated_ids[prompt_length:]

    return tokenizer.decode(generated_ids, skip_special_tokens=True)