## LLMs

In [52]:
import os
from tkinter.messagebox import QUESTION

from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import display, Markdown
from src.data_processing import *
import asyncio
import warnings
warnings.filterwarnings("ignore")

from pinecone.grpc import PineconeGRPC as Pinecone
# Read the API credentials from the local .env file into the process
# environment; a missing variable yields None rather than raising.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

In [53]:
def _query_index(query: str, PINECONE_API_KEY: str, OPENAI_API_KEY: str, top_k: int):
    """Embed ``query`` and fetch its ``top_k`` nearest chunks from Pinecone.

    Shared implementation behind ``retriever`` and ``chunk_retriever``.

    Args:
        query: Natural-language question to embed.
        PINECONE_API_KEY: API key used to open the Pinecone client.
        OPENAI_API_KEY: API key used to open the OpenAI client.
        top_k: Number of nearest neighbours to request from the index.

    Returns:
        tuple: ``(unique_chunks, res)``. ``res`` is the Pinecone query response
        with ``res.matches`` re-sorted by integer chunk id; ``unique_chunks`` is
        the list of distinct ``metadata["content"]`` strings in that id order.
    """
    # Load Pinecone index
    pc = Pinecone(PINECONE_API_KEY)
    index = pc.Index("legal-hierarchical-rag")

    # Load OpenAI client
    client = OpenAI(api_key=OPENAI_API_KEY)

    # Create the query embedding (a plain blocking call; nothing here is async)
    embedding_result = client.embeddings.create(input=query, model="text-embedding-3-large")
    xq = embedding_result.data[0].embedding

    # Query the Pinecone index
    res = index.query(vector=xq, top_k=top_k, include_metadata=True)

    # Re-order hits by numeric chunk id instead of similarity score
    # (presumably ids follow source-document order -- TODO confirm).
    res.matches = sorted(res.matches, key=lambda chunk: int(chunk['id']))

    # dict.fromkeys drops duplicates while keeping first-seen order; going
    # through set() would scramble the id ordering established just above.
    contents = [match["metadata"]["content"] for match in res.matches]
    unique_chunks = list(dict.fromkeys(contents))
    return unique_chunks, res


def retriever(query: str, PINECONE_API_KEY: str, OPENAI_API_KEY:str, top_k: int = 20):
    """Return the retrieved context for ``query`` as one space-joined string.

    Args:
        query: Natural-language question to embed and search for.
        PINECONE_API_KEY: Pinecone API key.
        OPENAI_API_KEY: OpenAI API key.
        top_k: Number of chunks to request (default 20).

    Returns:
        str: Distinct chunk texts, ordered by chunk id, joined with single spaces.
    """
    unique_chunks, _ = _query_index(query, PINECONE_API_KEY, OPENAI_API_KEY, top_k)
    question_context = " ".join(unique_chunks)
    return question_context


def chunk_retriever(query: str, PINECONE_API_KEY: str, OPENAI_API_KEY:str, top_k: int = 20):
    """Return the retrieved chunks for ``query`` individually, plus the raw response.

    Args:
        query: Natural-language question to embed and search for.
        PINECONE_API_KEY: Pinecone API key.
        OPENAI_API_KEY: OpenAI API key.
        top_k: Number of chunks to request (default 20).

    Returns:
        tuple: ``(unique_chunks, res)`` -- the distinct chunk texts ordered by
        chunk id, and the Pinecone response whose ``matches`` are sorted the
        same way.
    """
    return _query_index(query, PINECONE_API_KEY, OPENAI_API_KEY, top_k)

## Query

In [54]:
query = "What are the business requirements for Regulation K?"
# One retrieval round-trip serves both views: retriever() returns exactly the
# space-join of the chunks chunk_retriever() returns, so calling both would
# embed the query and hit Pinecone twice for the same data.
unique_chunks, res = chunk_retriever(query=query, PINECONE_API_KEY=pinecone_api_key, OPENAI_API_KEY=openai_api_key, top_k=20)
question_context = " ".join(unique_chunks)
print(len(question_context), len(unique_chunks))

44616 20


## LLMs

### Retrieval Grader

In [55]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

# Data model
# Structured-output schema for the relevance grader. `grade_documents` compares
# `binary_score` against the literal "yes", so any other value counts as "not relevant".
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

# LLM with function call
# temperature=0 is set on the grader model (presumably to keep grading repeatable).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=openai_api_key)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
# The template exposes two variables: {document} and {question}.
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Chain: prompt -> structured LLM; invoking it yields a GradeDocuments instance.
retrieval_grader = grade_prompt | structured_llm_grader
# Smoke test. `docs` is the single joined string returned by retriever(), so this
# grades the concatenation of all 40 retrieved chunks as one "document".
docs = retriever(query=query, PINECONE_API_KEY=pinecone_api_key, OPENAI_API_KEY=openai_api_key, top_k=40)
print(retrieval_grader.invoke({"document": docs, "question": query}))


binary_score='yes'


### Generate

In [56]:

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
# Pulled from the LangChain hub at run time, so this cell needs network access.
# The template is filled below with the variables "context" and "question".
prompt = hub.pull("rlm/rag-prompt")
# LLM
# Rebinds the module-level `llm`; the retrieval grader built earlier keeps the
# model object it was constructed with.
llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=openai_api_key)
# Chain: prompt -> chat model -> plain string
rag_chain = prompt | llm | StrOutputParser()
# Run
# Smoke test using the joined context string (`docs`) from the retrieval grader cell.
generation = rag_chain.invoke({"context": docs, "question": query})
print(generation)

The business requirements for Regulation K include maintaining effective systems of records, controls, and reports to ensure high standards of banking and financial prudence in international operations. U.S. banking organizations must supervise their foreign branches and subsidiaries to conform to these standards, providing information on risk assets, market risk exposure, liquidity management, and internal controls. Additionally, they must comply with recordkeeping and reporting requirements, including submitting reports on foreign operations and maintaining segregation of asset and liability accounts for International Banking Facilities (IBFs).


### Hallucination Grader

In [57]:
# Data model
# Structured-output schema for the groundedness check: 'yes' means the answer is
# supported by the supplied facts.
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# LLM with function call
# Rebinds the module-level `llm` / `structured_llm_grader`; chains composed in
# earlier cells keep the objects they were built with.
llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=openai_api_key)
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt
# The template exposes two variables: {documents} and {generation}.
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

# Chain: prompt -> structured LLM; invoking it yields a GradeHallucinations instance.
hallucination_grader = hallucination_prompt | structured_llm_grader
# Smoke test against the context string and answer produced in the cells above.
hallucination_grader.invoke({"documents": docs, "generation": generation})

GradeHallucinations(binary_score='yes')

### Answer Grader


In [58]:
# Data model
# Structured-output schema for the "does the answer resolve the question" check.
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# LLM with function call
# Rebinds the module-level `llm` / `structured_llm_grader`; chains composed in
# earlier cells keep the objects they were built with.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=openai_api_key)
structured_llm_grader = llm.with_structured_output(GradeAnswer)

# Prompt
# The template exposes two variables: {question} and {generation}.
# NOTE(review): the system text contains an unbalanced quote ("Yes' means");
# left untouched here because it is part of the prompt sent to the model.
system = """You are a grader assessing whether an answer addresses / resolves a question \n
     Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# Chain: prompt -> structured LLM; invoking it yields a GradeAnswer instance.
answer_grader = answer_prompt | structured_llm_grader
# Smoke test against the question and answer produced in the cells above.
answer_grader.invoke({"question": query, "generation": generation})

GradeAnswer(binary_score='yes')

### Question Re-writer

In [59]:
### Question Re-writer

# LLM
# Unlike the graders (temperature=0) this model runs at temperature=1, so
# repeated runs can produce different rewrites -- presumably intentional so a
# retry explores a new phrasing; TODO confirm.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=1, api_key=openai_api_key)

# Prompt
# The template exposes a single variable: {question}.
# NOTE(review): the system text reads "You a question re-writer" (missing "are");
# left untouched here because it is part of the prompt sent to the model.
system = """You a question re-writer that converts an input question to a better version that is optimized \n
     for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

# Chain: prompt -> chat model -> plain string (the rewritten question)
question_rewriter = re_write_prompt | llm | StrOutputParser()
# Smoke test on the current query.
question_rewriter.invoke({"question": query})

'What specific business requirements need to be adhered to under Regulation K?'

## Graph

### Base Class

In [60]:
from typing import List

from typing_extensions import TypedDict


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: the user question; `transform_query` replaces it with a
            re-phrased version
        generation: LLM generation
        documents: retrieved context. `retrieve` stores one space-joined
            string here, while `grade_documents` stores the list of chunks it
            judged relevant, so consumers see either shape despite the
            List[str] annotation.
        filtered_documents: facts read by
            `grade_generation_v_documents_and_question` for the hallucination
            check
        document_chunks: individual chunk texts that `grade_documents` grades
            one by one
    """

    question: str
    generation: str
    documents: List[str]
    filtered_documents: List[str]
    document_chunks: List[str]

### Nodes

In [61]:
def retrieve(state):
    """
    Retrieve documents

    Fetches the chunks for the current question and writes both views of them
    into the state: the joined context string (``documents``) and the
    individual chunk list (``document_chunks``) that ``grade_documents``
    iterates over. Refreshing ``document_chunks`` here matters after
    ``transform_query``: otherwise the grader would keep re-grading the chunks
    of the previous question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): ``documents`` (str), ``document_chunks`` (list of str)
        and the unchanged ``question``
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval -- a single query provides both the chunk list and the joined context
    document_chunks, _ = chunk_retriever(query=question, PINECONE_API_KEY=pinecone_api_key, OPENAI_API_KEY=openai_api_key, top_k=20)
    documents = " ".join(document_chunks)
    return {"documents": documents, "document_chunks": document_chunks, "question": question}

def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state. ``documents`` may be a single
            context string (as written by ``retrieve``) or a list of chunk
            strings (as written by ``grade_documents``).

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]

    # A list would be formatted into the prompt as its Python repr (brackets,
    # quotes, escapes); join it into plain text first, as the manual run cell does.
    context = documents if isinstance(documents, str) else " ".join(documents)

    # RAG generation
    generation = rag_chain.invoke({"context": context, "question": question})
    return {"documents": documents, "question": question, "generation": generation}


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Each entry of ``state["document_chunks"]`` is graded on its own; chunks the
    grader scores "yes" are kept. A chunk whose grading call raises is reported
    and skipped (best-effort), so it ends up counted as deleted.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): ``documents`` and ``filtered_documents`` both hold the
        list of relevant chunks (the latter is the key the hallucination check
        reads), plus the unchanged ``question``
    """
    question = state["question"]
    doc_chunks = state["document_chunks"]

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    print("Original Number of Documents: ", len(doc_chunks))
    filtered_docs = []
    for d in doc_chunks:
        try:
            score = retrieval_grader.invoke({"document": d, "question": question})
        except Exception as e:
            # Deliberate best-effort: one failed grading call must not abort the batch.
            print(f"Error processing document: {e}")
            continue
        if score.binary_score == "yes":
            filtered_docs.append(d)

    print("Number of Documents after Relevance Check: ", len(filtered_docs))
    print("NUmber of Documents deleted: ", len(doc_chunks)-len(filtered_docs))
    return {
        "documents": filtered_docs,
        "filtered_documents": filtered_docs,
        "question": question,
    }

def transform_query(state):
    """
    Re-phrase the current question with the question re-writer chain.

    Args:
        state (dict): The current graph state; must contain "question" and
            "documents"

    Returns:
        state (dict): "question" replaced by the re-written question,
        "documents" passed through unchanged
    """

    print("---TRANSFORM QUERY---")
    original_question, documents = state["question"], state["documents"]

    return {
        "documents": documents,
        "question": question_rewriter.invoke({"question": original_question}),
    }




### Edges

In [62]:
def decide_to_generate(state):
    """
    Route after document grading: answer now, or re-phrase the question first.

    Args:
        state (dict): The current graph state; "question" and "documents" must
            both be present

    Returns:
        str: "generate" when any graded document survived, otherwise
        "transform_query"
    """

    print("---ASSESS GRADED DOCUMENTS---")
    state["question"]  # value unused; the lookup still raises KeyError when the key is absent

    if state["documents"]:
        # At least one relevant document is left, so an answer can be generated
        print("---DECISION: GENERATE---")
        return "generate"

    # Every document was filtered out by the relevance check: re-write the query
    print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---")
    return "transform_query"

def grade_generation_v_documents_and_question(state):
    """
    Routing function for the edge leaving "generate".

    First checks whether the generation is grounded in the documents
    (hallucination grader); only when it is, checks whether it also answers the
    question (answer grader). The return value is one of the route names the
    conditional-edge mapping is keyed on.

    Args:
        state (dict): The current graph state. Needs "question", "generation"
            and the facts to check against: "filtered_documents" when present,
            otherwise "documents".

    Returns:
        str: "not supported" (not grounded in the documents), "useful"
        (grounded and answers the question) or "not useful" (grounded but
        does not answer the question)

    Raises:
        ValueError: if the question, the documents or the generation is missing
    """
    print("---CHECK HALLUCINATIONS---")

    question = state.get("question", None)
    if question is None:
        raise ValueError("The 'question' key is missing in the state dictionary.")

    # Prefer the graded chunks; fall back to the raw retrieval when no node has
    # written "filtered_documents" into the state.
    documents = state.get("filtered_documents", None)
    if documents is None:
        documents = state.get("documents", None)
    if documents is None:
        raise ValueError("The 'filtered_documents' key is missing in the state dictionary.")

    generation = state.get("generation", None)
    if generation is None:
        raise ValueError("The 'generation' key is missing in the state dictionary.")

    # Step 1: is the answer supported by the retrieved facts?
    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )
    if score.binary_score != "yes":
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    # Step 2: does the grounded answer actually resolve the question?
    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    print("---GRADE GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question, "generation": generation})
    if score.binary_score == "yes":
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"


## Build Graph

In [63]:
from langgraph.graph import END, StateGraph, START

# Graph whose shared state follows the GraphState schema
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # fetch context for the current question
workflow.add_node("grade_documents", grade_documents)  # keep only relevant chunks
workflow.add_node("generate", generate)  # generate an answer
workflow.add_node("transform_query", transform_query)  # re-phrase the question

# Build graph
workflow.add_edge(START, "retrieve")
workflow.add_edge("retrieve", "grade_documents")
# After grading: generate if any chunk survived, otherwise re-write the question.
# The mapping keys are the strings decide_to_generate returns.
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)
# A re-written question goes back through retrieval.
workflow.add_edge("transform_query", "retrieve")
# After generating: the mapping below sends "not supported" back to "generate",
# "useful" to END, and "not useful" to "transform_query".
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "transform_query",
    },
)

# Compile
app = workflow.compile()
# Displaying the compiled graph as the cell result; the recorded output shows a
# ReadTimeout against mermaid.ink, so this apparently needs network access.
app

ReadTimeout: HTTPSConnectionPool(host='mermaid.ink', port=443): Read timed out. (read timeout=10)

<langgraph.graph.state.CompiledStateGraph at 0x111dc96d0>

In [64]:
from pprint import pprint

# Run
base_state = {"question": query, "documents": question_context, "document_chunks": unique_chunks}
# Seed "filtered_documents" with the list of relevant chunks. grade_documents()
# returns a whole state dict, so pick out its "documents" list instead of
# storing the dict itself under a key declared as List[str].
inputs = {**base_state, "filtered_documents": grade_documents(state=base_state)["documents"]}
for output in app.stream(inputs):
    for key, value in output.items():
        # Name of the node that just ran
        pprint(f"Node '{key}':")
    pprint("\n---\n")

# Final generation (taken from the state emitted by the last node that ran)
pprint(value["generation"])
app


---CHECK DOCUMENT RELEVANCE TO QUESTION---
Original Number of Documents:  20


KeyboardInterrupt: 

In [65]:
# Run
query = "What does Regulation K, specifically Part 211, cover regarding the international banking operations of U.S. and foreign banking organizations, and how does it regulate nonbanking activities and the operations of foreign banks within the United States?"
# One retrieval round-trip: retriever() returns exactly the space-join of the
# chunks chunk_retriever() returns, so calling both would embed the query and
# hit Pinecone twice for the same data.
unique_chunks, res = chunk_retriever(query=query, PINECONE_API_KEY=pinecone_api_key, OPENAI_API_KEY=openai_api_key, top_k=40)
question_context = " ".join(unique_chunks)

# Grade every chunk, then answer from the relevant ones only.
inputs = {"question": query, "documents": question_context, "document_chunks": unique_chunks}
unique_chunk_dictionary = grade_documents(state=inputs)
filtered_documents = unique_chunk_dictionary["documents"]
filtered_text = " ".join(filtered_documents)
generation = rag_chain.invoke({"context": filtered_text, "question": query})
print(generation)


---CHECK DOCUMENT RELEVANCE TO QUESTION---
Original Number of Documents:  40
Number of Documents after Relevance Check:  29
NUmber of Documents deleted:  11
Regulation K, specifically Part 211, governs the international banking operations of U.S. and foreign banking organizations. It covers permissible activities abroad, including commercial banking, financing, leasing, and investment services, and sets limits on nonbanking activities and investments. Additionally, it regulates the operations of foreign banks within the U.S., including the establishment and supervision of branches and agencies, and requires compliance with U.S. banking laws and standards.


In [73]:
# `del match` inside a for-loop only unbinds the loop variable; it never removes
# the element from the list, so the original loop left `res.matches` (and its
# alias `updated_res_matches`) unchanged. Build the filtered list explicitly.
# NOTE(review): this keeps the matches whose content passed the relevance check,
# which agrees with the count of 29 recorded by the next cell -- confirm that
# keeping (rather than dropping) those matches is the intended direction.
updated_res_matches = [
    match
    for match in res.matches
    if match["metadata"]["content"] in filtered_documents
]
count = len(updated_res_matches)


In [74]:
# Number of matches whose chunk text is among the relevance-filtered documents
count = sum(
    1
    for match in updated_res_matches
    if match["metadata"]["content"] in filtered_documents
)

print(count)

29


In [49]:
# Tally how many retrieved matches come from each source title
sources = {}
for match in res.matches:
    title = match["metadata"]["title"]
    sources[title] = sources.get(title, 0) + 1

sources

{'CFR-2024-title12-vol4.txt': 1,
 'CFR-2024-title12-vol5.txt': 2,
 'CFR-2024-title12-vol2.txt': 29,
 'CFR-2024-title12-vol3.txt': 3,
 'CFR-2024-title12-vol1.txt': 5}

In [50]:
# Prompt template for requirement extraction. {context} and {question} are
# template variables filled when the chain is invoked.
developer_prompt = '''
    # **Regulatory Requirement Extraction & Formatting**

    ## **Instructions**
    - Your task is to extract and format **clear, explicit legal requirements** from the given regulatory text.
    - These requirements should be high-level and actionable.
    - Avoid duplicate requirements.
    - Combine requirements where possible.
    - These requirements must be presented in a structured format for easy reference and compliance tracking.

    ---

    ## **Output Format**
    Provide the extracted requirements as a **CSV-parsable table** using the following structure:

    | Business Requirement |
    |----------------------|
    | [Extracted requirement 1] |
    | [Extracted requirement 2] |
    | … |

    - Each row must contain **one standalone regulatory requirement**.
    - Maintain the **exact wording** from the regulation—**do not paraphrase, summarize, or interpret**.
    ---


    # Inputs
    ## **Regulatory Text**
    {context}

    ## **Question**
    {question}

    ---  '''

# Wrap the whole prompt in a single system message.
# NOTE: this rebinds the module-level `prompt`, `llm` and `rag_chain`; the
# `generate` node looks `rag_chain` up at call time, so after this cell runs it
# uses this extraction chain instead of the hub RAG prompt.
prompt = ChatPromptTemplate([
        ("system", developer_prompt),
    ])

# Reasoning model with reasoning_effort="high" (no temperature is passed here).
llm = ChatOpenAI(model_name="o3-mini", reasoning_effort="high", api_key=openai_api_key)

# Combine the prompt, the language model, and the output parser into a processing chain.
rag_chain = prompt | llm | StrOutputParser()

# Invoke the chain (a blocking call) on the relevance-filtered text and the current query.
generation = rag_chain.invoke({
    "context": filtered_text,
    "question": query,
})

# Render the returned markdown table in the notebook.
display(Markdown(generation))

| Business Requirement |
|----------------------|
| U.S. banking organizations conducting international operations under Regulation K Part 211 shall supervise and administer their foreign branches and subsidiaries in such a manner as to ensure that their operations conform to high standards of banking and financial prudence. |
| U.S. banking organizations shall maintain effective systems of records, controls, and reports—including providing in particular information on risk assets, exposure to market risk, liquidity management, operations, internal controls, legal and operational risk, and conformance to management policies—to keep management informed of the activities and condition of their foreign operations. |
| Reports on risk assets shall be sufficient to permit an appraisal of credit quality and assessment of exposure to loss, providing full information on the condition of material borrowers. |
| Each branch, agency, commercial lending company, or affiliate of a foreign bank shall be examined on-site at least once during each 12‑month period (or on an 18‑month cycle for qualifying small institutions) by the appropriate supervisory authority. |
| Permissible activities abroad for U.S. and foreign banking organizations shall be limited to those activities that are usual in connection with the transaction of banking or other financial operations abroad—as specified in Sec. 211.10(a)—including commercial banking, financing (such as commercial, consumer, mortgage banking, and factoring), leasing (of real or personal property), acting as fiduciary, underwriting insurance, providing advisory services, data processing, management consulting, underwriting and distributing equity and debt securities, operating a travel agency (in connection with financial services abroad), and engaging in commodity-swap transactions, all subject to the applicable limitations, prior notice, and approval procedures. |
| An investor that is well capitalized and well managed may, after providing 30 days’ prior written notice to the Board, underwrite equity securities outside the United States provided that the aggregate commitments do not exceed the applicable tier 1 capital limits specified in Sec. 211.10(a)(14); and an investor that is not well capitalized and well managed may underwrite such securities subject to an aggregate limit of $60 million following a 30‑day prior notice. |
| Any shares held in trading or dealing accounts for longer than 90 days shall be reported to the senior management of the investor. |
| Any banking organization that opens, closes, or relocates a branch shall report such change in a manner prescribed by the Board. |
| Applications, notices, and reports required under Regulation K Part 211 shall be filed with the appropriate Federal Reserve Bank, and the Board shall act on an application within 60 calendar days after receipt, unless an extension is communicated with its reasons. |
| A U.S. banking organization wishing to engage abroad in data processing or data transmission activities beyond those described in Regulation Y must apply for the Board’s prior consent under the procedures set out in Regulation K. |
| Foreign banks underwriting securities to be distributed in the United States shall be deemed to be engaged in U.S. operations and must either obtain financial holding company status under the Gramm‐Leach‐Bliley Act or secure authority under section 4(c)(8) of the Bank Holding Company Act, with all related underwriting activities (including the booking of fees and risk) subject to the relevant limitations. |
| Banking institutions shall submit to the Board, at least quarterly, reports regarding the amounts and composition of their holdings of international assets, and—in accordance with section 907(b) of the International Lending Supervision Act—such information on material concentrations shall be made publicly available upon request. |
| Regulation K Part 211 defines and regulates the nonbanking activities in which bank holding companies and foreign banking organizations may engage directly or through a subsidiary, subject to specific limits and subject to Board review and, where applicable, prior consent. |
| For foreign banks not subject to comprehensive consolidated supervision, the Board shall develop and publish criteria to evaluate the safety and soundness of their U.S. operations and may impose supervisory constraints or require agreements to ensure that those operations meet the required standards. |

In [51]:
# Same extraction task issued directly through the OpenAI client instead of a
# LangChain chain. The regulatory text is substituted into the single `{}`
# placeholder by str.format; the question travels as the separate user message.
developer_prompt = '''
    # **Regulatory Requirement Extraction & Formatting**

    ## **Instructions**
    - Your task is to extract and format **clear, explicit legal requirements** from the given regulatory text.
    - These requirements should be high-level and actionable.
    - Avoid duplicate requirements.
    - Combine requirements where possible.
    - These requirements must be presented in a structured format for easy reference and compliance tracking.
    - Remove any requirements that are not applicable to the given question.
    - Each requirements must be actionable
    - Provide the legislative reference for each requirement.
    ---

    ## **Output Format**
    Provide the extracted requirements as a **CSV-parsable table** using the following structure:

    | Business Requirement |
    |----------------------|
    | [Extracted requirement 1] |
    | [Extracted requirement 2] |
    | … |

    - Each row must contain **one standalone regulatory requirement**.
    - Maintain the **exact wording** from the regulation—**do not paraphrase, summarize, or interpret**.
    ---


    # Inputs
    ## **Regulatory Text**
    {}

    ---  '''.format(filtered_text)

# Rebinds the module-level name `client`.
client = OpenAI(api_key=openai_api_key)
# The instructions are sent under the "developer" role and the question under "user".
completion = client.chat.completions.create(
        model="o3-mini",
        reasoning_effort="high",
        messages=[
            {"role": "developer", "content": developer_prompt},
            {"role": "user", "content": query}
        ]
    )

# Render the first choice's markdown table in the notebook.
display(Markdown(completion.choices[0].message.content))


| Business Requirement |
|----------------------|
| "This subpart sets out rules governing the international and foreign activities of U.S. banking organizations, including procedures for establishing foreign branches and Edge and agreement corporations to engage in international banking, and for investments in foreign organizations. [12 CFR 211.1]" |
| "A U.S. banking organization that falls within the scope of this subpart—as a member bank, an Edge corporation, an agreement corporation, or a bank holding company—shall comply with the rules governing its international operations and investments. [12 CFR 211.1(c)]" |
| "A foreign bank applying to establish an office in the United States must submit an application and comply with the procedures and standards for approval as prescribed in this subpart. [12 CFR 211.24]" |
| "Unless specifically made eligible for the exemptions by the Board, a foreign banking organization shall qualify for the exemptions afforded by this section only if, disregarding its United States banking, more than half of its worldwide business is banking; and more than half of its banking business is outside the United States. [12 CFR 211.23(a)]" |
| "A foreign banking organization must meet at least two of the following requirements: (i) Banking assets held outside the United States exceed total worldwide nonbanking assets; (ii) Revenues derived from the business of banking outside the United States exceed total revenues derived from its worldwide nonbanking business; or (iii) Net income derived from the business of banking outside the United States exceeds total net income derived from its worldwide nonbanking business; and meet at least two of the following: (i) Banking assets held outside the United States exceed banking assets held in the United States; (ii) Revenues derived from the business of banking outside the United States exceed revenues derived from the business of banking in the United States; or (iii) Net income derived from the business of banking outside the United States exceeds net income derived from the business of banking in the United States. [12 CFR 211.23(a)(1)-(2)]" |
| "U.S. banking organizations conducting international operations shall supervise and administer their foreign branches and subsidiaries in such a manner as to ensure that their operations conform to high standards of banking and financial prudence, including maintaining effective systems of records, controls, and reports to keep management informed of their activities and condition. [12 CFR 211.13(a)(1)]" |
| "U.S. banking organizations shall make available to examiners the reports on condition—including internal and external audits and any other information necessary to determine compliance with U.S. banking law—as prescribed in this subpart. [12 CFR 211.13(c)]" |
| "Each branch or agency of a foreign bank shall be examined on-site at least once during each 12‑month period by the appropriate supervisory agency, with additional examinations as determined necessary by the Board. [12 CFR 211.26(c)(1)]" |
| "Direct and indirect investments made by U.S. banking organizations abroad shall be made in accordance with the general consent, limited general consent, prior notice, or specific consent procedures specified in this subpart, and in compliance with applicable minimum capital adequacy standards. [12 CFR 211.9]" |

## Question + Concise Summary

In [47]:
from src.concise_summary import *

query = "Describe Regulation K: Part 211 -- International Banking Operations?"
# One retrieval round-trip: retriever() returns exactly the space-join of the
# chunks chunk_retriever() returns, so calling both would embed the query and
# hit Pinecone twice for the same data.
unique_chunks, res = chunk_retriever(query=query, PINECONE_API_KEY=pinecone_api_key, OPENAI_API_KEY=openai_api_key, top_k=10)
question_context = " ".join(unique_chunks)
# Use the key loaded once at the top of the notebook, like every other cell,
# instead of re-reading the environment here.
summary = concise_summary(original_question=query, context=question_context, OPENAI_API_KEY=openai_api_key)
inputs = {"question": query, "documents": question_context, "document_chunks": unique_chunks}
updated_query = transform_query(inputs)
print(summary)
print(updated_query["question"])

---TRANSFORM QUERY---
Regulation K, specifically Part 211, governs the international banking operations of U.S. banking organizations and foreign banking organizations. Its core purpose is to establish a framework for permissible nonbanking activities and to regulate the operations of foreign banks within the United States. The regulation outlines the authority, definitions, and procedures for foreign banking organizations, including their interstate banking operations, approval processes for establishing offices, and examination standards. It also addresses the limitations on loans, disclosure of supervisory information, and the criteria for evaluating U.S. operations of foreign banks. Overall, Regulation K aims to ensure that foreign banks comply with U.S. laws while facilitating their participation in the U.S. financial system, thereby promoting stability and transparency in international banking activities.
What is Regulation K: Part 211 about in terms of international banking oper

In [None]:
# One retrieval round-trip: retriever() returns exactly the space-join of the
# chunks chunk_retriever() returns, so calling both would embed the query and
# hit Pinecone twice for the same data.
unique_chunks, res = chunk_retriever(query=query, PINECONE_API_KEY=pinecone_api_key, OPENAI_API_KEY=openai_api_key, top_k=10)
question_context = " ".join(unique_chunks)
inputs = {"question": query, "documents": question_context, "document_chunks": unique_chunks}
unique_chunk_dictionary = grade_documents(state=inputs)
filtered_documents = unique_chunk_dictionary["documents"]
filtered_text = " ".join(filtered_documents)
# Compare context size before and after relevance filtering (in characters)
print(len(question_context), len(filtered_text))

In [28]:
# Inspect which metadata fields a retrieved match carries (first match only).
res.matches[0]["metadata"].keys()

dict_keys(['content', 'postchunk', 'title', 'id', 'prechunk', 'token_count'])

In [35]:
# NOTE(review): `title_list` is not defined in any cell visible in this notebook;
# presumably it comes from a star import or a since-deleted cell -- confirm, as
# this cell may fail on a fresh kernel.
title_list[0]["title"]

'CFR-2024-title12-vol2.txt'