In [1]:
import time

from IPython.display import Markdown, display  # Import
from openai import OpenAI
import warnings
import tiktoken
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os
from dotenv import load_dotenv
import pandas as pd
from io import StringIO
from src.business_requirements import *
from src.prompts import *

# Silence all Python warnings so library noise stays out of the cell outputs.
# NOTE(review): this hides every warning category, including pandas ones.
warnings.filterwarnings("ignore")

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# The OpenAI key has to be present in the environment. Fail early with a
# readable message instead of the TypeError that assigning None into
# os.environ raised before; when the key is set, re-assigning it was a no-op.
if os.getenv("OPENAI_API_KEY") is None:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )
pinecone_api_key = os.getenv("PINECONE_API_KEY")


## Query Question

In [2]:
# The question put to the requirements-extraction helper.
query = "What are the business requirements for Regulation K?"

# Ask for the requirements (top_k=25) and keep the returned text for the
# cells below.
response = verbatim_business_requirements(
    query,
    top_k=25,
)

# Render the returned text as Markdown in the cell output.
display(
    Markdown(response)
)

Business Requirement^^
"Within three days of a decision to disapprove an investment, the Board shall notify the investor in writing and state the reasons for the disapproval."^^
"An investment in an export trading company that has not been disapproved shall be made within one year from the date of the notice not to disapprove, unless the time period is extended by the Board or by the appropriate Federal Reserve Bank."^^
"No foreign bank may establish a branch or an agency, or acquire ownership or control of a commercial lending company, without the prior approval of the Board."^^
"A foreign bank shall submit an application to, and obtain prior approval from, the OCC before it establishes a Federal branch or agency or exercises fiduciary powers at a Federal branch."^^
"A foreign bank must receive a license from the OCC to open and operate its initial Federal branch or agency in the United States."^^
"An application to establish a U.S. office of a foreign bank shall not be approved unless the Board determines that the foreign bank engages directly in the business of banking outside of the United States and is subject to comprehensive supervision or regulation on a consolidated basis by the appropriate authorities in its home country, and that the foreign bank has furnished the information the Board needs to adequately assess the application."^^
"The Board shall take final action on any application under the relevant approval procedures not later than 180 days after receipt of the application, except that the period may be extended for an additional 180 days after providing notice of, and the reasons for, the extension to the applicant."^^
"Each banking institution shall establish an allocated transfer risk reserve (ATRR) for specified international assets when required by the Board in accordance with Regulation K."^^
"An eligible investor that does not qualify under the general consent procedure for an investment in an export trading company shall give the Board 60 days’ prior written notice of the proposed investment."^^
"A proposed investment in an export trading company may be made before the expiration of the 60‑day notice period if the Board notifies the investor in writing of its intention not to disapprove the investment."

## Create Excel Sheet

In [3]:
def clean_dataframe(df, column):
    """
    Return a cleaned copy of the DataFrame, based on the text in one column.

    Cleaning steps:
      - Drop rows whose value in the specified column is missing or does not
        contain any alphabetical characters (A-Z, a-z).
      - Remove, in this order, the exact substrings "| ", "|", "^^" and the
        double-quote character from the remaining values of that column.

    The input DataFrame is left untouched; the result is an independent copy
    that keeps the index labels of the surviving rows.

    Parameters:
      df (pd.DataFrame): The input DataFrame.
      column (str): The name of the column to clean.

    Returns:
      pd.DataFrame: The cleaned DataFrame, with the column stored as strings.
    """
    values = df[column]

    # Missing values are tested explicitly: astype(str) would turn NaN/None into
    # the strings "nan"/"None", which contain letters and would slip through
    # the alphabetical filter.
    has_letters = values.notna() & values.astype(str).str.contains(r'[A-Za-z]', na=False)

    # .copy() yields an independent frame, so the assignment below neither
    # touches the caller's data nor writes into a slice of it.
    cleaned = df.loc[has_letters].copy()

    text = cleaned[column].astype(str)
    # "| " is removed before "|" so that a pipe takes its trailing space with it.
    for token in ("| ", "|", "^^", '"'):
        text = text.str.replace(token, "", regex=False)

    cleaned[column] = text
    return cleaned

# Convert the response text to a DataFrame, discard columns that are entirely
# empty, then clean whichever column comes first after that.
df = (
    convert_str_to_df(response)
    .dropna(axis=1, how='all')
    .pipe(lambda table: clean_dataframe(table, table.columns[0]))
)
df


Unnamed: 0,Business Requirement^^
0,Within three days of a decision to disapprove ...
1,An investment in an export trading company tha...
2,No foreign bank may establish a branch or an a...
3,"A foreign bank shall submit an application to,..."
4,A foreign bank must receive a license from the...
5,An application to establish a U.S. office of a...
6,The Board shall take final action on any appli...
7,Each banking institution shall establish an al...
8,An eligible investor that does not qualify und...
9,A proposed investment in an export trading com...


In [8]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Prompt: pull the shared RAG prompt, which takes `context` and `question` inputs.
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Chain: fill the prompt, call the model, and parse the reply to a plain string.
rag_chain = prompt | llm | StrOutputParser()

# docs: context handed to the prompt. The assignment was previously left blank
# (a SyntaxError), so the requirements text already retrieved for `query` is
# used here.
# TODO(review): swap in the retrieved source documents if a different context
# was intended.
docs = response

# `question` was never defined in this notebook; the question asked is `query`.
generation = rag_chain.invoke({"context": docs, "question": query})

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])