In [1]:
import time

from IPython.display import Markdown, display  # Import
from openai import OpenAI
import warnings
import tiktoken
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os
from dotenv import load_dotenv
from src.embeddings import *
from src.data_processing import *

# Silence library warnings so the cell outputs stay readable.
warnings.filterwarnings("ignore")
# Load variables from a local .env file (python-dotenv) into the environment.
load_dotenv()

# Fail fast with a readable message when a credential is absent. The previous
# `os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")` self-assignment
# was a no-op when the key was set and raised an opaque
# "str expected, not NoneType" TypeError when it was not.
_missing_keys = [
    key_name
    for key_name in ("OPENAI_API_KEY", "PINECONE_API_KEY")
    if not os.getenv(key_name)
]
if _missing_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

# Used by the index-loading cell below to authenticate against Pinecone.
pinecone_api_key = os.getenv("PINECONE_API_KEY")


## Load Index

In [2]:
# Connect to Pinecone and open the existing index queried by this notebook.
INDEX_NAME = "legal-assistant-rag"

pc = Pinecone(api_key=pinecone_api_key)
index = pc.Index(INDEX_NAME)

# Last expression of the cell: display the index statistics.
index.describe_index_stats()

{'dimension': 3072,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 13660}},
 'total_vector_count': 13660}

## Query

In [25]:
# Embed the question and retrieve its nearest neighbours from the index.
EMBEDDING_MODEL = "text-embedding-3-large"
TOP_K = 20

query = "What are the main business requirements for Regulation O?"
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# One input string is sent, so the query vector is the first item of `.data`.
embedding_response = client.embeddings.create(input=query, model=EMBEDDING_MODEL)
xq = embedding_response.data[0].embedding

res = index.query(vector=xq, top_k=TOP_K, include_metadata=True)
res

{'matches': [{'id': '10661',
              'metadata': {'content': 'Each executive officer or director of a '
                                      'member bank the shares of \n'
                                      'which are not publicly traded shall '
                                      'report annually to the board of \n'
                                      'directors of the member bank the '
                                      'outstanding amount of any credit that \n'
                                      'was extended to the executive officer '
                                      'or director and that is secured by \n'
                                      'shares of the member bank.\n'
                                      '\n'
                                      '[Reg. O, 59 FR 8837, Feb. 24, 1994. '
                                      'Redesignated at 71 FR 71474, Dec. \n'
                                      '11, 2006]\n'
                                      '

In [None]:
# Concatenate the text of every retrieved chunk into one context string.
# Each element of `res.matches` is already a single match, so its text lives at
# match["metadata"]["content"]. The earlier `r["matches"]["metadata"]` lookup
# indexed a key that matches do not carry and threw the value away, leaving
# `response_context` empty.
response_context = "\n\n".join(
    match["metadata"]["content"] for match in res.matches
)

In [12]:
# Display the first entry of `res.matches` (its id and metadata) as the cell output.
res.matches[0]

{'id': '10661',
 'metadata': {'content': 'Each executive officer or director of a member bank '
                         'the shares of \n'
                         'which are not publicly traded shall report annually '
                         'to the board of \n'
                         'directors of the member bank the outstanding amount '
                         'of any credit that \n'
                         'was extended to the executive officer or director '
                         'and that is secured by \n'
                         'shares of the member bank.\n'
                         '\n'
                         '[Reg. O, 59 FR 8837, Feb. 24, 1994. Redesignated at '
                         '71 FR 71474, Dec. \n'
                         '11, 2006]\n'
                         '\n'
                         '\n'
                         '<R05>\n'
                         'Sec.  215.11  Civil penalties.\n'
                         '\n'
                         '    Any me

In [26]:
# Order the retrieved matches by numeric id; `c` is consumed by the next cell.
def _id_as_int(match):
    """Return the match id converted to an integer, for numeric ordering."""
    return int(match['id'])


c = list(res.matches)
c.sort(key=_id_as_int)
print(len(c))

20


In [27]:
# Print "<id> <score>" for every match, following the order of `c`.
for match in c:
    match_id, match_score = match["id"], match["score"]
    print(match_id, match_score)

2167 0.55118847
2300 0.5293496
2357 0.5204428
2799 0.5463193
2800 0.52279943
4089 0.5283464
5199 0.5328881
7591 0.52971715
8801 0.53396064
8802 0.55762064
10650 0.55030704
10651 0.59523314
10652 0.5306656
10659 0.5417333
10660 0.5926206
10661 0.5996702
12156 0.5214681
12704 0.5289556
13165 0.5371414
13166 0.5424339


In [4]:
# Collect each match's chunk together with its neighbouring chunks.
chunk_list = []
for r in res.matches:
    metadata = r["metadata"]
    chunk_list.extend(
        [metadata["prechunk"], metadata["content"], metadata["postchunk"]]
    )

# De-duplicate while keeping first-seen order. Iteration order of a `set` of
# strings changes between interpreter runs (hash randomisation), which made
# the assembled context, and therefore the prompt, differ from run to run.
unique_chunks = list(dict.fromkeys(chunk_list))

print("Total Number of Chunks: ", len(chunk_list))
# The total above still counts duplicates; report what is actually joined.
print("Unique Chunks: ", len(unique_chunks))
question_context = " ".join(unique_chunks)
print("Character Length: ", len(question_context))
print("Token Size: ", count_tokens_gpt4(question_context))



Total Number of Chunks:  120
Character Length:  438527
Token Size:  97279


In [None]:
# Prompt template for the extraction task; its single `{}` placeholder is the
# slot for the regulatory text.
_DEVELOPER_PROMPT_TEMPLATE = '''
# **Refined Prompt: Regulatory Requirement Extraction & Formatting**

## **Instructions**
Your task is to extract and format **explicit business requirements** from the given regulatory text to answer the user questions.

---

## **Regulatory Text**
{}

## **Output Format**
Provide the extracted requirements in a **CSV-parsable table** with the following structure:

| **Business Requirement** |
|----------------------------------------------------|
| [Extracted requirement 1] |
| [Extracted requirement 2] |
| ... |

- Each row must contain a **single** regulatory requirement.
- Maintain **verbatim wording** from the regulation—**do not paraphrase or summarize**.

---

## **Extraction Guidelines**
1. **Comprehensive Coverage**: Extract **all** explicit business requirements present in the regulation.
2. **Accuracy**: Use the **exact** wording from the regulatory text.
3. **Structural Integrity**: Ensure extracted text is **clearly formatted** and **CSV-compatible**.
4. **Thorough Review**: Carefully analyze the entire regulatory text to avoid omissions.

---

## **Additional Notes**
- If a requirement is ambiguous, **extract the most explicit portion** while keeping it verbatim.
- Exclude any **non-requirement** statements (e.g., general guidance, definitions, or contextual explanations).

'''

# Fill the placeholder with the context string built from the retrieved chunks.
developer_prompt = _DEVELOPER_PROMPT_TEMPLATE.format(question_context)

In [72]:
# Ask the chat model to answer `query` using the prompt built from the
# retrieved regulatory text. `OpenAI` is imported in the first cell and
# `client` is created in the query cell, so neither is repeated here.
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "developer", "content": developer_prompt},
        {"role": "user", "content": query},
    ],
)

# `message.content` is optional in the API response (e.g. on a refusal);
# show an explicit placeholder instead of handing None to Markdown.
answer_markdown = completion.choices[0].message.content
display(Markdown(answer_markdown or "_The model returned no content._"))



| **Business Requirement** |
|----------------------------------------------------|
| No member bank may extend credit to any insider of the bank or insider of its affiliates unless the extension of credit is made on substantially the same terms as, and following credit underwriting procedures that are not less stringent than, those prevailing at the time for comparable transactions by the bank with other persons that are not covered by this part and who are not employed by the bank. |
| A member bank may not extend credit to any insider of the bank or insider of its affiliates in an amount that, when aggregated with the amount of all other extensions of credit to that person and all related interests of that person, exceeds the higher of $25,000 or 5 percent of the member bank's unimpaired capital and unimpaired surplus, unless the extension of credit has been approved in advance by a majority of the entire board of directors of that bank and the interested party has abstained from participating directly or indirectly in the voting. |
| In no event may a member bank extend credit to any insider of the bank or insider of its affiliates in an amount that, when aggregated with all other extensions of credit to that person, and all related interests of that person, exceeds $500,000, except by complying with the requirements for board approval. |
| No member bank may extend credit to any insider of the bank or insider of its affiliates in an amount that, when aggregated with the amount of all other extensions of credit by the member bank to that person and to all related interests of that person, exceeds the lending limit of the member bank. |
| A member bank may not extend credit to any insider of the bank or insider of its affiliates unless the extension of credit is in an amount that, when aggregated with the amount of all outstanding extensions of credit by that bank to all such insiders, does not exceed the bank's unimpaired capital and unimpaired surplus. |
| The prohibition on paying overdrafts applies to an account of an executive officer or director of the bank or executive officer or director of its affiliates unless the payment of funds is made in accordance with a written, preauthorized, interest-bearing extension of credit plan that specifies a method of repayment or a written, preauthorized transfer of funds from another account of the account holder at the bank. |
| No member bank may pay an overdraft of an executive officer or director of the bank or executive officer or director of its affiliates on an account at the bank, unless the payment of funds is made in accordance with a written, preauthorized, interest-bearing extension of credit plan that specifies a method of repayment or a written, preauthorized transfer of funds from another account of the account holder at the bank. |
| No executive officer, director, or principal shareholder of a member bank or any of its affiliates shall knowingly receive from a member bank, directly or indirectly, any extension of credit not authorized under this section. |
| Any extension of credit that was outstanding on March 10, 1979, and that would violate the lending limits established under Regulation O if made on or after March 10, 1979, shall be reduced in amount by March 10, 1980, to comply with the lending limit. |
| Each member bank shall maintain records necessary for compliance with the requirements of this part. |
| Executive officers or directors of member banks with shares not publicly traded shall report annually to the board of directors of the member bank the outstanding amount of any credit that was extended to them and that is secured by shares of the member bank. |
| Any member bank, or any officer, director, employee, agent, or other person participating in the conduct of the affairs of the bank, that violates any provision of this part (other than the disclosure requirements) is subject to civil penalties as specified in section 29 of the Federal Reserve Act. |