**Table of contents**<a id='toc0_'></a>    
- [Load Document and Processing](#toc1_1_1_1_)    
        - [Try another chunking method to avoid too small chunks](#toc1_1_1_1_1_)    
      - [Load retrieval model](#toc1_1_1_2_)    
      - [Sample Agent](#toc1_1_1_3_)    
        - [Evaluation](#toc1_1_1_3_1_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

In [1]:
import os, sys

import pandas as pd
import numpy as np

from typing import Literal, TypedDict, List, Optional, Any

from pydantic import BaseModel, SecretStr

from datetime import datetime

from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import google.generativeai as genai #Use gg genai for gemini API
from langgraph.prebuilt import create_react_agent
from markitdown import MarkItDown

import mlflow

## RAG
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from docling.document_converter import DocumentConverter
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractOcrOptions # to the use of OCR in scanned pdfs
from langchain_docling.loader import DoclingLoader, ExportType
from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
from langchain_chroma import Chroma
from langchain.schema import Document
from langgraph.prebuilt import create_react_agent
from langchain_community.vectorstores.utils import filter_complex_metadata
import json

from datetime import datetime

#for module import
import sys, os
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)



In [5]:
class ModelConfig(BaseModel):
    openai_api_key: SecretStr
    googleai_api_key: SecretStr
    pinecone_api_key: SecretStr
    # openai_embedding_model: Literal['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'] #pick text-embedding-3-small for matching with pinecone
    googleai_embedding_model: Literal['gemini-embedding-001', 'gemini-embedding-002', 
                                      'text-embedding-005', 'text-multilingual-embedding-002', 'colbertv2o'] #pick gemini-embedding-001 or text-embedding-005 for matching with pinecone
    openai_chat_model: Literal['gpt-4o-mini', 'gpt-4o', 'gpt-4.1-mini', 'gpt-4.1', 'none'] #none for using gemini model
    # googleai_chat_model: Literal['gemini-2-turbo', 'gemini-2-pro', 'gemini-2.5-flash', 'gemini-2.5-pro'] #gemini models
    # embedding_as_judge: Literal['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'] 
    # embedding_as_judge: Literal['gemini-embedding-001', 'gemini-embedding-002', 
    #                                   'text-embedding-005', 'text-multilingual-embedding-002'] 
    # llm_as_judge: Literal['gpt-4o-mini', 'gpt-4o', 'gpt-4.1-mini', 'gpt-4.1']
    llm_as_judge: Literal['gemini-2-turbo', 'gemini-2-pro', 'gemini-2.5-flash', 'gemini-2.5-pro'] #gemini models
    
    corpus_type: Literal['gen_100', 'gen_hybrid']
    max_token: int = 1024
    use_ocr: bool = False
    chunker: Literal['hybrid', 'recursive', 'custom']
    mode: Literal['test', 'real'] #test for 1 sample dataset, real for full dataset
    model_setup: Literal['linear', 'agent']
    
config = ModelConfig.model_validate(
    dict(
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        googleai_api_key = os.environ.get("GOOGLE_API_KEY"),
        pinecone_api_key=os.environ.get("PINECONE_API_KEY"),
        # openai_embedding_model='text-embedding-3-small',
        googleai_embedding_model='colbertv2o',
        openai_chat_model='gpt-4o-mini', #none for using gemini model
        # googleai_chat_model='gemini-2.5-flash',
        # embedding_as_judge='gemini-embedding-001',
        llm_as_judge='gemini-2-turbo',
        corpus_type='gen_100',
        max_token=1024,
        use_ocr=False,
        chunker='hybrid',
        mode='test',
        model_setup='agent'
    )
)

In [6]:
import logging, warnings

# Suppress verbose logs
logging.getLogger("docling").setLevel(logging.WARNING)
logging.basicConfig(level=logging.ERROR)

# Suppress all warnings
warnings.filterwarnings("ignore")

In [7]:
run_name = f'{config.model_setup}_{config.corpus_type}_{datetime.now().strftime("%Y%m%d_%H%M%S")}'

In [8]:
#Setup MlFlow

import mlflow

experiment_name = "Legal_doc_Experiment"

# Create or get experiment
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    mlflow.create_experiment(experiment_name)

# Start a parent MLflow run
with mlflow.start_run(
    run_name=run_name,
    description="RAG Model Experiment with different configurations"
) as parent_run:
    
    run_id = parent_run.info.run_id  # Save the run id for future reference
    
    # Enable LangChain autologging
    mlflow.langchain.autolog()

    def config_to_dict(config: "ModelConfig") -> dict:
        """
        Convert a Pydantic ModelConfig to a flat dictionary suitable for MLflow logging.
        Unwraps SecretStr values automatically.
        """
        d = config.model_dump()
        for k, v in d.items():
            if isinstance(v, SecretStr):
                d[k] = v.get_secret_value()
        return d

    # Log all config parameters
    mlflow.log_params(config_to_dict(config))

#### <a id='toc1_1_1_1_'></a>[Load Document and Processing](#toc0_)

In [6]:
from shared_functions.global_functions import *
from shared_functions.gg_sheet import *

2025-10-01 10:30:26,252 - INFO - Found credentials in shared credentials file: ~/.aws/credentials


In [7]:
list_files_recursive()

['luat_doanh_nghiep_2020.pdf',
 'luat_doanh_nghiep_2025.pdf',
 'luat_thue_tndn_2025.pdf',
 'luat_thue_ttdb_2025.pdf',
 'nghi_dinh_huongdan_VAT_2025.pdf',
 'nghi_quyet_giam_VAT_2024.pdf',
 'thong_tu_bai_bo_ve_thue_2024.pdf']

In [8]:
from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
from transformers import AutoTokenizer

from docling_core.transforms.chunker.hierarchical_chunker import (
    ChunkingDocSerializer,
    ChunkingSerializerProvider,
)
from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
from docling_core.transforms.serializer.markdown import MarkdownTableSerializer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from markitdown import MarkItDown
import tiktoken
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [9]:
documents = []

tokenizer = OpenAITokenizer(
    # tokenizer=tiktoken.encoding_for_model("gpt-4o"),
    tokenizer=tiktoken.get_encoding('cl100k_base'),
    max_tokens=1000,  # context window length required for OpenAI tokenizers
)

class MDTableSerializerProvider(ChunkingSerializerProvider):
    def get_serializer(self, doc):
        return ChunkingDocSerializer(
            doc=doc,
            table_serializer=MarkdownTableSerializer(),  # configuring a different table serializer
        )

sample_path = download_s3_to_temp('legaldocstorage/luat_thue_tndn_2025.pdf')
loader = DoclingLoader(
                file_path= sample_path,
                export_type=ExportType.DOC_CHUNKS,
                chunker=HybridChunker(preserve_formatting = True, 
                                      chunk_size = 1000,
                                      chunk_overlap = 100,
                                    #   tokenizer = tokenizer,
                                    #   serializer_provider=MDTableSerializerProvider(),
                )
            )

for chunk in loader.load(): #use lazy_load if file too big
    chunk.metadata.update({
        'source': sample_path.split('/')[-1],
        'file_type': 'pdf',
        'processing_method': 'docling',
        'chunker': 'hybrid',
        'file_size': len(chunk.page_content),
    })
    documents.append(chunk)

print(f"Successfully processed {sample_path.split('/')[-1]} - {len(documents)} chunks")


✅ Downloaded s3://legaldocstorage/luat_thue_tndn_2025.pdf to C:\Users\admin\AppData\Local\Temp\tmpu0m11oom.pdf


Token indices sequence length is longer than the specified maximum sequence length for this model (1193 > 512). Running this sequence through the model will result in indexing errors


Successfully processed C:\Users\admin\AppData\Local\Temp\tmpu0m11oom.pdf - 110 chunks


In [10]:
for doc in documents:
    print(doc.page_content)
    print('---'*20)

QUỐC HỘI
-------
Luật số: 67/2025/QH15
------------------------------------------------------------
CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM Độc lập -Tự do -Hạnh phúc
---------------
Hà Nội, ngày 14 tháng 6 năm 2025
------------------------------------------------------------
THUẾ THU NHẬP DOANH NGHIỆP
Căn cứ Hiến pháp nước Cộng hòa xã hội chủ nghĩa Việt Nam;
Quốc hội ban hành Luật Thuế thu nhập doanh nghiệp.
------------------------------------------------------------
Điều 1. Phạm vi điều chỉnh
Luật này quy định về người nộp thuế, thu nhập chịu thuế, thu nhập được miễn thuế, căn cứ tính thuế, phương pháp tính thuế và ưu đãi thuế thu nhập doanh nghiệp.
------------------------------------------------------------
Điều 2. Người nộp thuế
1. Người nộp thuế thu nhập doanh nghiệp là tổ chức hoạt động sản xuất, kinh doanh hàng hóa, dịch vụ có thu nhập chịu thuế theo quy định của Luật này (sau đây gọi là doanh nghiệp), bao gồm:
- a) Doanh nghiệp được thành lập theo quy định của pháp luật Việt Nam;
-

##### <a id='toc1_1_1_1_1_'></a>[Try another chunking method to avoid too small chunks](#toc0_)

In [11]:
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
mk = MarkItDown()

source_path = sample_path #temp path

#parse to text
markdown = mk.convert(source_path).markdown

#Convert to Document
doc_obj = Document(
    page_content = markdown,
    metadata = {'source': 'luat_thue_tndn_2025.pdf', 
                'file_type': 'pdf', 
                'processing_method': 'docling',
                'chunker': 'hybrid',
                'file_size': len(markdown)}
    )

#Require document parsing and Document Converter first
docs = splitter.split_documents([doc_obj])

count = 0
for doc in docs:
    count += 1

print(f"Successfully chunked the document {source_path.split('/')[-1]} to {count} chunks")

Successfully chunked the document C:\Users\admin\AppData\Local\Temp\tmpu0m11oom.pdf to 128 chunks


In [12]:
for doc in docs:
    print(doc.page_content)
    print('---'*20)

QUỐC HỘI
-------

CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM
Độc lập - Tự do - Hạnh phúc
---------------

Luật số: 67/2025/QH15

Hà Nội, ngày 14 tháng 6 năm 2025

LUẬT

THUẾ THU NHẬP DOANH NGHIỆP

Căn cứ Hiến pháp nước Cộng hòa xã hội chủ nghĩa Việt Nam;

Quốc hội ban hành Luật Thuế thu nhập doanh nghiệp.

Chương I

Điều 1. Phạm vi điều chỉnh

NHỮNG QUY ĐỊNH CHUNG
------------------------------------------------------------
Điều 1. Phạm vi điều chỉnh

NHỮNG QUY ĐỊNH CHUNG

Luật này quy định về người nộp thuế, thu nhập chịu thuế, thu nhập được miễn thuế, căn cứ tính
thuế, phương pháp tính thuế và ưu đãi thuế thu nhập doanh nghiệp.

Điều 2. Người nộp thuế

1. Người nộp thuế thu nhập doanh nghiệp là tổ chức hoạt động sản xuất, kinh doanh hàng hóa,
dịch vụ có thu nhập chịu thuế theo quy định của Luật này (sau đây gọi là doanh nghiệp), bao
gồm:

a) Doanh nghiệp được thành lập theo quy định của pháp luật Việt Nam;
------------------------------------------------------------
b) Doanh nghiệp được thàn

#### <a id='toc1_1_1_2_'></a>[Load retrieval model](#toc0_)

In [13]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
import faiss

MODEL_NAME = "colbert-ir/colbertv2.0"

# Load tokenizer & model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

# Example encode
query = "climate change impact on Africa"
tokens = tokenizer(query, return_tensors="pt")

with torch.no_grad():
    outputs = model(**tokens)

print(outputs.last_hidden_state.shape)  # embeddings

2025-10-01 10:31:14,798 - INFO - Loading faiss with AVX2 support.
2025-10-01 10:31:14,836 - INFO - Successfully loaded faiss with AVX2 support.
2025-10-01 10:31:14,848 - INFO - Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes. This is only an error if you're trying to use GPU Faiss.


torch.Size([1, 7, 768])


In [14]:
def get_embedding(text: str):
    # tokenize
    tokens = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    
    with torch.no_grad():
        emb = model(**tokens).last_hidden_state  # [1, seq_len, hidden_dim]
    
    # remove batch dim -> [seq_len, hidden_dim]
    emb = emb.squeeze(0)

    # normalize token embeddings
    emb = F.normalize(emb, p=2, dim=-1)

    # As Colbertv2o returns a 2-dim tensor of embedding, we need to pool it down to 1 dim array
    pooled = emb.mean(dim=0) 

    return pooled.numpy()

{'source': 'luat_thue_tndn_2025.pdf',
 'file_type': 'pdf',
 'processing_method': 'docling',
 'chunker': 'hybrid',
 'file_size': 47533}

In [16]:
### Creating FAISS index
all_embedding = []
all_documents = []
chunk_metadata = []

for doc in docs:
    all_embedding.append(get_embedding(doc.page_content))
    all_documents.append(doc)
    chunk_metadata.append({'file_name': doc.metadata['source'], 'file_type': doc.metadata['file_type'], 'chunker': doc.metadata['chunker']})
    
embeddings = np.array(all_embedding, dtype = 'float32')
dimension = embeddings.shape[1]

index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

In [17]:
@tool
def query_vectordb(query: str, top_k: int = 5):
    """
    Take all of the human input prompt to retrieve the top_k most similar document chunks.
    Parameters:
    query (str): The query string to search for similar documents.
    top_k (int): The number of top similar documents to retrieve. Default is 5.

    Returns:
    list: A list of dictionaries containing metadata and content for the top_k most similar chunks.
    """
    query_emb = np.array(get_embedding(query).astype('float32').reshape(1, -1))
    D, I = index.search(query_emb, top_k) 
    results = []
    for idx in I[0]:
        meta = chunk_metadata[idx]
        content = all_documents[idx].page_content
        results.append(
            {'content': content, 'file_name': meta['file_name']}
        )
    return results

#### <a id='toc1_1_1_3_'></a>[Sample Agent](#toc0_)

In [None]:
#If OpenAI API key is available

tools = [query_vectordb]

llm_temperature = 0
llm = ChatOpenAI(model=config.openai_chat_model, api_key=config.openai_api_key, temperature=llm_temperature)

prompt = """
You are a highly specialized Super Agent working for a legal agency. Your sole responsibility is to retrieve contexts that will help answer questions based strictly on verified documents retrieved from the vector database.

**Always use exact question from user to retrieve documents from vector database**

You are allowed to make multiple retrieval passes if necessary to ensure all relevant context has been surfaced. The relevant information may be spread across multiple chunks, and may require thoughtful synthesis or paraphrasing.

 Pay attention: Answers may not be in a single place — they can be fragmented across multiple sections. Be thorough in connecting these fragments, but always remain grounded in the retrieved context.

Question:
{question}
"""

pre_built_agent = create_react_agent(llm, tools=tools, prompt=prompt)

In [None]:
#If no API key, then use Chat model from HuggingFace

from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.chat_models import ChatHuggingFace

# Load tokenizer and model from Hugging Face
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Initialize ChatHuggingFace with the model object
llm = ChatHuggingFace(
    model=model,
    tokenizer=tokenizer,
    temperature=0,
)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [None]:
from langchain.prompts import PromptTemplate

prompt = """
You are a highly specialized Super Agent working for a legal agency. Your sole responsibility is to retrieve contexts that will help answer questions based strictly on verified documents retrieved from the vector database.

**Always use exact question from user to retrieve documents from vector database**

You are allowed to make multiple retrieval passes if necessary to ensure all relevant context has been surfaced. The relevant information may be spread across multiple chunks, and may require thoughtful synthesis or paraphrasing.

Pay attention: Answers may not be in a single place — they can be fragmented across multiple sections. Be thorough in connecting these fragments, but always remain grounded in the retrieved context.

Question:
{question}
"""

prompt_template = PromptTemplate(input_variables=["question"], template=prompt)

In [None]:
# Define your tools (e.g., a vector database retrieval tool)
tools = [Tool(name="query_vectordb", func=query_vectordb, description="Retrieve documents from the vector database.")]

# Create the ReAct agent
agent = create_react_agent(
    llm=llm,
    tools=tools,
    prompt=prompt_template
)

# Initialize the agent executor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [19]:
from typing import Any
from pydantic import BaseModel, Field
from tqdm import tqdm

class RagResult(BaseModel):
    question: str = Field(..., description="The question that was asked")
    retrieved_context: list[str] | Any = Field(
        ..., description="The context chunks retrieved from the vector DB"
    )

def rag(question: str) -> RagResult:
    """
    Invokes the pre_built_agent with the given question and parses the retrieved context.
    """
    messages = [HumanMessage(content=question)]
    response = pre_built_agent.invoke({"messages": messages})

    retrieved_context = []

    if "messages" in response and response["messages"]:
        for message in response["messages"]:
            # Handle structured tool messages
            if hasattr(message, "type") and message.type == "tool":
                if getattr(message, "name", None) == "query_vectordb":
                    if getattr(message, "content", None):
                        retrieved_context.append(message.content)

            # Handle ToolMessage objects explicitly
            elif isinstance(message, ToolMessage):
                if getattr(message, "name", None) == "query_vectordb":
                    if getattr(message, "content", None):
                        retrieved_context.append(message.content)

    return RagResult(question=question, retrieved_context=retrieved_context)

##### <a id='toc1_1_1_3_1_'></a>[Evaluation](#toc0_)

In [20]:
df = pd.read_csv('D:/Study/Education/Projects/Group_Project/source/reference/qa_dataset_luat_thue_tndn_2025.csv')

In [21]:
df.head(1)

Unnamed: 0,doc_use_case,doc_source,user_input,reference,created_time,supporting_context,gen_method,annotation_method,question_type,question_category,cognitive,multi_clause,negative,key,question_id,doc_id,synthesizer_name
0,Corporate Tax Law 2025,Luật Thuế TNDN 2025,Doanh nghiệp nào phải nộp thuế thu nhập doanh ...,"Các tổ chức hoạt động sản xuất, kinh doanh hàn...",2025-10-01T03:02:01.122624,"Điều 2, Luật số 67/2025/QH15",manual_generation,ai_assisted,What,Factual,no,no,no,obs_1,q_1,luat_tndn_2025,manual


In [22]:
from ragas import SingleTurnSample
from ragas.testset import Testset
from tqdm import tqdm

def load_generated_testset() -> Testset:
    testset = Testset.from_pandas(df)
    print(f'📄 Loaded {len(testset.samples)} samples from original corpus')

    # In test mode, only take 2 samples for faster testing
    if config.mode == 'test':
        testset.samples = testset.samples[:2]
        print(f'⚠️ Test mode: Limited to {len(testset.samples)} samples for faster testing')

    # Process samples
    for test in tqdm(testset.samples):
        assert isinstance(test.eval_sample, SingleTurnSample)
        assert isinstance(test.eval_sample.user_input, str)
        result = rag(test.eval_sample.user_input)
        test.eval_sample.retrieved_contexts = result.retrieved_context

    return testset

In [23]:
from mlflow.data import from_pandas
from ragas import evaluate
from ragas.cache import DiskCacheBackend
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import (
    ContextPrecision,
    LLMContextRecall
)
from ragas.run_config import RunConfig

my_run_config = RunConfig(timeout=360)
metrics = [
    LLMContextRecall(),
    ContextPrecision()
]
if config.mode == 'test':
    metrics = metrics[:2]

In [24]:
from ragas.evaluation import EvaluationResult

def log_ragas_evaluation(
    ragas_result: EvaluationResult,
    corpus_type: str,
) -> None:
    df = ragas_result.to_pandas()

    for column in df.columns:
        if column not in ['user_input', 'retrieved_contexts', 'reference']:
            metric_values = df[column].dropna()
            if len(metric_values) > 0:
                mlflow.log_metric(f'{column}_mean', float(metric_values.mean()))

    mlflow.set_tag('evaluation_type', 'ragas')
    mlflow.set_tag('timestamp', datetime.now().isoformat())
    mlflow.log_metric('eval_dataset_size', len(df))

In [36]:
with mlflow.start_run(
    run_name=run_name,
    description=f'Draft Agent evaluation using {config.corpus_type} corpus',
    nested=True,  # allow nested run under parent
) as eval_run:
    run_id = eval_run.info.run_id

    # Log config
    for field_name, value in config.model_dump().items():
        if hasattr(value, 'get_secret_value'):  # skip secrets
            continue
        mlflow.log_param(field_name, value)

    mlflow.log_param('llm_temperature', llm_temperature)
    mlflow.log_param('system_prompt', prompt)

    mlflow.set_tag('agent_type', 'draft_agent')
    mlflow.set_tag('evaluation_framework', 'ragas')
    mlflow.set_tag('document_loader', 'markitdown')
    mlflow.set_tag('document_chunker', 'custom_chunk_text')

    # Load testset
    testset = load_generated_testset()
    eval_dataset = testset.to_evaluation_dataset()
    ds = from_pandas(eval_dataset.to_pandas(), name=f'{config.corpus_type}_testset')
    mlflow.log_input(ds, 'evaluation_dataset')

    # Run evaluation
    gen_result = evaluate(
        dataset=eval_dataset,
        metrics=metrics,
        run_config=my_run_config,
        batch_size=16,
    )
    log_ragas_evaluation(ragas_result=gen_result, corpus_type=config.corpus_type)

📄 Loaded 10 samples from original corpus
⚠️ Test mode: Limited to 2 samples for faster testing


  0%|          | 0/2 [00:00<?, ?it/s]2025-10-01 13:56:27,490 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-10-01 13:56:27,492 - INFO - Retrying request to /chat/completions in 0.482325 seconds
2025-10-01 13:56:28,598 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-10-01 13:56:28,600 - INFO - Retrying request to /chat/completions in 0.857716 seconds
2025-10-01 13:56:30,307 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
  0%|          | 0/2 [00:04<?, ?it/s]


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}