### imports

In [1]:
from pathlib import Path
from docling.document_converter import DocumentConverter
from langchain_core.documents import Document
from docling.chunking import HybridChunker
from transformers import AutoTokenizer

### Process all documents in a directory and collect the resulting chunks as a list of LangChain documents

In [2]:

def process_documents_to_langchain(documents_dir: str, max_tokens: int = 512) -> list:
    """Convert every file in a directory with Docling and chunk it into LangChain Documents.

    Each regular file directly inside ``documents_dir`` (no recursion) is
    converted with a single shared ``DocumentConverter``, split with a shared
    ``HybridChunker``, and every chunk is wrapped in a LangChain ``Document``
    whose ``page_content`` is the contextualized chunk text.

    A file that raises during conversion or chunking is reported and skipped;
    none of its chunks are added to the result, so ``chunk_index`` stays
    unique and contiguous over the returned list.

    Args:
        documents_dir: Directory containing the documents to process.
        max_tokens: Maximum tokens per chunk, passed to ``HybridChunker``.

    Returns:
        List of LangChain ``Document`` objects. Metadata keys: ``source``,
        ``source_name``, ``chunk_index`` (position across the whole batch),
        ``document_chunk_index`` (position within its file) and
        ``total_chunks_in_document``. Empty list when the directory holds no files.
    """
    banner = "=" * 60

    print(banner)
    print("BATCH HYBRID CHUNKING - TO LANGCHAIN DOCUMENTS")
    print(banner)

    # Regular files only; sorting keeps processing order (and chunk_index) stable across runs.
    all_files = sorted(f for f in Path(documents_dir).iterdir() if f.is_file())

    if not all_files:
        print(f"\n✗ No files found in {documents_dir}")
        return []

    print(f"\nFound {len(all_files)} documents to process")
    print(f"Max tokens per chunk: {max_tokens}\n")

    # Tokenizer and chunker are built once and reused for every document.
    print("Initializing tokenizer...")
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    chunker = HybridChunker(
        tokenizer=tokenizer,
        max_tokens=max_tokens,
        merge_peers=True
    )

    # One converter for the whole batch: building it per file repeats the
    # pipeline/model initialisation for every document.
    converter = DocumentConverter()

    langchain_documents = []
    total_chunks = 0
    successful_docs = 0
    failed_docs = []

    for file_path in all_files:
        try:
            print(f"\n📄 Processing: {file_path.name}")

            print("   Converting document...")
            doc = converter.convert(str(file_path)).document

            print("   Generating chunks...")
            chunks = list(chunker.chunk(dl_doc=doc))

            print(f"   Creating {len(chunks)} LangChain Document objects...")

            # Build this file's documents in a local list first. If anything
            # below raises, the shared result list is untouched, so a failed
            # file cannot leave partial chunks (and duplicate chunk_index
            # values) behind.
            file_documents = [
                Document(
                    # contextualize() yields the chunk text enriched with its headings
                    page_content=chunker.contextualize(chunk=chunk),
                    metadata={
                        "source": str(file_path),
                        "source_name": file_path.name,
                        "chunk_index": total_chunks + i,
                        "document_chunk_index": i,
                        "total_chunks_in_document": len(chunks)
                    }
                )
                for i, chunk in enumerate(chunks)
            ]

            langchain_documents.extend(file_documents)
            total_chunks += len(file_documents)
            successful_docs += 1
            print(f"   ✓ Success! Total chunks so far: {total_chunks}")

        except Exception as e:
            # Best effort by design: one unreadable file must not abort the batch.
            print(f"   ✗ Error processing {file_path.name}: {e}")
            failed_docs.append(file_path.name)

    # Final summary
    print("\n" + banner)
    print("PROCESSING COMPLETE")
    print(banner)
    print(f"✓ Successfully processed: {successful_docs}/{len(all_files)} documents")
    print(f"✓ Total LangChain Documents created: {len(langchain_documents)}")

    if failed_docs:
        print(f"\n✗ Failed documents ({len(failed_docs)}):")
        for failed_name in failed_docs:
            print(f"   - {failed_name}")

    print("\n" + banner)
    print("LANGCHAIN DOCUMENTS READY")
    print(banner)
    print("✓ Each chunk is a LangChain Document object")
    print("✓ page_content: Contextualized chunk text with headings")
    print("✓ metadata: source, source_name, chunk_index, etc.")
    print("✓ Ready for vector store ingestion (Chroma, FAISS, Pinecone, etc.)")

    return langchain_documents

### usage

In [3]:
# Folder with the raw source files, given relative to this notebook's location.
raw_documents_dir = "../documents/raw"

# Convert and chunk every file in that folder (max_tokens stays at its default).
all_chunks = process_documents_to_langchain(raw_documents_dir)

BATCH HYBRID CHUNKING - TO LANGCHAIN DOCUMENTS

Found 9 documents to process
Max tokens per chunk: 512

Initializing tokenizer...


2025-11-01 11:44:00,542 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]



📄 Processing: client-review-globalfinance.pdf
   Converting document...


2025-11-01 11:44:03,294 - INFO - Going to convert document batch...
2025-11-01 11:44:03,297 - INFO - Initializing pipeline for StandardPdfPipeline with options hash 4f2edc0f7d9bb60b38ebfecf9a2609f5
2025-11-01 11:44:03,322 - INFO - Loading plugin 'docling_defaults'
2025-11-01 11:44:03,326 - INFO - Registered picture descriptions: ['vlm', 'api']
2025-11-01 11:44:03,388 - INFO - Loading plugin 'docling_defaults'
2025-11-01 11:44:03,402 - INFO - Registered ocr engines: ['auto', 'easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2025-11-01 11:44:03,586 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-11-01 11:44:03,587 - INFO - easyocr cannot be used because it is not installed.
2025-11-01 11:44:04,471 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-11-01 11:44:04,505 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:44:04,576 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineerin

   Generating chunks...
   Creating 24 LangChain Document objects...
   ✓ Success! Total chunks so far: 24

📄 Processing: company-overview.md
   Converting document...


2025-11-01 11:45:11,917 - INFO - Finished converting document company-overview.md in 0.36 sec.
2025-11-01 11:45:11,979 - INFO - detected formats: [<InputFormat.MD: 'md'>]
2025-11-01 11:45:11,980 - INFO - Going to convert document batch...
2025-11-01 11:45:11,981 - INFO - Initializing pipeline for SimplePipeline with options hash 995a146ad601044538e6a923bea22f4e
2025-11-01 11:45:11,982 - INFO - Processing document implementation-playbook.md


   Generating chunks...
   Creating 8 LangChain Document objects...
   ✓ Success! Total chunks so far: 32

📄 Processing: implementation-playbook.md
   Converting document...


2025-11-01 11:45:12,938 - INFO - Finished converting document implementation-playbook.md in 0.97 sec.
2025-11-01 11:45:13,115 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-11-01 11:45:13,119 - INFO - Going to convert document batch...
2025-11-01 11:45:13,119 - INFO - Initializing pipeline for StandardPdfPipeline with options hash 4f2edc0f7d9bb60b38ebfecf9a2609f5
2025-11-01 11:45:13,121 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-11-01 11:45:13,123 - INFO - easyocr cannot be used because it is not installed.
2025-11-01 11:45:13,124 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-11-01 11:45:13,142 [RapidOCR] base.py:22: Using engine_name: torch[0m


   Generating chunks...
   Creating 27 LangChain Document objects...
   ✓ Success! Total chunks so far: 59

📄 Processing: meeting-notes-2025-01-08.docx
   Converting document...


[32m[INFO] 2025-11-01 11:45:13,181 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-11-01 11:45:13,182 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-11-01 11:45:13,413 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:45:13,416 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-11-01 11:45:13,417 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0

   Generating chunks...
   Creating 23 LangChain Document objects...
   ✓ Success! Total chunks so far: 82

📄 Processing: meeting-notes-2025-01-15.docx
   Converting document...


[32m[INFO] 2025-11-01 11:45:51,584 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:45:51,588 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-11-01 11:45:51,589 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-11-01 11:45:51,697 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:45:51,772 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-11-01 11:45:51,773 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langcha

   Generating chunks...
   Creating 23 LangChain Document objects...
   ✓ Success! Total chunks so far: 105

📄 Processing: mission-and-goals.md
   Converting document...


2025-11-01 11:46:17,989 - INFO - Finished converting document mission-and-goals.md in 0.82 sec.
2025-11-01 11:46:18,154 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-11-01 11:46:18,158 - INFO - Going to convert document batch...
2025-11-01 11:46:18,159 - INFO - Initializing pipeline for StandardPdfPipeline with options hash 4f2edc0f7d9bb60b38ebfecf9a2609f5
2025-11-01 11:46:18,160 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-11-01 11:46:18,162 - INFO - easyocr cannot be used because it is not installed.
2025-11-01 11:46:18,164 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-11-01 11:46:18,190 [RapidOCR] base.py:22: Using engine_name: torch[0m


   Generating chunks...
   Creating 16 LangChain Document objects...
   ✓ Success! Total chunks so far: 121

📄 Processing: q4-2024-business-review.pdf
   Converting document...


[32m[INFO] 2025-11-01 11:46:18,237 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-11-01 11:46:18,238 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-11-01 11:46:18,545 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:46:18,550 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-11-01 11:46:18,551 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0

   Generating chunks...
   Creating 25 LangChain Document objects...
   ✓ Success! Total chunks so far: 146

📄 Processing: team-handbook.md
   Converting document...


2025-11-01 11:46:47,803 - INFO - Finished converting document team-handbook.md in 0.73 sec.
2025-11-01 11:46:47,929 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-11-01 11:46:47,932 - INFO - Going to convert document batch...
2025-11-01 11:46:47,933 - INFO - Initializing pipeline for StandardPdfPipeline with options hash 4f2edc0f7d9bb60b38ebfecf9a2609f5
2025-11-01 11:46:47,935 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-11-01 11:46:47,936 - INFO - easyocr cannot be used because it is not installed.
2025-11-01 11:46:47,937 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-11-01 11:46:47,959 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:46:48,000 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-11-01 11:46:48,001 [RapidOCR] torch.py:54: Using D

   Generating chunks...
   Creating 21 LangChain Document objects...
   ✓ Success! Total chunks so far: 167

📄 Processing: technical-architecture-guide.pdf
   Converting document...


[32m[INFO] 2025-11-01 11:46:48,472 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:46:48,475 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-11-01 11:46:48,476 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-11-01 11:46:48,576 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-11-01 11:46:48,666 [RapidOCR] download_file.py:60: File exists and is valid: D:\Documents\Learning\Software_Engineering\RAG\langchain_docling_postgres\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-11-01 11:46:48,667 [RapidOCR] torch.py:54: Using D:\Documents\Learning\Software_Engineering\RAG\langcha

   Generating chunks...
   Creating 23 LangChain Document objects...
   ✓ Success! Total chunks so far: 190

PROCESSING COMPLETE
✓ Successfully processed: 9/9 documents
✓ Total LangChain Documents created: 190

LANGCHAIN DOCUMENTS READY
✓ Each chunk is a LangChain Document object
✓ page_content: Contextualized chunk text with headings
✓ metadata: source, source_name, chunk_index, etc.
✓ Ready for vector store ingestion (Chroma, FAISS, Pinecone, etc.)


### Vector storage -> Postgres/pgvector

In [24]:
# import basics
import os
from dotenv import load_dotenv

from langchain_postgres import PGVector
from langchain_openai import OpenAIEmbeddings

# Load environment variables from .env file (OpenAI key and, optionally,
# PGVECTOR_CONNECTION_STRING) before anything reads os.environ.
load_dotenv()

# initiate embeddings model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Connection string: taken from PGVECTOR_CONNECTION_STRING when set, so real
# database credentials can live in .env instead of the notebook. The literal
# below is only the local development fallback.
CONNECTION_STRING = os.environ.get(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql+psycopg://raguser:ragpass@localhost:5555/ragdb",
)

# Initialize vector store
vectorstore = PGVector(
    connection=CONNECTION_STRING,
    embeddings=embeddings,
    # Logical collection name used to group these embeddings
    # (NOTE(review): PGVector keeps collections as rows in its own tables,
    # so this is not a table name; confirm against the langchain_postgres docs)
    collection_name="my_documents",
    use_jsonb=True,
)

# Add documents: embeds every chunk and inserts it. The returned list of
# generated row ids is this cell's displayed value.
# NOTE(review): no explicit ids are passed, so re-running this cell presumably
# inserts the same chunks again under new ids; verify before re-running.
vectorstore.add_documents(all_chunks)

2025-11-01 12:36:13,804 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


['f886f464-c690-48a9-986a-2851f7c68b36',
 'a1fff7a1-3dd9-4182-918f-3661de49a538',
 'de8b9dc8-514b-481a-8914-80b4a140d666',
 '59d2667a-a581-4024-aaf9-a317961a889b',
 'e1aef5f8-9e42-4a4c-8aa0-efc52da148ce',
 '5bd5090f-79d5-42ea-817c-463dc66437f0',
 '64e8f544-fb1b-4be9-adfd-d0c0f547c49f',
 '5f90edc4-21b4-423b-bae2-21ad8459ba4b',
 'f7295db2-a334-4a0d-b7f9-ac9427a3ae47',
 '6725805f-5f16-4cd2-b07e-7da44b89c9db',
 'dd79354e-77e1-4600-b8d2-826dd89ac297',
 'd90f574d-be22-4ba3-b590-0d9b6090af82',
 '3fa6d380-21d9-40ab-a812-a0e3b334ba42',
 'ce6010ec-e33b-4e0b-8e0e-c4eb7eb3cd5b',
 '90f7a6f3-83d2-4320-916d-c50c40e8fc8a',
 '4f488732-c840-49f3-bfea-9a72ea760885',
 '44a15954-81d5-4f49-aae6-4c16a6542041',
 '2fdbb43c-41f0-4eab-b7ff-2ff38a9de1eb',
 'd6c0beec-f415-4cdc-9432-cfb88e3d412c',
 'a8a40405-58d4-4cc9-a352-bae5c7da2217',
 'd08e5b19-1785-4148-b500-bc4e80af0e4e',
 '274114e3-74fe-40a4-bac9-81f2722f3502',
 'd0cc6c2f-a524-44cc-aff1-6209a27cd1e3',
 'cbf2f045-af28-4a80-a443-1d97c42c1446',
 '7c68e686-c53d-

### Querying the pgvector store

In [29]:
# Natural-language question to run against the pgvector collection.
query = "What is the Q1 2025 revenue target?"

# Retrieve the five chunks closest to the question.
results = vectorstore.similarity_search(query, k=5)

print("Retrieved Document:")
for doc in results:
    # A 60-character rule, then the chunk text followed by its metadata dict.
    print("=" * 60, f"* {doc.page_content} [{doc.metadata}]", sep="\n")

2025-11-01 12:44:13,473 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Retrieved Document:
* Financial Targets Q1 2025
Metric, 1 = Q4 2024 Actual. Metric, 2 = Q1 2025 Target. Metric, 3 = Growth. Revenue, 1 = $2.8M. Revenue, 2 = $3.4M. Revenue, 3 = +21%. New Clients, 1 = 14. New Clients, 2 = 16. New Clients, 3 = +14%. MRR, 1 = $890K. MRR, 2 = $1.15M. MRR, 3 = +29%. Gross Margin, 1 = 68%. Gross Margin, 2 = 70%. Gross Margin, 3 = +2pp [{'source': '..\\documents\\raw\\q4-2024-business-review.pdf', 'chunk_index': 144, 'source_name': 'q4-2024-business-review.pdf', 'document_chunk_index': 23, 'total_chunks_in_document': 25}]
* Q1 Revenue Targets
Product, 1 = Target New Customers. Product, 2 = Target MRR Growth. DocFlow AI, 1 = 12. DocFlow AI, 2 = $180K →$270K. ConversePro, 1 = 15. ConversePro, 2 = $0 →$225K. Custom Solutions, 1 = 8. Custom Solutions, 2 = $710K →$855K. Total, 1 = 35. Total, 2 = $890K →$1.35M [{'source': '..\\documents\\raw\\meeting-notes-2025-01-08.docx', 'chunk_index': 78, 'source_name': 'meeting-notes-2025-01-08.docx', 'document_chunk_i

### Vector store -> Supabase

In [4]:
# import basics
import os
from dotenv import load_dotenv

# import langchain
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_openai import OpenAIEmbeddings

# import supabase
from supabase.client import Client, create_client

# load environment variables
load_dotenv()

# initiate supabase db
supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")

# Fail fast with the variable name rather than handing None to create_client,
# which would only surface later as a less specific client error.
_missing_vars = [
    var_name
    for var_name, var_value in (
        ("SUPABASE_URL", supabase_url),
        ("SUPABASE_SERVICE_KEY", supabase_key),
    )
    if not var_value
]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_vars)
    )

supabase: Client = create_client(supabase_url, supabase_key)

# initiate embeddings model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# store chunks in vector store: embeds all_chunks and inserts them into the
# "documents" table; "match_documents" is the name of the SQL function the
# store will use for similarity queries.
vector_store = SupabaseVectorStore.from_documents(
    all_chunks,
    embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents",
    # NOTE(review): presumably the number of rows sent per insert request; confirm
    chunk_size=1000,
)

2025-10-31 00:54:42,939 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-10-31 00:54:46,249 - INFO - HTTP Request: POST https://lrnjqowxzqyuqwdejzyk.supabase.co/rest/v1/documents?columns=%22metadata%22%2C%22id%22%2C%22embedding%22%2C%22content%22 "HTTP/2 201 Created"


### Querying Supabase

In [41]:
import os
from supabase import Client, create_client
from langchain_openai import OpenAIEmbeddings

# Supabase client, configured from the process environment
# (os.getenv yields None for a variable that is not set).
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)

# Embedding model used to vectorise queries.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

def query_vector_store(query: str, top_k: int = 5, match_threshold: float = 0.0, metadata_filter=None):
    """Embed a query, call the ``match_documents`` RPC on Supabase and print the matches.

    Uses the module-level ``embeddings`` model and ``supabase`` client.

    Args:
        query: The input query string.
        top_k: Number of rows requested from the RPC (sent as ``match_count``).
        match_threshold: Value sent as ``match_threshold``; defaults to 0.0.
        metadata_filter: Optional dict sent as the RPC's jsonb ``filter``
            argument; ``None`` sends an empty filter (``{}``).

    Returns:
        None. Each returned row is printed with its id, similarity, metadata
        and content. Nothing is printed when the RPC returns no rows.
    """
    # 1. Embed the query
    query_embedding = embeddings.embed_query(query)

    # 2. Call the match_documents SQL function through the Supabase RPC endpoint
    rpc_params = {
        "query_embedding": query_embedding,
        "match_count": top_k,
        "match_threshold": match_threshold,
        "filter": metadata_filter if metadata_filter is not None else {},
    }
    resp = supabase.rpc("match_documents", rpc_params).execute()

    # 3. Print every returned row; `or []` keeps the loop safe if data is empty/None
    matches = resp.data or []
    for m in matches:
        # similarity is one of the columns in each returned row
        print("=" * 60)
        print(f"Document ID: {m['id']}\nSimilarity: {m['similarity']}\nMetadata: {m['metadata']}\nContent: {m['content']}")

In [42]:
# Other questions to try (swap one in for the active query below):
#   "What is the Q1 2025 revenue target?"
#   "When was NeuralFlow AI founded"
query = "What ROI did GlobalFinance achieve?"

# Prints the closest stored chunks for the active query.
query_vector_store(query)

2025-10-31 14:18:05,033 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-10-31 14:18:05,732 - INFO - HTTP Request: POST https://lrnjqowxzqyuqwdejzyk.supabase.co/rest/v1/rpc/match_documents "HTTP/2 200 OK"


Document ID: 194e9e5e-3da1-4a77-97ac-0d0cb5c7b872
Similarity: 0.615534534847961
Metadata: {'source': '..\\documents\\raw\\client-review-globalfinance.pdf', 'chunk_index': 1, 'source_name': 'client-review-globalfinance.pdf', 'document_chunk_index': 1, 'total_chunks_in_document': 24}
Content: Execuve Summary
GlobalFinance Corp has achieved exceponal results in the first six months of our partnership. The DocFlow AI implementaon has exceeded all success metrics, delivering  $2.4M in annualized cost savings  and  94% reducon in loan applicaon processing me . Customer sasfacon scores have improved 37%, and the team has eliminated their applicaon backlog enrely.
Document ID: 33bfd9c6-e422-4016-9de7-383b14cf65ea
Similarity: 0.494471282425969
Metadata: {'source': '..\\documents\\raw\\q4-2024-business-review.pdf', 'chunk_index': 131, 'source_name': 'q4-2024-business-review.pdf', 'document_chunk_index': 10, 'total_chunks_in_document': 25}
Content: Case Study: GlobalFinance Corp
Challenge: Manual