In [None]:
# pip install -r requirements.txt

In [1]:
from docling.document_converter import DocumentConverter
# import cv2

# Smoke test: convert a single PDF and print its Markdown export.
source = "https://arxiv.org/pdf/2408.09869"  # file path or URL
converter = DocumentConverter()
# convert() returns a result object; the converted document is its `.document` attribute.
doc = converter.convert(source).document
print(doc.export_to_markdown())  # output starts with "<!-- image -->" then "## Docling Technical Report[...]"

  from .autonotebook import tqdm as notebook_tqdm
2025-12-15 07:36:53,977 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 07:36:53,996 - INFO - Going to convert document batch...
2025-12-15 07:36:53,997 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 07:36:54,008 - INFO - Loading plugin 'docling_defaults'
2025-12-15 07:36:54,010 - INFO - Registered picture descriptions: ['vlm', 'api']
2025-12-15 07:36:54,026 - INFO - Loading plugin 'docling_defaults'
2025-12-15 07:36:54,029 - INFO - Registered ocr engines: ['auto', 'easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2025-12-15 07:36:54,030 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 07:36:54,030 - INFO - easyocr cannot be used because it is not installed.
2025-12-15 07:36:54,215 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-12-15 07:36:54,235 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[I

<!-- image -->

## Docling Technical Report

## Version 1.0

Christoph Auer Maksym Lysak Ahmed Nassar Michele Dolfi Nikolaos Livathinos Panos Vagenas Cesar Berrospi Ramis Matteo Omenetti Fabian Lindlbauer Kasper Dinkla Lokesh Mishra Yusik Kim Shubham Gupta Rafael Teixeira de Lima Valery Weber Lucas Morin Ingmar Meijer Viktor Kuropiatnyk Peter W. J. Staar

AI4K Group, IBM Research R¬® uschlikon, Switzerland

## Abstract

This technical report introduces Docling , an easy to use, self-contained, MITlicensed open-source package for PDF document conversion. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. The code interface allows for easy extensibility and addition of new features and models.

## 1 Introduction

Converting PDF documents back into a machine-processable format has been a major challenge for decades due to their huge var

In [3]:
%%writefile system_resources.py
import psutil
import torch

def check_system_resources():
    """Take a snapshot of CPU and GPU availability.

    Returns:
        dict with the keys:
            "cpu_count": physical core count. Falls back to the logical count
                and then to 1 when psutil cannot determine it, so the value is
                always a positive int that is safe to size a worker pool with.
            "cpu_percent": CPU utilisation sampled over a 0.2 s interval.
            "gpu_available": result of ``torch.cuda.is_available()``.
            "gpu_mem_free_mb": free CUDA memory in MiB as reported by
                ``torch.cuda.mem_get_info()``, or None when no GPU is available.
    """
    # psutil.cpu_count() returns None when the count is undetermined; without a
    # fallback, callers doing min(cpu_count, n) would raise TypeError.
    cpu_count = (
        psutil.cpu_count(logical=False)
        or psutil.cpu_count(logical=True)
        or 1
    )
    cpu_percent = psutil.cpu_percent(interval=0.2)

    gpu_available = torch.cuda.is_available()
    gpu_mem_free = None

    if gpu_available:
        free_bytes, _total_bytes = torch.cuda.mem_get_info()
        gpu_mem_free = free_bytes // (1024 ** 2)  # bytes -> MiB

    return {
        "cpu_count": cpu_count,
        "cpu_percent": cpu_percent,
        "gpu_available": gpu_available,
        "gpu_mem_free_mb": gpu_mem_free
    }


Writing system_resources.py


In [4]:
%%writefile worker.py
from docling.document_converter import DocumentConverter

def convert_to_markdown(source: str, use_gpu: bool) -> str | None:
    try:
        converter = DocumentConverter()
        doc = converter.convert(source).document
        return doc.export_to_markdown()
    except Exception as e:
        return f"[ERROR] {source}: {e}"


Writing worker.py


In [None]:
%%writefile parallel_exec.py
import json
import os
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime

from system_resources import check_system_resources
from worker import convert_to_markdown

# Root folder under which each run gets its own timestamp-named sub-folder.
BASE_OUTPUT_DIR = "output/docling"


def create_storage():
    """Create the output folder for a new run.

    Returns:
        (run_dir, uid): the folder created under BASE_OUTPUT_DIR and the
        ``YYYYMMDD_HHMMSS`` timestamp that names it. An already existing
        folder with the same name is reused (``exist_ok=True``).
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target_dir = os.path.join(BASE_OUTPUT_DIR, stamp)
    os.makedirs(target_dir, exist_ok=True)
    return target_dir, stamp

def init_storage(run_dir: str, sources: list[str]):
    """Write the initial ``results.json`` for a run and return its path.

    The file starts with a "meta" section (number of sources and the ISO-format
    start time) followed by an empty "documents" list that later appends fill.
    """
    meta = {
        "total": len(sources),
        "start_time": datetime.now().isoformat()
    }
    skeleton = {"meta": meta, "documents": []}

    output_path = os.path.join(run_dir, "results.json")
    with open(output_path, mode="w", encoding="utf-8") as handle:
        json.dump(skeleton, handle, ensure_ascii=False, indent=2)
    return output_path

def append_result(output_json: str, record: dict):
    """Append one record to the "documents" list of the results file, in place.

    The file is opened once in read/write mode: its JSON is loaded, the record
    is appended, and the whole document is rewritten from offset 0.
    """
    with open(output_json, mode="r+", encoding="utf-8") as handle:
        payload = json.load(handle)
        payload["documents"].append(record)
        handle.seek(0)  # rewind so the rewrite replaces the old content
        json.dump(payload, handle, ensure_ascii=False, indent=2)
        handle.truncate()  # drop leftover bytes if the new text is shorter

def convert_list(sources: list[str], batch_size: int = 4):
    """Convert documents in parallel worker processes and log each outcome.

    A timestamped run folder and ``results.json`` are created first; one record
    per source is appended to that file as soon as its conversion finishes.

    Args:
        sources: Document paths or URLs handed to ``convert_to_markdown``.
        batch_size: Upper bound on the number of worker processes.

    Returns:
        The Markdown strings of the successful conversions, in completion
        order (not input order). Failed sources are only recorded in the
        results file.
    """
    run_dir, _uid = create_storage()
    output_json = init_storage(run_dir, sources)

    print(f"Output folder: {run_dir}")

    resources = check_system_resources()
    # The physical core count may be reported as None; never size the pool below 1.
    cpu_count = resources["cpu_count"] or 1
    gpu_available = resources["gpu_available"]

    max_workers = max(1, min(cpu_count, batch_size))

    print(f"Starting conversion with {max_workers} workers")
    print(f"GPU available: {gpu_available}")

    results = []

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(convert_to_markdown, src, gpu_available): src
            for src in sources
        }

        for idx, future in enumerate(as_completed(futures), 1):
            src = futures[future]
            try:
                content = future.result()
                # The worker reports failures as "[ERROR] <source>: <message>"
                # strings instead of raising; turn those back into errors so
                # they are not stored as successful content.
                error_prefix = f"[ERROR] {src}: "
                if isinstance(content, str) and content.startswith(error_prefix):
                    raise RuntimeError(content[len(error_prefix):])
                record = {
                    "source": src,
                    "status": "success",
                    "content": content
                }
                results.append(content)
                print(f"[{idx}/{len(sources)}] Done: {src}")
            except Exception as e:
                record = {
                    "source": src,
                    "status": "error",
                    "error": str(e)
                }
                print(f"Error processing {src}: {e}")

            append_result(output_json, record)

            # Re-check system resources every 3 completed files.
            if idx % 3 == 0:
                res = check_system_resources()
                if res["cpu_percent"] > 90:
                    print("⚠ CPU HIGH USAGE detected")

                if res["gpu_available"] and res["gpu_mem_free_mb"] < 2048:
                    print("⚠ GPU memory low → future jobs still on CPU")

    return results


Overwriting main.py


In [6]:
# import os
import psutil
import concurrent.futures
import torch
import time
from docling.document_converter import DocumentConverter

def check_system_resources():
    """Check system resources (CPU and GPU) and print a short summary.

    Returns:
        (cpu_count, cpu_percent, gpu_available, gpu_memory), where gpu_memory is
        the value of ``torch.cuda.memory_allocated(0)`` or 0 without a GPU.
    """
    # CPU: physical core count and utilisation sampled over one second.
    cpu_count = psutil.cpu_count(logical=False)
    cpu_percent = psutil.cpu_percent(interval=1)

    # GPU: start from the "no GPU" defaults and fill them in when CUDA is present.
    gpu_count, gpu_name, gpu_memory = 0, None, 0
    gpu_available = torch.cuda.is_available()
    if gpu_available:
        gpu_count = torch.cuda.device_count()
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.memory_allocated(0)

    print(f"CPU: {cpu_count} cores, {cpu_percent}% used")
    if not gpu_available:
        print("No GPU available")
    else:
        print(f"GPU: {gpu_name}, {gpu_count} devices, {gpu_memory // (1024 * 1024)} MB memory used")

    return cpu_count, cpu_percent, gpu_available, gpu_memory

def convert_to_markdown(source: str, gpu_available: bool):
    """Convert a single document to Markdown.

    Args:
        source: Document location passed to ``DocumentConverter.convert``.
        gpu_available: Only selects which progress message is printed; it is
            not forwarded to the converter.

    Returns:
        The Markdown export, or None if any exception occurred (the error is
        printed rather than raised).
    """
    try:
        converter = DocumentConverter()
        progress = (
            f"Processing {source} on GPU..."
            if gpu_available
            else f"Processing {source} on CPU..."
        )
        print(progress)
        return converter.convert(source).document.export_to_markdown()
    except Exception as e:
        print(f"Error converting {source}: {str(e)}")
    return None

def convertList(sources:list[str]):
    """Convert a list of documents concurrently, throttling on system load.

    Work is submitted to a thread pool a few items at a time so that the number
    of in-flight conversions can actually be lowered while the run is in
    progress (changing the pool size after everything has been queued has no
    effect on threads that are already running).

    Args:
        sources: Document paths or URLs handed to ``convert_to_markdown``.

    Returns:
        A list aligned with ``sources``: the Markdown of each document, or None
        where ``convert_to_markdown`` returned None.
    """
    cpu_count, cpu_percent, gpu_available, _ = check_system_resources()
    # The physical core count may be reported as None; fall back to one worker.
    max_workers = cpu_count or 1
    worker_limit = max_workers  # cap on in-flight jobs; halved when the CPU is saturated
    use_gpu = gpu_available     # once switched off it stays off for the rest of the run
    low_gpu_memory_bytes = 2 * 1024 * 1024 * 1024  # 2 GiB of free memory

    sources = list(sources)
    results = [None] * len(sources)
    in_flight = {}  # future -> index of its source
    next_index = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        while next_index < len(sources) or in_flight:
            # Top up to the current limit.
            while next_index < len(sources) and len(in_flight) < worker_limit:
                future = executor.submit(convert_to_markdown, sources[next_index], use_gpu)
                in_flight[future] = next_index
                next_index += 1

            done, _ = concurrent.futures.wait(
                in_flight, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                index = in_flight.pop(future)
                results[index] = future.result()
                print(f"Completed processing document {index + 1}")

            if next_index >= len(sources):
                continue  # nothing left to schedule, so no throttling decision to make

            # Re-check resources before scheduling more work. The probe itself
            # samples CPU usage for one second, so no extra sleep is needed.
            cpu_count, cpu_percent, gpu_available, _ = check_system_resources()

            if cpu_percent > 90:
                print("CPU usage is high, reducing number of workers.")
                worker_limit = max(1, (cpu_count or 1) // 2)
            elif use_gpu and gpu_available:
                # Compare FREE device memory against the threshold; the
                # allocated-bytes figure from check_system_resources() is
                # small exactly when plenty of memory is available.
                free_bytes, _total_bytes = torch.cuda.mem_get_info()
                if free_bytes < low_gpu_memory_bytes:
                    print("GPU memory is low, switching to CPU.")
                    use_gpu = False

    return results

In [None]:
# Example usage: arXiv PDFs used as the conversion workload.
sources = [
    "https://arxiv.org/pdf/2408.09869",  # Paper 1
    "https://arxiv.org/pdf/2311.04155",  # Paper 2
    "https://arxiv.org/pdf/1706.03762",  # Paper 3
    "https://arxiv.org/pdf/2302.09664",  # Paper 4
    "https://arxiv.org/pdf/2003.12771",  # Paper 5
    "https://arxiv.org/pdf/1910.02707",  # Paper 6
    "https://arxiv.org/pdf/1705.04510",  # Paper 7
    "https://arxiv.org/pdf/1506.01497",  # Paper 8
    "https://arxiv.org/pdf/1802.05365",  # Paper 9
    "https://arxiv.org/pdf/2004.09602",  # Paper 10
    "https://arxiv.org/pdf/1602.02235",  # Paper 11
    "https://arxiv.org/pdf/1801.06538",  # Paper 12
    "https://arxiv.org/pdf/1611.09060",  # Paper 13
    "https://arxiv.org/pdf/1912.05483",  # Paper 14
    "https://arxiv.org/pdf/1709.08624",  # Paper 15
    "https://arxiv.org/pdf/1810.04805",  # Paper 16
    "https://arxiv.org/pdf/1502.03167",  # Paper 17
    "https://arxiv.org/pdf/1807.02547",  # Paper 18
    "https://arxiv.org/pdf/2103.00634",   # Paper 19
    "https://arxiv.org/pdf/2103.00634"  # Paper 20 - NOTE(review): same URL as Paper 19; confirm the duplicate is intentional
]


# CPU times: user 294 ms, sys: 198 ms, total: 492 ms
# Wall time: 13min 30s

In [None]:
%%time
from parallel_exec import convert_list

# Run the parallel conversion over every entry in `sources`.
converted_docs = convert_list(sources)

# Print the Markdown result of each document (empty/None entries are skipped).
for i, doc in enumerate(converted_docs):
    if doc:
        print(f"Document {i + 1}:")
        print(doc)

üöÄ Starting conversion with 2 workers
GPU available: False


2025-12-15 07:47:41,242 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 07:47:41,260 - INFO - Going to convert document batch...
2025-12-15 07:47:41,262 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 07:47:41,269 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 07:47:41,278 - INFO - Loading plugin 'docling_defaults'
2025-12-15 07:47:41,283 - INFO - Registered picture descriptions: ['vlm', 'api']
2025-12-15 07:47:41,288 - INFO - Going to convert document batch...
2025-12-15 07:47:41,290 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 07:47:41,300 - INFO - Loading plugin 'docling_defaults'
2025-12-15 07:47:41,306 - INFO - Registered ocr engines: ['auto', 'easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2025-12-15 07:47:41,307 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 07

‚úÖ [1/20] Done: https://arxiv.org/pdf/2408.09869


[32m[INFO] 2025-12-15 07:49:03,951 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:49:03,952 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:49:03,956 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:49:03,957 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:49:04,067 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:49:04,069 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:49:04,094 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:49:04,097 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [2/20] Done: https://arxiv.org/pdf/1706.03762


[32m[INFO] 2025-12-15 07:50:30,339 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:50:30,342 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:50:30,344 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:50:30,346 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:50:30,456 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:50:30,461 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:50:30,489 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:50:30,495 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [3/20] Done: https://arxiv.org/pdf/2311.04155
‚ö† CPU HIGH USAGE detected


[32m[INFO] 2025-12-15 07:50:46,163 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:50:46,172 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:50:46,857 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:50:46,860 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:50:46,866 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:50:46,868 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:50:47,012 [RapidOCR] base.py:22: Using engine_name: torch[0m

‚úÖ [4/20] Done: https://arxiv.org/pdf/2003.12771


2025-12-15 07:51:20,461 - INFO - Finished converting document 2302.09664v3.pdf in 50.76 sec.
2025-12-15 07:51:20,582 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 07:51:20,584 - INFO - Going to convert document batch...
2025-12-15 07:51:20,585 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 07:51:20,586 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 07:51:20,587 - INFO - easyocr cannot be used because it is not installed.
2025-12-15 07:51:20,588 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-12-15 07:51:20,613 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:51:20,614 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:51:20,627 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-1

‚úÖ [5/20] Done: https://arxiv.org/pdf/2302.09664


[32m[INFO] 2025-12-15 07:51:20,817 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:51:20,819 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:51:20,821 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:51:20,822 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:51:20,900 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:51:20,901 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:51:20,924 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:51:20,925 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [6/20] Done: https://arxiv.org/pdf/1705.04510


2025-12-15 07:52:28,313 - INFO - Going to convert document batch...
2025-12-15 07:52:28,320 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 07:52:28,326 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 07:52:28,328 - INFO - easyocr cannot be used because it is not installed.
2025-12-15 07:52:28,330 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-12-15 07:52:28,365 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:52:28,368 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:52:28,385 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:52:28,389 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m


‚ö† CPU HIGH USAGE detected


[32m[INFO] 2025-12-15 07:52:28,786 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:52:28,791 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:52:28,794 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:52:28,798 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:52:28,973 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:52:28,976 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:52:29,004 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:52:29,009 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [7/20] Done: https://arxiv.org/pdf/1910.02707


[32m[INFO] 2025-12-15 07:52:51,668 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:52:51,673 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:52:51,699 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:52:51,701 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:52:52,315 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:52:52,320 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:52:52,324 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:52:52,327 [RapidOCR] main.py:50: Using /usr/local/python/3

‚úÖ [8/20] Done: https://arxiv.org/pdf/1802.05365


[32m[INFO] 2025-12-15 07:54:02,016 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:54:02,023 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:54:02,025 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:54:02,026 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:54:02,156 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:54:02,158 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:54:02,186 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:54:02,189 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [9/20] Done: https://arxiv.org/pdf/1506.01497


[32m[INFO] 2025-12-15 07:54:23,258 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:54:23,263 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:54:23,271 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:54:23,274 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:54:23,459 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:54:23,463 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:54:23,495 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:54:23,499 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [10/20] Done: https://arxiv.org/pdf/1602.02235


2025-12-15 07:55:12,345 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 07:55:12,357 - INFO - Going to convert document batch...
2025-12-15 07:55:12,360 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 07:55:12,362 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 07:55:12,363 - INFO - easyocr cannot be used because it is not installed.
2025-12-15 07:55:12,364 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-12-15 07:55:12,394 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:55:12,397 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:55:12,411 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:55:12,418 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-pack

‚úÖ [11/20] Done: https://arxiv.org/pdf/2004.09602


[32m[INFO] 2025-12-15 07:56:15,389 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:56:15,391 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:56:15,409 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:56:15,416 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 07:56:16,033 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:56:16,036 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:56:16,044 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:56:16,046 [RapidOCR] main.py:50: Using /usr/local/python/3

‚úÖ [12/20] Done: https://arxiv.org/pdf/1611.09060


[32m[INFO] 2025-12-15 07:57:41,822 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:57:41,824 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:57:41,827 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:57:41,829 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:57:41,950 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:57:41,952 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:57:41,978 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:57:41,980 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [13/20] Done: https://arxiv.org/pdf/1912.05483


[32m[INFO] 2025-12-15 07:58:03,161 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:58:03,163 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:58:03,166 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:58:03,167 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:58:03,285 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:58:03,287 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:58:03,312 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:58:03,314 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [14/20] Done: https://arxiv.org/pdf/1801.06538


[32m[INFO] 2025-12-15 07:58:29,112 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:58:29,117 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:58:29,119 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:58:29,122 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:58:29,261 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:58:29,264 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:58:29,296 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:58:29,300 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [15/20] Done: https://arxiv.org/pdf/1709.08624


[32m[INFO] 2025-12-15 07:59:26,798 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:59:26,800 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:59:26,802 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:59:26,803 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:59:26,908 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:59:26,910 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:59:26,936 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:59:26,937 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [16/20] Done: https://arxiv.org/pdf/1810.04805


[32m[INFO] 2025-12-15 07:59:55,714 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:59:55,717 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:59:55,720 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:59:55,722 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 07:59:55,824 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 07:59:55,827 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 07:59:55,854 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 07:59:55,857 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [17/20] Done: https://arxiv.org/pdf/1502.03167


[32m[INFO] 2025-12-15 08:00:16,389 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:00:16,392 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:00:16,395 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:00:16,402 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:00:16,548 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:00:16,551 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:00:16,582 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:00:16,588 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [18/20] Done: https://arxiv.org/pdf/2103.00634


[32m[INFO] 2025-12-15 08:00:47,434 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:00:47,436 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:00:47,440 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:00:47,441 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:00:47,560 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:00:47,562 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:00:47,586 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:00:47,593 [RapidOCR] main.py:50: Using /usr/local

‚úÖ [19/20] Done: https://arxiv.org/pdf/1807.02547


2025-12-15 08:01:11,328 - INFO - Finished converting document 2103.00634v4.pdf in 24.43 sec.


‚úÖ [20/20] Done: https://arxiv.org/pdf/2103.00634
Document 1:
<!-- image -->

## Docling Technical Report

## Version 1.0

Christoph Auer Maksym Lysak Ahmed Nassar Michele Dolfi Nikolaos Livathinos Panos Vagenas Cesar Berrospi Ramis Matteo Omenetti Fabian Lindlbauer Kasper Dinkla Lokesh Mishra Yusik Kim Shubham Gupta Rafael Teixeira de Lima Valery Weber Lucas Morin Ingmar Meijer Viktor Kuropiatnyk Peter W. J. Staar

AI4K Group, IBM Research R¬® uschlikon, Switzerland

## Abstract

This technical report introduces Docling , an easy to use, self-contained, MITlicensed open-source package for PDF document conversion. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. The code interface allows for easy extensibility and addition of new features and models.

## 1 Introduction

Converting PDF documents back into a machine-processable form

In [None]:
%%time
# Cell magic: reports the time taken to run this whole cell (must stay on the first line).
from worker import convert_to_markdown

# Convert every entry of `sources` one after another and print each result.
# NOTE(review): `sources` is not defined in this cell — presumably a list of PDF
# URLs/paths built by an earlier cell; confirm it survives Restart & Run All.
for src in sources:
    # NOTE(review): the meaning of the positional `False` flag is not visible here;
    # check the signature of `worker.convert_to_markdown` before changing it.
    doc = convert_to_markdown(src,False)
    print(doc)

2025-12-15 08:01:54,959 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 08:01:54,983 - INFO - Going to convert document batch...
2025-12-15 08:01:54,984 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 08:01:55,005 - INFO - Loading plugin 'docling_defaults'
2025-12-15 08:01:55,007 - INFO - Registered picture descriptions: ['vlm', 'api']
2025-12-15 08:01:55,030 - INFO - Loading plugin 'docling_defaults'
2025-12-15 08:01:55,035 - INFO - Registered ocr engines: ['auto', 'easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2025-12-15 08:01:55,036 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 08:01:55,037 - INFO - easyocr cannot be used because it is not installed.
2025-12-15 08:01:55,206 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-12-15 08:01:55,226 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:01:55,229 [RapidOCR] device_con

2025-12-15 08:02:39,473 - INFO - Finished converting document 2408.09869v5.pdf in 44.63 sec.
2025-12-15 08:02:39,601 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 08:02:39,604 - INFO - Going to convert document batch...
2025-12-15 08:02:39,605 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 08:02:39,606 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 08:02:39,606 - INFO - easyocr cannot be used because it is not installed.
2025-12-15 08:02:39,607 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-12-15 08:02:39,625 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:02:39,626 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:02:39,638 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-1

<!-- image -->

## Docling Technical Report

## Version 1.0

Christoph Auer Maksym Lysak Ahmed Nassar Michele Dolfi Nikolaos Livathinos Panos Vagenas Cesar Berrospi Ramis Matteo Omenetti Fabian Lindlbauer Kasper Dinkla Lokesh Mishra Yusik Kim Shubham Gupta Rafael Teixeira de Lima Valery Weber Lucas Morin Ingmar Meijer Viktor Kuropiatnyk Peter W. J. Staar

AI4K Group, IBM Research Rüschlikon, Switzerland

## Abstract

This technical report introduces Docling , an easy to use, self-contained, MITlicensed open-source package for PDF document conversion. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. The code interface allows for easy extensibility and addition of new features and models.

## 1 Introduction

Converting PDF documents back into a machine-processable format has been a major challenge for decades due to their huge var

[32m[INFO] 2025-12-15 08:02:39,824 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:02:39,825 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:02:39,827 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:02:39,827 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:02:39,915 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:02:39,916 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:02:39,939 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:02:39,940 [RapidOCR] main.py:50: Using /usr/local

## Black-Box Prompt Optimization: Aligning Large Language Models without Model Training

Jiale Cheng 1 , 2 * , Xiao Liu 3 , 2 * , Kehan Zheng 1 , Pei Ke 1 , Hongning Wang 1 , Yuxiao Dong 3 , Jie Tang 3 , Minlie Huang 1 †

1 The Conversational Artificial Intelligence (CoAI) Group, Tsinghua University 2 Zhipu AI

3

The Knowledge Engineering Group (KEG), Tsinghua University chengjl23@mails.tsinghua.edu.cn, shawliu9@gmail.com, aihuang@tsinghua.edu.cn

## Abstract

Large language models (LLMs) have shown impressive success in various applications. However, these models are often not well aligned with human intents, which calls for additional treatments on them; that is, the alignment problem. To make LLMs better follow user instructions, existing alignment methods primarily focus on further training them. However, the extra training of LLMs is usually expensive in terms of GPU computing; even worse, some LLMs are not accessible for userdemanded training, such as GPTs. In this work, we ta

[32m[INFO] 2025-12-15 08:04:36,918 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:04:36,919 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:04:36,921 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:04:36,921 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:04:37,008 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:04:37,008 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:04:37,031 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:04:37,031 [RapidOCR] main.py:50: Using /usr/local

Provided proper attribution is provided, Google hereby grants permission to reproduce the tables and figures in this paper solely for use in journalistic or scholarly works.

## Attention Is All You Need

Ashish Vaswani ∗ Google Brain avaswani@google.com

Noam Shazeer ∗ Google Brain noam@google.com

Llion Jones ∗ Google Research llion@google.com

Niki Parmar ∗ Google Research nikip@google.com

Aidan N. Gomez ∗ † University of Toronto aidan@cs.toronto.edu

Jakob Uszkoreit ∗ Google Research usz@google.com

Łukasz Kaiser ∗ Google Brain lukaszkaiser@google.com

Illia Polosukhin ∗ ‡

illia.polosukhin@gmail.com

## Abstract

The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing

[32m[INFO] 2025-12-15 08:05:36,508 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:05:36,508 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:05:36,510 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:05:36,511 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:05:36,590 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:05:36,590 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:05:36,613 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:05:36,613 [RapidOCR] main.py:50: Using /usr/local

## SEMANTIC UNCERTAINTY: LINGUISTIC INVARIANCES FOR UNCERTAINTY ESTIMATION IN NATURAL LANGUAGE GENERATION

Lorenz Kuhn, Yarin Gal, Sebastian Farquhar

OATML Group, Department of Computer Science, University of Oxford lorenz.kuhn@cs.ox.ac.uk

## ABSTRACT

We introduce a method to measure uncertainty in large language models. For tasks like question answering, it is essential to know when we can trust the natural language outputs of foundation models. We show that measuring uncertainty in natural language is challenging because of 'semantic equivalence'-different sentences can mean the same thing. To overcome these challenges we introduce semantic entropy -an entropy which incorporates linguistic invariances created by shared meanings. Our method is unsupervised, uses only a single model, and requires no modifications to 'off-the-shelf' language models. In comprehensive ablation studies we show that the semantic entropy is more predictive of model accuracy on question answering data sets

[32m[INFO] 2025-12-15 08:06:14,057 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:06:14,058 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:06:14,060 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:06:14,061 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:06:14,145 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:06:14,146 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:06:14,168 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:06:14,169 [RapidOCR] main.py:50: Using /usr/local

## Effective scalar-tensor description of regularized Lovelock gravity in four dimensions

Tsutomu Kobayashi 1, ∗

1 Department of Physics, Rikkyo University, Toshima, Tokyo 171-8501, Japan

We reformulate the recently proposed regularized version of Lovelock gravity in four dimensions as a scalar-tensor theory. By promoting the warp factor of the internal space to a scalar degree of freedom by means of Kaluza-Klein reduction, we show that regularized Lovelock gravity can be described effectively by a certain subclass of the Horndeski theory. Cosmological aspects of this particular scalar-tensor theory are studied. It is found that the background with a scalar charge is generically allowed. The consequences of this scalar charge are briefly discussed.

## I. INTRODUCTION

Lovelock gravity [1] is the most general metric theory of gravity in higher dimensions retaining the second-order nature of field equations for the metric. The action for Lovelock gravity in D dimensions is given by

2025-12-15 08:06:25,390 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-15 08:06:25,416 - INFO - Going to convert document batch...
2025-12-15 08:06:25,417 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2025-12-15 08:06:25,418 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2025-12-15 08:06:25,419 - INFO - easyocr cannot be used because it is not installed.
2025-12-15 08:06:25,419 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2025-12-15 08:06:25,439 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:06:25,440 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:06:25,452 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_det_infer.pth[0m
[32m[INFO] 2025-12-15 08:06:25,453 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-pack

## Structural and magnetic properties of 3d transition metal oxide chains on the (001) surfaces of Ir and Pt

Martin Schmitt, 1 Chong H. Park, 1, 2 Paula Weber, 1 Andreas Jäger, 1 Jeannette Kemmer, 1 Matthias Vogt, 1 and Matthias Bode 1, 3, ∗

1

Physikalisches Institut, Experimentelle Physik II, Universität Würzburg, Am Hubland, 97074 Würzburg, Germany University of British Columbia, 2329 West Mall, Vancouver, BC Canada Wilhelm Conrad Röntgen-Center for Complex Material Systems (RCCM), Universität Würzburg, Am Hubland, 97074 Würzburg, Germany (Dated: October 8, 2019)

We present a survey of the structural and magnetic properties of submonolayer transition metal dioxides on the (001) surfaces of the heavy face-centered cubic (fcc) noble metals Ir and Pt performed by spin-averaged scanning tunneling microscopy (STM) and spin-polarized (SP-)STM. Our STM results confirm that deposition of Co, Fe, Mn, and Cr on the (2 × 1) oxygen-reconstructed Ir(001) surface lea

[32m[INFO] 2025-12-15 08:07:15,488 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:07:15,489 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:07:15,491 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:07:15,491 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:07:15,568 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:07:15,569 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:07:15,591 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:07:15,592 [RapidOCR] main.py:50: Using /usr/local

## Formalizing Timing Diagram Requirements in Discrete Duration Calulus

Raj Mohan Matteplackel 1 , Paritosh K. Pandya 1 , and Amol Wakankar 2

1

Tata Institute of Fundamental Research, Mumbai 400005, India. { raj.matteplackel,pandya } @tifr.res.in 2 Bhabha Atomic Research Centre, Mumbai, India.

amolk@barc.gov.in

Abstract. Several temporal logics have been proposed to formalise timing diagram requirements over hardware and embedded controllers. These include LTL [CF05], discrete time MTL [AH93] and the recent industry standard PSL [EF16]. However, succintness and visual structure of a timing diagram are not adequately captured by their formulae [CF05]. Interval temporal logic QDDC is a highly succint and visual notation for specifying patterns of behaviours [Pan00].

In this paper, we propose a practically useful notation called SeCeNL which enhances negation free fragment of QDDC with features of nominals and limited liveness . We show that timing diagrams can be naturally (composi

[32m[INFO] 2025-12-15 08:07:58,837 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:07:58,838 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:07:58,840 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:07:58,840 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:07:58,942 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:07:58,943 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:07:58,965 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:07:58,966 [RapidOCR] main.py:50: Using /usr/local

1

## INTRODUCTION

Recent advances in object detection are driven by the success of region proposal methods ( e.g ., [4]) and region-based convolutional neural networks (RCNNs) [5]. Although region-based CNNs were computationally expensive as originally developed in [5], their cost has been drastically reduced thanks to sharing convolutions across proposals [1], [2]. The latest incarnation, Fast R-CNN [2], achieves near real-time rates using very deep networks [3], when ignoring the time spent on region proposals . Now, proposals are the test-time computational bottleneck in state-of-the-art detection systems.

Region proposal methods typically rely on inexpensive features and economical inference schemes. Selective Search [4], one of the most popular methods, greedily merges superpixels based on engineered low-level features. Yet when compared to efficient detection networks [2], Selective Search is an order of magnitude slower, at 2 seconds per image in a CPU implementation. EdgeBox

[32m[INFO] 2025-12-15 08:09:08,952 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:09:08,953 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:09:08,955 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:09:08,955 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:09:09,035 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:09:09,036 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:09:09,059 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:09:09,059 [RapidOCR] main.py:50: Using /usr/local

## Deep contextualized word representations

Matthew E. Peters † , Mark Neumann † , Mohit Iyyer † , Matt Gardner † ,

{ matthewp,markn,mohiti,mattg } @allenai.org

Christopher Clark ∗ , Kenton Lee ∗ , Luke Zettlemoyer †∗

{ csquared,kentonl,lsz } @cs.washington.edu

†

Paul G. Allen School of Computer Science &amp; Engineering, University of Washington

Allen Institute for Artificial Intelligence ∗

## Abstract

We introduce a new type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across linguistic contexts (i.e., to model polysemy). Our word vectors are learned functions of the internal states of a deep bidirectional language model (biLM), which is pretrained on a large text corpus. We show that these representations can be easily added to existing models and significantly improve the state of the art across six challenging NLP problems, including questio

[32m[INFO] 2025-12-15 08:09:56,500 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:09:56,502 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:09:56,504 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:09:56,505 [RapidOCR] main.py:50: Using /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2025-12-15 08:09:56,590 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2025-12-15 08:09:56,592 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2025-12-15 08:09:56,615 [RapidOCR] download_file.py:60: File exists and is valid: /usr/local/python/3.12.1/lib/python3.12/site-packages/rapidocr/models/ch_PP-OCRv4_rec_infer.pth[0m
[32m[INFO] 2025-12-15 08:09:56,616 [RapidOCR] main.py:50: Using /usr/local