### Batch API Example: Fetching the Responses for an Uploaded Folder


In [15]:
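# Install the libraries (ipython is used for displaying markdown in this demo;
# python-dotenv provides load_dotenv, which is imported below). Uncomment to run.
# !pip3 install --upgrade ipython
# !pip3 install --upgrade any-parser
# !pip3 install --upgrade python-dotenv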

In [16]:
import json
import logging
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

from dotenv import load_dotenv

from any_parser import AnyParser

# Configure logging: INFO level on the root logger, plus a module-level logger
# that process_response uses to report per-request errors.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables (CAMBIO_API_KEY is read from the environment below).
# NOTE(review): override=True presumably lets values from the .env file replace
# variables that are already set in the process environment — confirm against
# the python-dotenv documentation.
load_dotenv(override=True)

# Maximum number of worker threads used when retrieving results concurrently.
MAX_WORKER = 10

In [17]:
# Build the AnyParser client from the CAMBIO_API_KEY environment variable
api_key = os.getenv("CAMBIO_API_KEY")
if not api_key:
    # Fail fast: a missing or empty key is reported here instead of being
    # passed on to AnyParser.
    raise ValueError("CAMBIO_API_KEY is not set")
ap = AnyParser(api_key)

Read the upload responses from the JSONL file produced by `parse_batch_upload.py` (one JSON object per line).

In [18]:
# Point this at the JSONL output file written by parse_batch_upload.py
response_file = "./sample_data_20250102103047.jsonl"

# Each non-empty line is one JSON object. Blank lines (for example a trailing
# empty line at the end of the file) are skipped, because json.loads raises on
# an empty string. The file is read as UTF-8 regardless of the platform default.
with open(response_file, "r", encoding="utf-8") as f:
    responses = [json.loads(line) for line in f if line.strip()]

In [19]:
# Retrieve the parsed result for a single batch request
def process_response(response):
    """Fetch the parsed markdown for one upload response and update it in place.

    Args:
        response: Dict describing one uploaded file. Must contain "requestId".

    Returns:
        The same dict that was passed in.
        - If retrieval succeeds, "result", "requestStatus" and
          "completionTime" are set from the retrieved object.
        - If retrieval returns nothing, the dict is returned unchanged.
        - If anything inside the retrieval step raises, "error" is set to a
          one-element list holding the exception message.

    Raises:
        KeyError: If ``response`` has no "requestId" key.
    """
    request_id = response["requestId"]
    try:
        markdown = ap.batches.retrieve(request_id)
        if not markdown:
            # Previously a silent no-op; make it visible in the log.
            logger.warning(
                "No data returned for %s; leaving response unchanged", request_id
            )
            return response

        # Only the first result entry is kept; an empty result becomes "".
        response["result"] = [markdown.result[0] if markdown.result else ""]
        # Do not claim COMPLETED unconditionally: prefer the status carried by
        # the retrieved object when it has one.
        # NOTE(review): assumes the retrieved object may expose `requestStatus`;
        # falls back to the previous hard-coded "COMPLETED" otherwise — confirm
        # against the any-parser response model.
        response["requestStatus"] = (
            getattr(markdown, "requestStatus", None) or "COMPLETED"
        )
        response["completionTime"] = markdown.completionTime
    except Exception as e:
        # Best-effort: record the failure on this row so that one bad request
        # does not abort the whole batch.
        logger.error("Error processing %s: %s", request_id, e)
        response["error"] = [str(e)]
    return response

In [20]:
# Process responses concurrently.
# executor.map yields results in the same order as `responses`, so the rows are
# written back in their original file order. Collecting futures with
# as_completed would reorder them by completion time.
with ThreadPoolExecutor(max_workers=MAX_WORKER) as executor:
    updated_responses = list(executor.map(process_response, responses))

# Serialize every row before reopening the file in "w" mode, so that a
# serialization error cannot leave the input file truncated.
serialized_rows = [json.dumps(response) for response in updated_responses]

# Write all updated responses back to file
with open(response_file, "w", encoding="utf-8") as f:
    for row in serialized_rows:
        f.write(row + "\n")

print(f"Updated all responses in {response_file} with markdown content")

Updated all responses in ./sample_data_20250102103047.jsonl with markdown content


Print out the first row from the updated file

In [21]:
# Show the first record of the rewritten JSONL file, pretty-printed
with open(response_file, "r") as f:
    first_line = f.readline()

# Parse only after the file handle is closed; the output is unchanged.
first_row = json.loads(first_line)
print("First row from updated file:")
print(json.dumps(first_row, indent=2))

First row from updated file:
{
  "fileName": "Earnings-Presentation-Q2-2024.pdf",
  "requestId": "cfb556cb-e5f9-4b6c-a2f7-6ba982858a92",
  "requestStatus": "COMPLETED",
  "result": [
    "## Meta Earnings Presentation\n## Q2 2024\n\ninvestor.fb.com Meta logo, consisting of a stylized infinity symbol next to the text \"Meta\""
  ],
  "completionTime": "2025-01-02T04:34:56.494827+00:00"
}


## End of the notebook

Check out more CambioML [case studies](https://www.cambioml.com/blog)!

<a href="https://www.cambioml.com/" title="CambioML">
    <img src="./sample_data/cambioml_logo_large.png" alt="CambioML logo" style="height: 100px; display: block; margin-left: auto; margin-right: auto;"/>
</a>