In [None]:
# 02_evaluation.ipynb

## Purpose
"""
This notebook performs lightweight automated evaluation of the DocVision
pipeline by validating output structure, checking rule-based constraints,
and measuring latency. It complements the declarative experiments defined
in the experiments/ directory.
"""

In [2]:
import sys
import os

# Resolve the parent of the current working directory and make sure it is on
# the import path, so the `src.*` imports used later in the notebook resolve.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.append(PROJECT_ROOT)


In [12]:
import os
import re
import time
import asyncio
from pprint import pprint

from src.vision import classify_image
from src.textextraction import extract_text_from_image_async


In [5]:
# Directory holding the sample documents to evaluate.
# Defaults to the `uploadss/` folder inside the kernel's working directory
# (assumes the notebook is run from the `notebooks/` folder, as the sys.path
# setup cell also does). Set DOCVISION_EVAL_DIR to point somewhere else
# instead of hard-coding a machine-specific absolute path.
EVAL_DIR = os.environ.get(
    "DOCVISION_EVAL_DIR", os.path.join(os.getcwd(), "uploadss")
)

# File names exactly as they exist on disk (note the space before ".jpg").
EVAL_FILE_NAMES = [
    "driving license .jpg",
    "ss.jpg",
]

EVAL_FILES = [os.path.join(EVAL_DIR, name) for name in EVAL_FILE_NAMES]

# Fail fast, and report every missing input at once rather than only the first.
missing_files = [path for path in EVAL_FILES if not os.path.isfile(path)]
assert not missing_files, f"Missing file(s): {missing_files}"

print("Evaluation files loaded:", len(EVAL_FILES))


Evaluation files loaded: 2


In [None]:
# Keys a classification result must contain to count as structurally valid.
REQUIRED_RESULT_KEYS = ("document_type", "reasoning", "extracted_textfields")

# A 12-digit number, either contiguous or written as three groups of four
# separated by a single space or hyphen (e.g. "1234 5678 9012").
AADHAAR_NUMBER_RE = re.compile(r"\b\d{4}[ -]?\d{4}[ -]?\d{4}\b")


async def evaluate_file(path: str):
    """Run OCR and classification on one file and compute evaluation checks.

    The OCR step and the classification step are awaited one after the other,
    and the combined wall-clock duration is recorded.

    Args:
        path: Path of the image to evaluate.

    Returns:
        A ``(result, checks)`` tuple. ``result`` is whatever
        ``classify_image`` returned, unchanged. ``checks`` is a dict with:

        * ``"valid_json"`` - ``True`` only if ``result`` is a mapping that
          contains every key in ``REQUIRED_RESULT_KEYS``.
        * ``"latency_sec"`` - elapsed seconds for both steps, rounded to 2
          decimals.
        * ``"aadhaar_rule_passed"`` - for results classified as
          ``"aadhaar_card"``, whether the OCR text contains a 12-digit number
          or the string "UIDAI" (case-insensitive); ``"N/A"`` otherwise.
    """
    from collections.abc import Mapping

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()

    # Run OCR explicitly for evaluation
    ocr_text = await extract_text_from_image_async(path)

    # Run Vision LLM classification
    result = await classify_image(path)

    latency = round(time.perf_counter() - start, 2)

    # A malformed result (not a mapping, or missing keys) must be *reported*
    # as invalid rather than crash the evaluation with KeyError/TypeError.
    is_mapping = isinstance(result, Mapping)
    checks = {
        "valid_json": is_mapping
        and all(key in result for key in REQUIRED_RESULT_KEYS),
        "latency_sec": latency,
    }

    document_type = result.get("document_type") if is_mapping else None

    if document_type == "aadhaar_card":
        # OCR may yield no usable text (e.g. when parsing failed); treat
        # anything that is not a string as empty instead of raising.
        text = ocr_text if isinstance(ocr_text, str) else ""
        checks["aadhaar_rule_passed"] = bool(
            AADHAAR_NUMBER_RE.search(text) or "UIDAI" in text.upper()
        )
    else:
        checks["aadhaar_rule_passed"] = "N/A"

    return result, checks


In [14]:
results = await evaluate_all(EVAL_FILES)

for file, metrics in results:
    print(file)
    pprint(metrics)




Evaluating: C:/Users/bplsy/Documents/DOCVISION_IQ/notebooks/uploadss/driving license .jpg


2026-01-29 13:04:45,587 - INFO - HTTP Request: POST https://api.cloud.llamaindex.ai/api/parsing/upload "HTTP/1.1 200 OK"


Started parsing the file under job_id 8dc83a68-9209-4b25-a474-e4be84fc7aaf


2026-01-29 13:04:46,942 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/8dc83a68-9209-4b25-a474-e4be84fc7aaf "HTTP/1.1 200 OK"
2026-01-29 13:04:49,232 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/8dc83a68-9209-4b25-a474-e4be84fc7aaf "HTTP/1.1 200 OK"
2026-01-29 13:04:49,626 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/8dc83a68-9209-4b25-a474-e4be84fc7aaf/result/text "HTTP/1.1 200 OK"


Error while parsing the file '<bytes/buffer>': Event loop is closed


2026-01-29 13:04:52,313 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"



Evaluating: C:/Users/bplsy/Documents/DOCVISION_IQ/notebooks/uploadss/ss.jpg


2026-01-29 13:05:04,679 - INFO - HTTP Request: POST https://api.cloud.llamaindex.ai/api/parsing/upload "HTTP/1.1 200 OK"


Started parsing the file under job_id da70766a-4bf1-4341-9beb-3bbe4068374f


2026-01-29 13:05:05,961 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/da70766a-4bf1-4341-9beb-3bbe4068374f "HTTP/1.1 200 OK"
2026-01-29 13:05:08,212 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/da70766a-4bf1-4341-9beb-3bbe4068374f "HTTP/1.1 200 OK"
2026-01-29 13:05:08,584 - INFO - HTTP Request: GET https://api.cloud.llamaindex.ai/api/parsing/job/da70766a-4bf1-4341-9beb-3bbe4068374f/result/text "HTTP/1.1 200 OK"


Error while parsing the file '<bytes/buffer>': Event loop is closed


2026-01-29 13:05:09,971 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


C:/Users/bplsy/Documents/DOCVISION_IQ/notebooks/uploadss/driving license .jpg
{'aadhaar_rule_passed': 'N/A', 'latency_sec': 19.71, 'valid_json': True}
C:/Users/bplsy/Documents/DOCVISION_IQ/notebooks/uploadss/ss.jpg
{'aadhaar_rule_passed': True, 'latency_sec': 13.3, 'valid_json': True}


In [15]:
# Only announce success if every evaluated file actually passed the structural
# check; previously this message was printed unconditionally.
invalid_outputs = [file for file, metrics in results if not metrics.get("valid_json")]
assert not invalid_outputs, f"Structural validation failed for: {invalid_outputs}"

print("All evaluations completed successfully.")


All evaluations completed successfully.


In [16]:
## Summary
"""
This notebook demonstrates automated evaluation of DocVision outputs,
including JSON validity, rule-based constraints, and latency measurement.
Results support the evaluation protocol described in the README.
"""


'\nThis notebook demonstrates automated evaluation of DocVision outputs,\nincluding JSON validity, rule-based constraints, and latency measurement.\nResults support the evaluation protocol described in the README.\n'