# Using DocQA

Test out the functionality of the `docqa` document question-answering Python package.

In [1]:
from docqa.config import Settings
from docqa.pipeline.engine import QAEngine

## Set the config

In [2]:
# Configuration for the QA engine. Both the chat model and the embedding
# model are requested from the "ollama" provider.
# NOTE(review): retrieval_type="mmr" presumably selects maximal-marginal-relevance
# retrieval and retrieval_k the number of chunks fetched per question —
# confirm against docqa.config.Settings.
settings = Settings(
    # Answer generation
    llm_provider="ollama",
    llm_model="qwen2.5:7b",
    # Embeddings
    embed_provider="ollama",
    embed_model="nomic-embed-text",
    # On-disk location of the FAISS index (relative path)
    faiss_index_dir="./.local/faiss_index",
    # Retrieval
    retrieval_type="mmr",
    retrieval_k=4,
)

Validate that the values provided for the settings are correct.

In [3]:
# Check the configuration before it is used to build the engine.
# NOTE(review): presumably raises on invalid values rather than returning a
# status — confirm against docqa.config.Settings.
settings.validate()

## Question-Answer Engine

In [4]:
# Build the question-answering engine from the settings object created earlier
# in this notebook.
engine = QAEngine(settings)

We will ingest the PDF document first.

In [5]:
# Ingest the PDF at this path and print the statistics the engine returns.
# NOTE(review): the index lives on disk, so re-running this cell may append
# the same chunks again — confirm whether ingest_pdf de-duplicates.
pdf_path = "../../documents/soc2-type2.pdf"
ingest_stats = engine.ingest_pdf(pdf_path)
print("Ingest:", ingest_stats)

Ingest: {'ingested_pages': 56, 'chunks_added': 193, 'index_dir': './.local/faiss_index'}


Next, we will ingest the sample JSON document.

In [6]:
# Ingest the JSON file at this path and print the statistics the engine returns.
# NOTE(review): the index lives on disk, so re-running this cell may append
# the same chunks again — confirm whether ingest_json de-duplicates.
json_path = "../../documents/sample_json.json"
ingest_stats = engine.ingest_json(json_path)
print("Ingest:", ingest_stats)

Ingest: {'ingested_pages': 19, 'chunks_added': 19, 'index_dir': './.local/faiss_index'}


In [7]:
# Ask one question against the ingested documents. The returned mapping is
# read via its "answer" and "sources" keys in the following cell.
question = "Which cloud providers do you rely on?"
result = engine.answer(question)

In [9]:
# Show the generated answer first.
print("\nAnswer:", result["answer"])

# Then show the first source entry, or None when the sources list is empty.
sources = result["sources"]
top_source = sources[0] if sources else None
print("\nTop source:", top_source)


Answer: Amazon Web Services Inc. (AWS)

Top source: {'source': '../../documents/soc2-type2.pdf', 'source_type': 'pdf', 'page': 4, 'chunk_index': 4, 'doc_id': None, 'text_snippet': '2  INDEPENDENT SERVICE AUDITOR’S REPORT    To Board of Directors  Product Fruits s.r.o.    Scope    We have examined the accompanying “ Description of Product Fruits , a cloud -hosted software application ”  provided by Product Fruits s.r.o. throughout the period July 24, 2024 to July 23, 2025  (the description) and the  suitability of the design and operating effectiveness of controls to meet Product Fruits s.r.o. ’s service  commitments and system requirements based on the criteria for Security, Confidentiality, Availability, Processing  Integrity & Privacy principles set forth in TSP Section  100 Principles and Criteria, Trust Services Principles and  Criteria for Security, Confidentiality and Availability (applicable trust services criteria) throughout the period July  24, 2024 to July 23, 2025.    Prod