## Demo: Financial Report Extraction *with* an Agent

### Provide API keys manually

In [None]:
import os
from getpass import getpass

# Prompt for each key on its own, so that having one key already exported
# neither skips the prompt for the other nor overwrites a value that is
# already present in the environment.
if "LLAMA_CLOUD_API_KEY" not in os.environ:
    os.environ["LLAMA_CLOUD_API_KEY"] = getpass("Enter your Llama Cloud API Key: ")

# NOTE(review): the OpenAI SDK conventionally reads OPENAI_API_KEY rather than
# OPENAI_KEY — confirm which variable name downstream code expects.
if "OPENAI_KEY" not in os.environ:
    os.environ["OPENAI_KEY"] = getpass("Enter your OpenAI API Key: ")

### Create an instance of the extractor

In [None]:
from llama_cloud_services import LlamaExtract

# Build the extraction client with no explicit arguments.
# Optionally, provide your project id; if not, it will use the 'Default' project.
# NOTE(review): presumably the client picks up LLAMA_CLOUD_API_KEY from the
# environment when no api_key is passed — confirm against the library docs.
llama_extract = LlamaExtract()
# Alternative: pass the key explicitly instead of relying on the environment.
# llama_extract = LlamaExtract(api_key="YOUR_API_KEY")

### Define basic schema

In [None]:
# JSON-Schema-style description of the fields the agent should extract:
# a string `company_name` and a numeric `revenue`.
schema = {
    "type": "object",
    "properties": {
        "company_name": {
            "type": "string",
            "description": "Name of the company",
        },
        "revenue": {
            "type": "number",
            "description": "Annual revenue in USD",
        },
    },
}

### Define extraction configuration

In [None]:
from llama_cloud import ExtractConfig, ExtractMode, ChunkMode, ExtractTarget

# Extraction settings handed to the agent, grouped by concern.
config = ExtractConfig(
    # -- Schema alignment --
    # Target is one of PER_DOC, PER_PAGE.
    extraction_target=ExtractTarget.PER_DOC,

    # -- Model settings --
    # Mode is one of FAST, BALANCED, MULTIMODAL, PREMIUM.
    extraction_mode=ExtractMode.MULTIMODAL,
    # Optional parse / extract model overrides; not set here.
    parse_model=None,
    extract_model=None,

    # -- System prompt --
    system_prompt="Focus on the most recent financial data",

    # -- Page range and context --
    # Extract from specific pages only.
    page_range="1-5,10-15",
    # Number of context pages for long documents.
    num_pages_context=3,

    # -- Metadata extensions (see Metadata Extensions page for details) --
    # Enable citations.
    cite_sources=True,
    # Enable reasoning (not available in FAST mode).
    use_reasoning=True,
    # Enable confidence scores (MULTIMODAL/PREMIUM only).
    confidence_scores=True,

    # -- Advanced options --
    # Chunking is one of PAGE, SECTION.
    chunk_mode=ChunkMode.PAGE,
    # Enable for better OCR.
    high_resolution_mode=True,
    # Set to True to bypass the cache.
    invalidate_cache=False,

    # -- Other params/args --
    priority=None,
    multimodal_fast_mode=False,
    citation_bbox=False,
)

### Create the extraction agent

In [None]:
# Create an extraction agent named "Financial Data Extractor" that combines
# the schema and the extraction configuration defined in the earlier cells.
# NOTE(review): re-running this cell may fail if an agent with this name
# already exists on the service — confirm, and reuse or delete it if so.
agent = llama_extract.create_agent(
    name="Financial Data Extractor",
    data_schema=schema,
    config=config
)

---
### Testing
---

In [None]:
# Run the agent on a single local PDF and print the extracted data.
# NOTE(review): this absolute path is specific to one machine, and it points
# at a resume while the schema asks for company/revenue fields — confirm the
# intended sample document.
pdf_path = "/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/resumes/ai_researcher.pdf"
result = agent.extract(pdf_path)
print(result.data)