## Structured table example (gear racks)

### Provide API keys manually

In [1]:
import os
from getpass import getpass

# Ask for each credential on its own, and only when it is missing or empty.
# Checking the keys independently means an OpenAI key that is already set is
# never overwritten just because the Llama Cloud key is absent, and a missing
# OpenAI key is still requested when the Llama Cloud key is present.
if not os.environ.get("LLAMA_CLOUD_API_KEY"):
    os.environ["LLAMA_CLOUD_API_KEY"] = getpass("Enter your Llama Cloud API Key: ")

# NOTE(review): no visible cell reads OPENAI_KEY, and OpenAI tooling usually
# looks for OPENAI_API_KEY — confirm the variable name is the intended one.
if not os.environ.get("OPENAI_KEY"):
    os.environ["OPENAI_KEY"] = getpass("Enter your OpenAI API Key: ")

### Create instance of extractor

In [2]:
import nest_asyncio
# Patch asyncio so an event loop that is already running (the Jupyter kernel
# runs one) can be re-entered.
# NOTE(review): presumably required by the llama_cloud_services client — confirm.
nest_asyncio.apply()

In [3]:
from llama_cloud_services import (
    LlamaExtract,
    EU_BASE_URL,
)

# Create the extraction client against the EU endpoint.
# No project id is passed here; optionally provide one, otherwise the
# 'Default' project is used.
# NOTE(review): the API key is presumably picked up from LLAMA_CLOUD_API_KEY — confirm.
llama_extract = LlamaExtract(base_url=EU_BASE_URL)

# Show which base URL the client was configured with.
print(EU_BASE_URL)

https://api.cloud.eu.llamaindex.ai


### Define the data schema

In [10]:
from pydantic import BaseModel, Field

class GearRack(BaseModel):
    # Extraction schema for a single gear rack (one row of the catalog table).
    # The Field descriptions are part of the schema handed to the extractor,
    # so they are worded as guidance for the model and kept free of typos and
    # markup residue.
    # NOTE: a `#` comment is used instead of a class docstring on purpose —
    # pydantic copies a model docstring into the generated JSON schema.

    # --- General properties -------------------------------------------------
    gear_rack_material: str = Field(
        description="The material from which the gear rack was manufactured (e.g. Steel, Stainless Steel, Plastics (Polyketon (PK), Polyacetal (POM)), etc.)"
    )
    straight_toothed: bool = Field(
        description="It indicates whether the teeth are aligned longitudinally with the shaft, meaning there is no \"helix angle\"."
    )
    angle_of_engagement: int = Field(
        description="The pressure angle of the gear teeth (German: 'Eingriffswinkel'), i.e. the angle between the line of action and the tangent to the pitch line. It is usually given in degrees (°)."
    )
    module: float = Field(
        description="The gear module represents the ratio of the pitch (distance between teeth) to pi (π), effectively defining how thick a gear tooth is and, consequently, how strong it is."
    )

    # --- Catalog dimensions, all in millimeters -----------------------------
    HG: float = Field(description="Gear rack related dimension indicated in millimeters [mm]")
    HTK: float = Field(description="Gear rack related dimension indicated in millimeters [mm]")
    HFK: float = Field(description="Gear rack related dimension indicated in millimeters [mm]")
    ZB: float = Field(description="Gear rack related dimension indicated in millimeters [mm]")
    L: float = Field(description="The Length of the gear rack indicated in millimeters [mm]")

    # --- Weight and identification ------------------------------------------
    G: float = Field(description="The weight of the gear rack indicated in unit of grams ([g]).")
    art_nr: str = Field(
        description="'Art.-Nr.' is an abbreviation for the German term Artikelnummer, which translates to Article Number. It distinguishes a particular rack based on its specifications."
    )
  

---
### Testing - With ``extraction_target``: 'PER_TABLE_ROW'
---

In [11]:
from llama_cloud_services.extract import ExtractConfig, ExtractMode, ExtractTarget

result_per_tab_row = await llama_extract.aextract(
    data_schema=GearRack,
    files="/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/tabs/gear_rack.pdf",
    config=ExtractConfig(
        extraction_mode=ExtractMode.PREMIUM,
        extraction_target=ExtractTarget.PER_TABLE_ROW,
        parse_model="anthropic-sonnet-4.5",
        system_prompt="You are an expert at extracting specifications of gear racks from catalog documents",
    ),
)

In [12]:
# For this run `.data` is a list with one dict per extracted record, so this
# is the number of extracted records.
len(result_per_tab_row.data)

8

In [13]:
# Display the extracted records (a list of dicts keyed by the schema fields).
result_per_tab_row.data

[{'gear_rack_material': 'Polyketon (PK)',
  'straight_toothed': True,
  'angle_of_engagement': 20,
  'module': 0.5,
  'HG': 4.5,
  'HTK': 4.0,
  'HFK': 3.4,
  'ZB': 4.0,
  'L': 250.0,
  'G': 4.62,
  'art_nr': 'ZPK05250PK'},
 {'gear_rack_material': 'Polyketon (PK)',
  'straight_toothed': True,
  'angle_of_engagement': 20,
  'module': 0.5,
  'HG': 6.0,
  'HTK': 5.5,
  'HFK': 4.9,
  'ZB': 4.0,
  'L': 250.0,
  'G': 6.76,
  'art_nr': 'ZPK05250PK/5'},
 {'gear_rack_material': 'Polyketon (PK)',
  'straight_toothed': True,
  'angle_of_engagement': 20,
  'module': 0.7,
  'HG': 6.7,
  'HTK': 6.0,
  'HFK': 5.1,
  'ZB': 6.0,
  'L': 250.0,
  'G': 10.58,
  'art_nr': 'ZPK07250PK'},
 {'gear_rack_material': 'Polyketon (PK)',
  'straight_toothed': True,
  'angle_of_engagement': 20,
  'module': 1.0,
  'HG': 9.0,
  'HTK': 8.0,
  'HFK': 6.8,
  'ZB': 9.0,
  'L': 250.0,
  'G': 21.81,
  'art_nr': 'ZPK10250PK'},
 {'gear_rack_material': 'Polyketon (PK)',
  'straight_toothed': True,
  'angle_of_engagement': 20,
 

---
### Testing - With ``extraction_target``: 'PER_DOC'
---

In [21]:
from llama_cloud_services.extract import ExtractConfig, ExtractMode, ExtractTarget

result_per_document = await llama_extract.aextract(
    data_schema=GearRack,
    files="/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/tabs/gear_rack.pdf",
    config=ExtractConfig(
        extraction_mode=ExtractMode.PREMIUM,
        extraction_target=ExtractTarget.PER_DOC,
        parse_model="anthropic-sonnet-4.5",
        system_prompt="You are an expert at extracting specifications of gear racks from catalog documents",
    ),
)

In [22]:
# NOTE: for this run `.data` is a single dict (see the output of the next
# cell), so len() returns the number of schema fields (11), not the number of
# extracted gear racks.
len(result_per_document.data)

11

In [None]:
# Display the extracted record — a single dict for this run.
result_per_document.data

{'gear_rack_material': 'Polyketon (PK)',
 'straight_toothed': True,
 'angle_of_engagement': 20,
 'module': 0.7,
 'HG': 6.7,
 'HTK': 6.0,
 'HFK': 5.1,
 'ZB': 6.0,
 'L': 250.0,
 'G': 10.58,
 'art_nr': 'ZPK07250PK'}

In [26]:
# Show the metadata recorded for this run (field metadata, usage counters and
# the parse job id).
result_per_document.extraction_metadata

{'field_metadata': {},
 'usage': {'num_pages_extracted': 1,
  'num_document_tokens': 663,
  'num_output_tokens': 75},
 'parse_job_id': 'fe67fc09-c356-4d98-9304-c2f986892891'}

In [27]:
# Show the full run object, including the effective ExtractConfig that was used.
result_per_document

ExtractRun(config=ExtractConfig(chunk_mode=<DocumentChunkMode.PAGE: 'PAGE'>, citation_bbox=False, cite_sources=False, confidence_scores=False, extract_model=<ExtractModels.OPENAI_GPT_41: 'openai-gpt-4-1'>, extraction_mode=<ExtractMode.PREMIUM: 'PREMIUM'>, extraction_target=<ExtractTarget.PER_DOC: 'PER_DOC'>, high_resolution_mode=False, invalidate_cache=False, multimodal_fast_mode=False, num_pages_context=None, page_range=None, parse_model=<PublicModelName.ANTHROPIC_SONNET_45: 'anthropic-sonnet-4.5'>, priority=None, system_prompt='You are an expert at extracting specifications of gear racks from catalog documents', use_reasoning=False), created_at=datetime.datetime(2026, 1, 19, 23, 56, 43, 891927, tzinfo=datetime.timezone.utc), data={'gear_rack_material': 'Polyketon (PK)', 'straight_toothed': True, 'angle_of_engagement': 20, 'module': 0.7, 'HG': 6.7, 'HTK': 6.0, 'HFK': 5.1, 'ZB': 6.0, 'L': 250.0, 'G': 10.58, 'art_nr': 'ZPK07250PK'}, data_schema={'additionalProperties': False, 'propertie

---
### Testing - With ``extraction_target``: 'PER_DOC' + ``chunk_mode``: 'SECTION'
---

In [34]:
from llama_cloud_services.extract import ExtractConfig, ExtractMode, ExtractTarget

result_per_document_SECTION = await llama_extract.aextract(
    data_schema=GearRack,
    files="/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/tabs/gear_rack.pdf",
    config=ExtractConfig(
        extraction_mode=ExtractMode.PREMIUM,
        extraction_target=ExtractTarget.PER_DOC,
        parse_model="anthropic-sonnet-4.5",
        system_prompt="You are an expert at extracting specifications of gear racks from catalog documents",
        chunk_mode = 'SECTION'
    ),
)

In [36]:
# NOTE: for this run `.data` is a single dict (see the output of the next
# cell), so len() returns the number of schema fields (11), not the number of
# extracted gear racks.
len(result_per_document_SECTION.data)

11

In [None]:
# Display the extracted record — a single dict for this run.
result_per_document_SECTION.data

{'gear_rack_material': 'Polyketon (PK)',
 'straight_toothed': True,
 'angle_of_engagement': 20,
 'module': 0.5,
 'HG': 4.5,
 'HTK': 4.0,
 'HFK': 3.4,
 'ZB': 4.0,
 'L': 250.0,
 'G': 4.62,
 'art_nr': 'ZPK05250PK'}

In [50]:

# View the extracted record as (field, value) pairs.
result_per_document_SECTION.data.items()

dict_items([('gear_rack_material', 'Polyketon (PK)'), ('straight_toothed', True), ('angle_of_engagement', 20), ('module', 0.5), ('HG', 4.5), ('HTK', 4.0), ('HFK', 3.4), ('ZB', 4.0), ('L', 250.0), ('G', 4.62), ('art_nr', 'ZPK05250PK')])

---
### Testing - With ``extraction_target``: 'PER_PAGE'
---

In [15]:
from llama_cloud_services.extract import ExtractConfig, ExtractMode, ExtractTarget

result_per_page = await llama_extract.aextract(
    data_schema=GearRack,
    files="/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/tabs/gear_rack.pdf",
    config=ExtractConfig(
        extraction_mode=ExtractMode.PREMIUM,
        extraction_target=ExtractTarget.PER_PAGE,
        parse_model="anthropic-sonnet-4.5",
        system_prompt="You are an expert at extracting specifications of gear racks from catalog documents",
    ),
)

In [16]:
# For this run `.data` is a list of record dicts, so this is the number of
# extracted records.
len(result_per_page.data)

1

In [17]:
# Display the extracted records (a list of dicts keyed by the schema fields).
result_per_page.data

[{'gear_rack_material': 'Polyketon (PK)',
  'straight_toothed': True,
  'angle_of_engagement': 20,
  'module': 0.5,
  'HG': 4.5,
  'HTK': 4.0,
  'HFK': 3.4,
  'ZB': 4.0,
  'L': 250.0,
  'G': 4.62,
  'art_nr': 'ZPK05250PK'}]

## Result

- ``PER_TABLE_ROW``: All gear racks (8) were extracted<br>
- ``PER_DOC``: Only 1 gear rack was extracted — `.data` is a single dict here, so the `len()` of 11 is its number of fields, not 11 gear racks<br>
- ``PER_PAGE``: Only 1 gear rack was extracted