## Demo - Extract with agent 
---
Default Config:
* ``Schema alignment``:
    * extraction_target= PER_DOC 
* ``Model settings``: 
    * extraction_mode= BALANCED 
* ``Metadata extensions``:
    * use_reasoning= False
    * cite_sources= False
    * confidence_scores= False
* ``Advanced options``:
    * chunk_mode= PAGE 
---

### Provide API keys manually

In [None]:
import os
from getpass import getpass

# Prompt for each key independently: previously the OpenAI prompt was nested
# under the Llama Cloud check, so it was skipped whenever
# LLAMA_CLOUD_API_KEY was already present in the environment.
if "LLAMA_CLOUD_API_KEY" not in os.environ:
    os.environ["LLAMA_CLOUD_API_KEY"] = getpass("Enter your Llama Cloud API Key: ")

# NOTE(review): nothing visible in this notebook reads OPENAI_KEY; OpenAI
# clients conventionally look for OPENAI_API_KEY -- confirm which name the
# consumer of this variable expects before renaming.
if "OPENAI_KEY" not in os.environ:
    os.environ["OPENAI_KEY"] = getpass("Enter your OpenAI API Key: ")

### Create instance of extractor

In [None]:
from llama_cloud_services import LlamaExtract

# Create the extraction client with default arguments. A project id can
# optionally be provided; when it is omitted the 'Default' project is used.
# NOTE(review): presumably the API key is read from LLAMA_CLOUD_API_KEY, which
# the cell above sets -- confirm against the client documentation.
llama_extract = LlamaExtract()
# Alternative: pass the API key explicitly instead of via the environment.
# llama_extract = LlamaExtract(api_key="YOUR_API_KEY")

### Define the data schema

In [4]:
from pydantic import BaseModel, Field
from typing import List, Optional

class TechnicalSkills(BaseModel):
    # Nested schema for the skills section of a resume; used as the type of
    # `Resume.technical_skills`. All three fields are required lists of strings.
    # A `#` comment is used instead of a docstring on purpose, so nothing is
    # added to the schema generated from this model.

    # Languages only (frameworks and general skills have their own fields).
    programming_languages: List[str] = Field(
        description="The programming languages the candidate is proficient in."
    )
    # Tools and frameworks, as opposed to languages.
    frameworks: List[str] = Field(
        description="The tools/frameworks the candidate is proficient in, e.g. React, Django, PyTorch, etc."
    )
    # Catch-all for broader skill areas.
    skills: List[str] = Field(
        description="Other general skills the candidate is proficient in, e.g. Data Engineering, Machine Learning, etc."
    )

class Education(BaseModel):
    # One education entry of a resume; `Resume.education` holds a list of these.
    # `institution` and `degree` are required; both dates are optional and
    # default to None.

    institution: str = Field(description="The institution of the candidate")
    degree: str = Field(description="The degree of the candidate")
    # Dates are kept as free-form strings rather than date objects.
    start_date: Optional[str] = Field(
        default=None, description="The start date of the candidate's education"
    )
    end_date: Optional[str] = Field(
        default=None, description="The end date of the candidate's education"
    )

class Experience(BaseModel):
    # One work-experience entry of a resume; `Resume.experience` holds a list
    # of these. `company` and `title` are required; `description` and both
    # dates are optional and default to None.

    company: str = Field(description="The name of the company")
    title: str = Field(description="The title of the candidate")
    description: Optional[str] = Field(
        default=None, description="The description of the candidate's experience"
    )
    # Dates are kept as free-form strings (the sample outputs in this notebook
    # contain values such as '2019' and 'Present').
    start_date: Optional[str] = Field(
        default=None, description="The start date of the candidate's experience"
    )
    end_date: Optional[str] = Field(
        default=None, description="The end date of the candidate's experience"
    )

class Resume(BaseModel):
    # Top-level extraction schema; passed as `data_schema` when the agent is
    # created. Every field is required. The `description` strings appear in the
    # agent's `data_schema` output further down in this notebook, so they are
    # runtime data rather than plain documentation.

    name: str = Field(description="The name of the candidate")
    email: str = Field(description="The email address of the candidate")
    links: List[str] = Field(
        description="The links to the candidate's social media profiles"
    )
    # Nested lists of the entry models defined above.
    experience: List[Experience] = Field(description="The candidate's experience")
    education: List[Education] = Field(description="The candidate's education")
    technical_skills: TechnicalSkills = Field(
        description="The candidate's technical skills"
    )
    # Free-text summary; the description is phrased as an instruction.
    key_accomplishments: str = Field(
        description="Summarize the candidates highest achievements."
    )

### Create extraction Agent

In [5]:
from llama_cloud.core.api_error import ApiError

# Start from a clean slate: if an agent named "resume-screening" already
# exists, delete it, then (re)create it from the Resume schema.
try:
    existing_agent = llama_extract.get_agent(name="resume-screening")
    if not existing_agent:
        print("============== Creating Agent from scratch ==============")
    else:
        print("============== Agent exists already ==============")
        llama_extract.delete_agent(existing_agent.id)
except ApiError as err:
    # A 404 is tolerated (nothing to delete); any other API failure propagates.
    if err.status_code != 404:
        raise

agent = llama_extract.create_agent(name="resume-screening", data_schema=Resume)



---
### Testing
---

#### List the agents

In [5]:
# Show the extraction agents known to the client; the result is displayed as
# the cell output.
llama_extract.list_agents()

[ExtractionAgent(id=9f0c603e-952f-442b-be15-bdeeda409fe5, name=resume-screening)]

#### Extract information

In [None]:
# Run the freshly created agent on the sample resume PDF and keep the result
# in `resume` for the cells below.
# NOTE: the path is absolute and machine-specific; adjust it to your checkout.
resume = agent.extract("/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/resumes/ai_researcher.pdf")
# Fixed typo in the status message ("finishied" -> "finished").
print("============== Extraction finished successfully ==============")

Uploading files: 100%|██████████| 1/1 [00:01<00:00,  1.94s/it]
Creating extraction jobs: 100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
Extracting files: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]






#### Print results

In [9]:
# Display the extracted fields; the output below shows them as a dict keyed by
# the Resume field names.
resume.data

{'name': 'Dr. Rachel Zhang, Ph.D.',
 'email': 'rachel.zhang@email.com',
 'links': ['linkedin.com/in/rachelzhang',
  'github.com/rzhang-ai',
  'scholar.google.com/rachelzhang'],
 'experience': [{'company': 'DeepMind',
   'title': 'Senior Research Scientist',
   'description': 'Lead researcher on large-scale multi-task learning systems, developing novel architectures that improve cross-task generalization by 40%. Pioneered new approach to zero-shot learning using contrastive training, published in NeurIPS 2023. Built and led team of 6 researchers working on foundational ML models. Developed novel regularization techniques for large language models, reducing catastrophic forgetting by 35%.',
   'start_date': '2019',
   'end_date': 'Present'},
  {'company': 'Google Research',
   'title': 'Research Scientist',
   'description': 'Developed probabilistic frameworks for robust ML, published in ICML 2018. Created novel attention mechanisms for computer vision models, improving accuracy by 25%. 

#### Save extraction template for later use

In [7]:
# Persist the agent, then fetch it again by name so the schema shown below is
# the stored one rather than the local object.
agent.save()
print("============== Saved extraction agent's schema and config to the database ==============")

# Rebinds `agent` to the fetched copy; the trailing expression is displayed as
# the cell output.
agent = llama_extract.get_agent("resume-screening")
agent.data_schema  # Latest schema should be returned



{'additionalProperties': False,
 'properties': {'name': {'description': 'The name of the candidate',
   'type': 'string'},
  'email': {'description': 'The email address of the candidate',
   'type': 'string'},
  'links': {'description': "The links to the candidate's social media profiles",
   'items': {'type': 'string'},
   'type': 'array'},
  'experience': {'description': "The candidate's experience",
   'items': {'additionalProperties': False,
    'properties': {'company': {'description': 'The name of the company',
      'type': 'string'},
     'title': {'description': 'The title of the candidate', 'type': 'string'},
     'description': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
      'description': "The description of the candidate's experience"},
     'start_date': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
      'description': "The start date of the candidate's experience"},
     'end_date': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
      'description': "The 

### Test extracted data_schema on my personal resume

#### 1. Short & basic CV (one pager)

In [12]:
from llama_cloud.core.api_error import ApiError

# Extract the one-page CV with the already-stored "resume-screening" agent.
#
# Reset the result first: previously a missing agent (404) was swallowed
# silently, leaving `my_resume` undefined or still holding the result of an
# earlier run, which the next cell would then display as if it were current.
my_resume = None

try:
    existing_agent = llama_extract.get_agent(name="resume-screening")
    if existing_agent:
        print("============== Agent exists already ==============")
        print(existing_agent.data_schema)
        print(existing_agent.config)

        # NOTE: absolute, machine-specific path; adjust it to your checkout.
        new_cv_path = "/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/resumes/Lebenslauf_basic.pdf"
        my_resume = existing_agent.extract(new_cv_path)
    else:
        print("============== Agent 'resume-screening' not found; nothing extracted ==============")

except ApiError as e:
    if e.status_code == 404:
        # Still non-fatal, but say so instead of passing silently.
        print("============== Agent 'resume-screening' not found (404); nothing extracted ==============")
    else:
        raise

{'additionalProperties': False, 'properties': {'name': {'description': 'The name of the candidate', 'type': 'string'}, 'email': {'description': 'The email address of the candidate', 'type': 'string'}, 'links': {'description': "The links to the candidate's social media profiles", 'items': {'type': 'string'}, 'type': 'array'}, 'experience': {'description': "The candidate's experience", 'items': {'additionalProperties': False, 'properties': {'company': {'description': 'The name of the company', 'type': 'string'}, 'title': {'description': 'The title of the candidate', 'type': 'string'}, 'description': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'description': "The description of the candidate's experience"}, 'start_date': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'description': "The start date of the candidate's experience"}, 'end_date': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'description': "The end date of the candidate's experience"}}, 'required': ['company', 'ti

Uploading files: 100%|██████████| 1/1 [00:02<00:00,  2.68s/it]
Creating extraction jobs: 100%|██████████| 1/1 [00:00<00:00,  2.43it/s]
Extracting files: 100%|██████████| 1/1 [00:12<00:00, 12.12s/it]


In [13]:
# Display the fields extracted from the one-page CV.
my_resume.data

{'name': 'Abderraouf Ayadi',
 'email': 'ayadi_raouf@outlook.com',
 'links': [],
 'experience': [{'company': 'AIESEC Tunesien',
   'title': 'Summercamp-Programm Teilnehmer',
   'description': None,
   'start_date': '2013',
   'end_date': '2014'},
  {'company': 'Elite Gymnasium Sousse',
   'title': 'Vorpraktikum',
   'description': None,
   'start_date': '2015',
   'end_date': '2015'},
  {'company': 'Bund der tunesischen Akademiker in Hannover e.V.',
   'title': 'People and Culture Manager',
   'description': None,
   'start_date': '2016',
   'end_date': '2018'},
  {'company': 'Leibniz Universität Hannover',
   'title': 'Wissenschaftliche Hilfskraft',
   'description': None,
   'start_date': '2021',
   'end_date': '2024'}],
 'education': [{'institution': 'Elite Gymnasium Sousse',
   'degree': '/OEET',
   'start_date': '2011',
   'end_date': '2015'},
  {'institution': 'Niedersächsisches Studienkolleg Hannover',
   'degree': 'Sprachen',
   'start_date': '2015',
   'end_date': '2016'},
  {'

#### 2. Complex CV (3 pages)

In [15]:
from llama_cloud.core.api_error import ApiError

# Extract the three-page CV with the already-stored "resume-screening" agent.
#
# Reset the result first: `my_resume` is reused from the one-page CV cell, so
# a silently swallowed 404 would leave the previous CV's data in place and the
# next cell would display it as if it came from this document.
my_resume = None

try:
    existing_agent = llama_extract.get_agent(name="resume-screening")
    if existing_agent:
        print("============== Agent exists already ==============")
        print(existing_agent.data_schema)
        print(existing_agent.config)

        # NOTE: absolute, machine-specific path; adjust it to your checkout.
        new_cv_path = "/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/data/resumes/Lebenslauf_complex.pdf"
        my_resume = existing_agent.extract(new_cv_path)
    else:
        print("============== Agent 'resume-screening' not found; nothing extracted ==============")

except ApiError as e:
    if e.status_code == 404:
        # Still non-fatal, but say so instead of passing silently.
        print("============== Agent 'resume-screening' not found (404); nothing extracted ==============")
    else:
        raise

{'additionalProperties': False, 'properties': {'name': {'description': 'The name of the candidate', 'type': 'string'}, 'email': {'description': 'The email address of the candidate', 'type': 'string'}, 'links': {'description': "The links to the candidate's social media profiles", 'items': {'type': 'string'}, 'type': 'array'}, 'experience': {'description': "The candidate's experience", 'items': {'additionalProperties': False, 'properties': {'company': {'description': 'The name of the company', 'type': 'string'}, 'title': {'description': 'The title of the candidate', 'type': 'string'}, 'description': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'description': "The description of the candidate's experience"}, 'start_date': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'description': "The start date of the candidate's experience"}, 'end_date': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'description': "The end date of the candidate's experience"}}, 'required': ['company', 'ti

Uploading files: 100%|██████████| 1/1 [00:02<00:00,  2.91s/it]
Creating extraction jobs: 100%|██████████| 1/1 [00:00<00:00,  2.41it/s]
Extracting files: 100%|██████████| 1/1 [00:42<00:00, 42.51s/it]


In [16]:
# Display the fields extracted from the three-page CV.
my_resume.data

{'name': 'Abderraouf Ayadi',
 'email': 'ayadi_raouf@outlook.com',
 'links': ['LinkedIn Profile'],
 'experience': [{'company': 'Leibniz Universität Hannover | Institut für Produktentwicklung und Gerätebau (iPeG)',
   'title': 'Wissenschaftliche Hilfskraft',
   'description': 'Aufbau eines RAG-basierten Systems zum effizienten Durchsuchen von Nachschlagewerken zur mechanischen Konstruktionstechnik. Testen und Bewerten vortrainierter LLM-Modelle zur Generierung parametrischer 3D-CAD-Modelle.',
   'start_date': '11.2025',
   'end_date': '12.2025'},
  {'company': 'Leibniz Universität Hannover | Institut für Montagetechnik und Industrierobotik (Match)',
   'title': 'Wissenschaftliche Hilfskraft',
   'description': 'Entwicklung eines ROS-basierten Simulations- und Steuerungsframeworks für Multikopter, mit Integration von PX4 und MAVROS. Integration, Test und Benchmarking moderner SLAM-Algorithmen zur Indoor-Kartierung industrieller Umgebungen.',
   'start_date': '07.2025',
   'end_date': '09.