## Libraries + API Keys + Parameters

In [1]:
from dotenv import load_dotenv, find_dotenv
# Locate the nearest .env file and load its variables into the process
# environment. override=True lets values from .env replace variables that are
# already set in the environment.
load_dotenv(dotenv_path=find_dotenv(), override=True)

True

In [2]:
from crewai_tools import DirectoryReadTool, PDFSearchTool, FileWriterTool
from crewai import Agent, Crew, Process, Task
from crewai.tasks.task_output import TaskOutput

from langchain_openai import ChatOpenAI
from IPython.display import Markdown
import pandas as pd

  warn(


In [3]:
# Run parameters interpolated into the agent and task prompts below:
#   folder       - directory whose files are listed and searched
#   requirements - free-text criteria used to pick the matching invoice files
#   columns      - column names requested for the CSV output
#   question     - question the analyst agent must answer
inputs = dict(
    folder='invoices',
    requirements='all invoices from Simonis and Braun',
    columns=['Date', 'Description', 'Total Amount', 'Issuer', 'Receiver'],
    question='what is the total amount invoiced and all the names of personnel involved in the latest invoice file',
)

## Assistant Agent - retrieve necessary documents

In [7]:
# Retrieval agent: given a directory-listing tool and a PDF-search tool, it is
# asked to find the invoice files matching `inputs['requirements']` inside
# `inputs['folder']` and to pull their contents.
assistant_agent = Agent(
    role="Invoice Retrieval Assistant",
    goal=f"""
        Your task is to search the files within the folder **{inputs['folder']}** and locate the correct document based on the following criteria:
        **{inputs['requirements']}**
        Once identified, extract and retrieve the relevant data from the document.
    """,
    backstory="""
        You are an efficient and detail-oriented assistant specializing in organizing and retrieving relevant data from documents.
    """,
    # gpt-4o with temperature=0: this step copies figures out of invoices, so
    # sampling randomness only adds run-to-run variation and transcription risk.
    llm=ChatOpenAI(model_name="gpt-4o", temperature=0),
    tools=[DirectoryReadTool(), PDFSearchTool()]
)

/Users/mdashikadnan/Documents/adnanedu/python/ztm/crewai/crewai_venv/lib/python3.11/site-packages/chromadb/types.py:144: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  return self.model_fields  # pydantic 2.x


In [8]:
# First task of the pipeline, carried out by the retrieval agent: find the
# files that satisfy the requirements and return everything they contain.
assistant_task = Task(
    agent=assistant_agent,
    expected_output="""
        The extracted data from the relevant file, formatted for clarity and completeness.
    """,
    description=f"""
        Search for the correct file within **{inputs['folder']}** that matches the following criteria:
        **{inputs['requirements']}**
        Once found, extract all the data from the relevant file.
    """,
)


## Organizer Agent - Format into CSV

In [9]:
# Formatting agent: turns the extracted invoice data into CSV text with the
# columns listed in `inputs['columns']`. It is given no tools.
organizer_agent = Agent(
    role="Data Organization Assistant",
    goal=f"""
        Process the provided data and format it as a CSV file with the specified columns:
        **{inputs['columns']}**
        Save the results strictly in CSV format without any additional text or formatting.
    """,
    backstory="""
        You are an efficient and detail-oriented assistant specializing in structuring and organizing data into CSV format.
    """,
    # gpt-4o with temperature=0: the output is parsed by code afterwards, so the
    # format should be as stable as possible from one run to the next.
    llm=ChatOpenAI(model_name="gpt-4o", temperature=0),
)


In [10]:
# Second task of the pipeline: reshape the extracted invoice data into CSV text.
# The prompt spells out the machine-readability rules (no Markdown code fences,
# header row first, quoted values) because this task's raw output is later
# handed to pandas.read_csv as-is.
organizer_task = Task(
    description=f"""
        Format the provided data into a CSV file with the specified columns:
        **{inputs['columns']}**
        Ensure that all numeric values use a decimal point (.) instead of a comma (,).
        Date should be formatted as DD-MM-YYYY.
        Enclose any value that itself contains a comma in double quotes.
        Save the results strictly in CSV format without any additional text.
        Do not wrap the CSV in Markdown code fences (```); the first line of the output must be the header row.
    """,
    expected_output="""
        A properly formatted CSV file containing the correct values in the specified columns,
        returned as plain CSV text with no Markdown code fences and no surrounding commentary.
    """,
    agent=organizer_agent,
)


## Analyst Agent - Answer query

In [11]:
# Analysis agent: answers `inputs['question']` from the data produced by the
# earlier tasks. It is given no tools.
analyst_agent = Agent(
    role="Data Analyst Assistant",
    goal=f"""
        Analyze the provided data and accurately answer the following question:
        **{inputs['question']}**
        Base your response strictly on data and factual analysis.
    """,
    backstory="""
        You are a detail-oriented data analyst, skilled at extracting insights and answering questions based on facts and data.
    """,
    # gpt-4o with temperature=0: the answer must be strictly factual, so favour
    # the most deterministic sampling over varied wording.
    llm=ChatOpenAI(model_name="gpt-4o", temperature=0),
)


In [12]:
# Final task of the pipeline, carried out by the analyst agent: answer the
# user's question and name the file the answer was drawn from.
analyst_task = Task(
    agent=analyst_agent,
    expected_output="""
        A correct, data-backed answer along with the name of the file used for analysis.
    """,
    description=f"""
        Analyze the provided data and answer the following question:
        **{inputs['question']}**
        Ensure your response is based strictly on data-driven insights.
    """,
)


## Assemble crew

In [13]:
# Assemble the three agent/task pairs into a single crew.
crew = Crew(
    process=Process.sequential,  # tasks are processed in order
    tasks=[assistant_task, organizer_task, analyst_task],  # retrieval -> CSV formatting -> analysis
    agents=[assistant_agent, organizer_agent, analyst_agent],  # every agent taking part
    verbose=True,
)
# Kick off execution; the per-task results are read from `result.tasks_output`
# in the cells that follow.
result = crew.kickoff()

[1m[95m# Agent:[00m [1m[92mInvoice Retrieval Assistant[00m
[95m## Task:[00m [92m
        Search for the correct file within **invoices** that matches the following criteria:
        **all invoices from Simonis and Braun**
        Once found, extract all the data from the relevant file.
    [00m




[1m[95m# Agent:[00m [1m[92mInvoice Retrieval Assistant[00m
[95m## Thought:[00m [92mThought: To find all invoices from Simonis and Braun, I need to first list all files in the "invoices" directory to identify potential documents to search.[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \"invoices\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-invoices/Braun_2022-10.pdf
- invoices/.DS_Store
- invoices/Hammes_and_sons_2024-12.pdf
- invoices/Gulgowski_and_sons_2025-02.pdf
- invoices/Torphy_2025-02.pdf
- invoices/simonis_2025-03.pdf[00m


Inserting batches in chromadb:   0%|          | 0/1 [00:00<?, ?it/s]/Users/mdashikadnan/Documents/adnanedu/python/ztm/crewai/crewai_venv/lib/python3.11/site-packages/chromadb/types.py:144: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  return self.model_fields  # pydantic 2.x
Inserting batches in chromadb: 100%|██████████| 1/1 [00:00<00:00,  1.44it/s]




[1m[95m# Agent:[00m [1m[92mInvoice Retrieval Assistant[00m
[95m## Thought:[00m [92mThought: I have identified two potential files that could be invoices from Simonis and Braun: "Braun_2022-10.pdf" and "simonis_2025-03.pdf". I will search these PDFs for invoice details.[00m
[95m## Using tool:[00m [92mSearch a PDF's content[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"invoice\", \"pdf\": \"invoices/Braun_2022-10.pdf\"}"[00m
[95m## Tool Output:[00m [92m
Relevant Content:


Inserting batches in chromadb:   0%|          | 0/1 [00:00<?, ?it/s]/Users/mdashikadnan/Documents/adnanedu/python/ztm/crewai/crewai_venv/lib/python3.11/site-packages/chromadb/types.py:144: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  return self.model_fields  # pydantic 2.x
Inserting batches in chromadb: 100%|██████████| 1/1 [00:00<00:00,  1.11it/s]




[1m[95m# Agent:[00m [1m[92mInvoice Retrieval Assistant[00m
[95m## Thought:[00m [92mThought: I have successfully extracted data from the Braun invoice. Now I need to do the same for the Simonis invoice in the file "simonis_2025-03.pdf".[00m
[95m## Using tool:[00m [92mSearch a PDF's content[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"invoice\", \"pdf\": \"invoices/simonis_2025-03.pdf\"}"[00m
[95m## Tool Output:[00m [92m
Relevant Content:


You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:

Tool Name: List files in directory
Tool Arguments: {'directory': {'description': 'Mandatory directory to list content', 'type': 'str'}}
Tool Description: A tool that can be used to recursively list a directory's content.
Tool Name: Search a PDF's content
Tool Arguments: {'query': {'description': "Mandatory query you want to use to search the PDF's content", 'type': 'str'}, 'pdf': {'description': 'Mandatory pdf path you want t



[1m[95m# Agent:[00m [1m[92mInvoice Retrieval Assistant[00m
[95m## Final Answer:[00m [92m
Braun Invoice:
```
Date: 2022-10-04
Invoice No.: 31500
Customer: Mr. Cormier
Address: Stehr-Heaney, Donato Cormier, Leopold Forks, 711 24085 McCluremouth, Saint Pierre and Miquelon
Items:
1. Amount: 73 Pcs, Description: Quis ut excepturi quia odit., Price: 474,92 €, Total: 34.669,16 €
2. Amount: 79 Pcs, Description: Et qui voluptatem libero., Price: 554,69 €, Total: 43.820,51 €
3. Amount: 92 Pcs, Description: Accusamus ea autem., Price: 830,53 €, Total: 76.408,76 €
4. Amount: 67 Pcs, Description: Consectetur ratione explicabo., Price: 196,93 €, Total: 13.194,31 €
Price net: 168.092,74 €
Plus 10% VAT: 16.809,27 €
Invoice total: 184.902,01 €
Payment details: Payment within 90 days
CEO: Liliana Jast
Bank details: NL07LBKP8434276625 KPKMGASKW1P
```

Simonis Invoice:
```
Date: 05.02.2025
Invoice No.: 84951
Customer: Norwood Spinka
Address: Aurelia Wall 784, 42644-0051 South Maudieland
Company

[1m[95m# Agent:[00m [1m[92mData Organization Assistant[00m
[95m## Task:[00m [92m
        Format the provided data into a CSV file with the specified columns:
        **['Date', 'Description', 'Total Amount', 'Issuer', 'Receiver']**
        Ensure that all numeric values use a decimal point (.) instead of a comma (,).
        Date should be formatted as DD-MM-YYYY.
        Save the results strictly in CSV format without any additional text.
    [00m


[1m[95m# Agent:[00m [1m[92mData Organization Assistant[00m
[95m## Final Answer:[00m [92m
```
Date,Description,Total Amount,Issuer,Receiver
04-10-2022,Quis ut excepturi quia odit.,34669.16,Braun,Mr. Cormier
04-10-2022,Et qui voluptatem libero.,43820.51,Braun,Mr. Cormier
04-10-2022,Accusamus ea autem.,76408.76,Braun,Mr. Cormier
04-10-2022,Consectetur ratione explicabo.,13194.31,Braun,Mr. Cormier
05-02-2025,Deleniti consequatur hic omnis.,54209.20,Simonis LLC,Norwood Spinka
05-02-2025,Quis qui cum eligendi.,13338.94,Simonis

[1m[95m# Agent:[00m [1m[92mData Analyst Assistant[00m
[95m## Task:[00m [92m
        Analyze the provided data and answer the following question:
        **what is the total amount invoiced and all the names of personnel involved in the latest invoice file**
        Ensure your response is based strictly on data-driven insights.
    [00m


[1m[95m# Agent:[00m [1m[92mData Analyst Assistant[00m
[95m## Final Answer:[00m [92m
The total amount invoiced in the latest invoice file, which is the Simonis Invoice dated 05.02.2025, is 188.402,52 €. The personnel involved are Garry Kilback (CEO of Simonis LLC) and Norwood Spinka (Customer).[00m




In [14]:
# Raw data extracted from the invoices: output of the first task in the crew's
# task list (assistant_task), rendered as Markdown.
Markdown(result.tasks_output[0].raw)

Braun Invoice:
```
Date: 2022-10-04
Invoice No.: 31500
Customer: Mr. Cormier
Address: Stehr-Heaney, Donato Cormier, Leopold Forks, 711 24085 McCluremouth, Saint Pierre and Miquelon
Items:
1. Amount: 73 Pcs, Description: Quis ut excepturi quia odit., Price: 474,92 €, Total: 34.669,16 €
2. Amount: 79 Pcs, Description: Et qui voluptatem libero., Price: 554,69 €, Total: 43.820,51 €
3. Amount: 92 Pcs, Description: Accusamus ea autem., Price: 830,53 €, Total: 76.408,76 €
4. Amount: 67 Pcs, Description: Consectetur ratione explicabo., Price: 196,93 €, Total: 13.194,31 €
Price net: 168.092,74 €
Plus 10% VAT: 16.809,27 €
Invoice total: 184.902,01 €
Payment details: Payment within 90 days
CEO: Liliana Jast
Bank details: NL07LBKP8434276625 KPKMGASKW1P
```

Simonis Invoice:
```
Date: 05.02.2025
Invoice No.: 84951
Customer: Norwood Spinka
Address: Aurelia Wall 784, 42644-0051 South Maudieland
Company: Simonis LLC, Barney Village 891, 24163-6226 Kaseytown
Items:
1. Description: Deleniti consequatur hic omnis., Price: 918,80 €, Amount: 59, Total: 54.209,20 €
2. Description: Quis qui cum eligendi., Price: 162,67 €, Amount: 82, Total: 13.338,94 €
3. Description: Aut error voluptatem voluptatem., Price: 976,13 €, Amount: 82, Total: 80.042,66 €
4. Description: Assumenda nemo., Price: 171,00 €, Amount: 29, Total: 4.959,00 €
5. Description: Qui ab., Price: 309,57 €, Amount: 76, Total: 23.527,32 €
Price net: 176.077,12 €
Plus 7% VAT: 12.325,40 €
Invoice total: 188.402,52 €
Payment details: Payment within 30 days
CEO: Garry Kilback
Bank details: RS97378587727026913595 AADDWABV
```

In [15]:
# Structured table: raw text output of the second task in the crew's task list
# (organizer_task). print() shows the text exactly as the model returned it,
# including any Markdown code fences it may have added around the CSV.
print(result.tasks_output[1].raw)

```
Date,Description,Total Amount,Issuer,Receiver
04-10-2022,Quis ut excepturi quia odit.,34669.16,Braun,Mr. Cormier
04-10-2022,Et qui voluptatem libero.,43820.51,Braun,Mr. Cormier
04-10-2022,Accusamus ea autem.,76408.76,Braun,Mr. Cormier
04-10-2022,Consectetur ratione explicabo.,13194.31,Braun,Mr. Cormier
05-02-2025,Deleniti consequatur hic omnis.,54209.20,Simonis LLC,Norwood Spinka
05-02-2025,Quis qui cum eligendi.,13338.94,Simonis LLC,Norwood Spinka
05-02-2025,Aut error voluptatem voluptatem.,80042.66,Simonis LLC,Norwood Spinka
05-02-2025,Assumenda nemo.,4959.00,Simonis LLC,Norwood Spinka
05-02-2025,Qui ab.,23527.32,Simonis LLC,Norwood Spinka
```


In [16]:
# Write the organizer task's CSV output to result.csv.
from io import StringIO


def _strip_code_fences(text):
    """Return ``text`` with Markdown code-fence lines removed.

    The model may wrap its answer in ``` fences (optionally followed by a
    language tag such as ```csv). Those lines are not CSV data: if they are left
    in, pandas reads the opening fence as the header row.

    Args:
        text: Raw text returned by the model.

    Returns:
        The remaining lines joined with newlines, without surrounding whitespace.
    """
    return "\n".join(
        line
        for line in text.strip().splitlines()
        if not line.lstrip().startswith("```")
    )


csv_text = _strip_code_fences(result.tasks_output[1].raw)
df = pd.read_csv(StringIO(csv_text))
# Discard rows that have a missing value in any column; reassign instead of
# mutating in place so re-running the cell always starts from the parsed frame.
df = df.dropna()
# index=False: the positional row index is not invoice data, so keep it out of
# the file and write only the requested columns.
df.to_csv('result.csv', index=False)

In [17]:
# Answer to the question: output of the third task in the crew's task list
# (analyst_task), rendered as Markdown.
Markdown(result.tasks_output[2].raw)

The total amount invoiced in the latest invoice file, which is the Simonis Invoice dated 05.02.2025, is 188.402,52 €. The personnel involved are Garry Kilback (CEO of Simonis LLC) and Norwood Spinka (Customer).