In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from langsmith import Client

# Load variables from the nearest .env file into the process environment.
load_dotenv(find_dotenv())

# Fail early with a clear message: assigning None into os.environ would
# otherwise raise an opaque "TypeError: str expected, not NoneType".
langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
if langsmith_api_key is None:
    raise RuntimeError(
        "LANGSMITH_API_KEY is not set; define it in your .env file or environment "
        "before running this notebook."
    )

# LangSmith tracing configuration, set before the client is created.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key
os.environ["LANGCHAIN_PROJECT"] = "Underwriting project"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
client = Client()


In [2]:
# Prompt template for extracting the start and end dates of a pay stub.
# `{text}` is the placeholder for the extracted pay-stub text.
# NOTE(review): every line ends with an explicit "\n" escape in addition to the
# literal line break of the triple-quoted string, so the rendered prompt has a
# blank line after each line — confirm this is intended.
# NOTE(review): no other cell visible here references this template — confirm
# whether the search tool was meant to use it.
payment_date_prompt = """You are provided with the text extracted from financial pay stubs. Provide the start and end date of the pay stub in the following format DD/MM/YYYY as:\n
(Start date) ; (End date)\n
\n
For example, for a pay stub that starts on March 1st, 2024 and ends on March 15th, 2024 the output should be:\n
01/03/2024 ; 15/03/2024\n
\n
Directions:\n
- The dates might not be clearly displayed, look for a word date and a number after it which may represent the date\n
- If you cannot find both dates, go through and try to find them again\n
- If you could only provide 1 date, provide that date. If you cannot find any dates, say that you could not find the dates.\n
- Make sure you provide the start and end dates of the pay stub and not the payment period when the salary was paid, look for terminology resembling 'start' and 'end'\n
\n
Pay stub text:\n
{text}\n
\n
Your response:\n
"""

# Read the whole OCR text file of the pay stub into memory.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
# NOTE(review): no explicit encoding is passed, so the platform default is used — confirm
# the file's encoding.
ocr_text_path = r"N:\Dev\AI\Underwriting\data\clients\Alex and Patricia\Alex\Alex 2023 Year End Paystub extracted_tables\Alex 2023 Year End Paystub extracted ocr.txt"
with open(ocr_text_path, mode='r') as file:
    document_text = file.read()

In [3]:
from langchain_openai import ChatOpenAI

# Shared chat model used both by the agent and inside the search tool.
llm = ChatOpenAI(
    model="gpt-4",
    temperature=0,  # lowest sampling temperature
)

In [4]:
from crewai import Crew, Agent, Task, Process
from langchain.tools import tool

@tool("search text for dates")
def text_dates_search(prompt: str) -> str:
    """Provided a text from a document, returns the dates as described in the system prompt"""
    # The docstring above is kept to one line on purpose: the @tool decorator
    # exposes it to the agent as the tool description.
    #
    # prompt: instruction written by the agent describing which dates to find.
    # Returns the model's reply as a plain string.
    print(f"in the text_dates_search\n{prompt}")
    # Separate the instruction from the document with a labelled delimiter;
    # plain concatenation made the instruction run straight into the first
    # characters of the pay-stub text.
    full_prompt = f"{prompt}\n\nPay stub text:\n{document_text}"
    return str(llm.invoke(full_prompt).content)

In [5]:
# The single agent of the crew: it extracts the pay-period dates by calling
# the search tool through the shared chat model.
date_extactor = Agent(
    llm=llm,
    tools=[text_dates_search],
    role="Payslip date extractor",
    goal="Use the provided tools to search the document as many times as required to extract the dates",
    backstory="The document is a payslip of an employee. The pay period start and end date should show up in the text. You must extrapolate this information and adhear to the system prompt to provide the desired output. You must run the tools as many times as required to get the most accurate response",
    max_iter=5,  # presumably caps the agent's reasoning/tool iterations — confirm against CrewAI docs
    memory=True,
    allow_delegation=False,
    verbose=True,
)

In [6]:
# Task assigned to the date-extractor agent: the instructions live in
# `description`, the required answer format in `expected_output`.
extract_dates = Task(
    agent=date_extactor,
    expected_output='''Provide the start and end date of the pay stub in the following format DD/MM/YYYY as:\n
(Start date) ; (End date)\n
\n
''',
    description='''You are provided with the text extracted from financial pay stubs. 
Directions:\n
- The dates might not be clearly displayed, look for a word date and a number after it which may represent the date\n
- If you cannot find both dates, go through and try to find them again\n
- If you could only provide 1 date, provide that date. If you cannot find any dates, say that you could not find the dates.\n
- Make sure you provide the start and end dates of the pay stub and not the payment period when the salary was paid, look for terminology resembling 'start' and 'end'\n
''',
)

In [7]:
# Assemble the crew from the one agent and the one task defined above.
crew = Crew(
    tasks=[extract_dates],
    agents=[date_extactor],
    full_output=True,
    verbose=2,
)

In [8]:
# Run the crew and keep whatever kickoff() returns; the log printed below is the verbose trace of this call.
results = crew.kickoff()

[1m[93m [DEBUG]: == Working Agent: Payslip date extractor[00m
[1m[93m [INFO]: == Starting Task: You are provided with the text extracted from financial pay stubs. 
Directions:

- The dates might not be clearly displayed, look for a word date and a number after it which may represent the date

- If you cannot find both dates, go through and try to find them again

- If you could only provide 1 date, provide that date. If you cannot find any dates, say that you could not find the dates.

- Make sure you provide the start and end dates of the pay stub and not the payment period when the salary was paid, look for terminology resembling 'start' and 'end'

[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mI need to find the start and end dates of the pay period from the payslip text. I will use the 'search text for dates' tool to find these dates.

Action: search text for dates
Action Input: {"prompt": "Please provide the start and end dates of the pay period from t