In [1]:
import os
from openai import OpenAI
from dotenv import load_dotenv

from crewai_tools import PDFSearchTool, SerperDevTool, ScrapeWebsiteTool
from crewai import Agent, Task, Crew, Process

In [2]:
# Load variables from the local '.env' file; this runs first so the os.getenv()
# lookups below can see values defined there.
load_dotenv('.env')
# Model name is published through the environment instead of being passed to the agents.
# NOTE(review): presumably CrewAI reads OPENAI_MODEL_NAME to choose the LLM — confirm.
os.environ['OPENAI_MODEL_NAME'] = "gpt-4o-mini"
# os.getenv returns None when a variable is missing; nothing here checks for that.
API_KEY = os.getenv("OPENAI_API_KEY")
SERPER_API_KEY = os.getenv("SERPER_API_KEY")
# NOTE(review): `client` and `SERPER_API_KEY` are not referenced by the other cells
# visible in this notebook — confirm whether they are still needed.
client = OpenAI(api_key=API_KEY)

In [3]:
# Web tools handed to the researcher's tasks: a page scraper, and a Serper search
# limited to 5 results with the country set to "sg".
tool_webscrape = ScrapeWebsiteTool()
tool_search = SerperDevTool(country="sg", n_results=5)

# PDF search tool built without a fixed `pdf=` argument (an earlier variant pinned
# './Files/AR_2024.pdf').
# NOTE(review): presumably the agent supplies the PDF path on each call — confirm.
tool_pdf = PDFSearchTool()

In [4]:
def extract_file_paths(folder_path):
    """Return the paths of all files found under ``folder_path``, recursively.

    The tree is traversed with ``os.walk``. Directory and file names are sorted
    at every level so the returned order is the same on every run and file
    system; ``os.walk`` on its own yields entries in arbitrary order.

    Args:
        folder_path: Root directory to scan.

    Returns:
        list[str]: One entry per file, each built as ``os.path.join(root, name)``.
        The list is empty when the folder holds no files or does not exist
        (``os.walk`` ignores listing errors by default).
    """
    file_paths = []
    for root, dirs, files in os.walk(folder_path):
        # Sorting in place fixes the order in which os.walk descends into subfolders.
        dirs.sort()
        file_paths.extend(os.path.join(root, name) for name in sorted(files))
    return file_paths

# Folder scanned for input documents, relative to the notebook's working directory.
folder_path = "./data"
# Every file found under that folder; passed to crew.kickoff() later as "file_paths".
file_paths = extract_file_paths(folder_path)
# Print the listing so an empty or unexpected result is visible before the crew runs.
print(file_paths)

['./data\\Education Endwoment and Savings Schemes Act 1992.pdf', './data\\PSE.pdf']


In [5]:
# Ask for the audit topic; it is passed to crew.kickoff() to fill the {topic}
# placeholder in the agent and task prompts.
# Surrounding whitespace is stripped so a whitespace-only entry counts as empty.
topic = input("Enter the audit topic: ").strip()
if not topic:
    # Stop the notebook here: continuing would run the whole crew with a blank topic.
    raise ValueError("Please enter an audit task.")
print("You entered:", topic)

You entered: post seconday education account


In [6]:
# The {topic} placeholder in the prompts below is filled from the inputs given to
# crew.kickoff(). Prompt text is kept exactly as originally written.

# Lead agent that writes the memo: the only agent with delegation enabled, max_iter=5.
auditor = Agent(
    role="auditor",
    goal=(
        "Return the Audit Planning Memo on {topic} based on inputs from "
        "researcher and past audit findings from the audit assistant."
    ),
    backstory=(
        "You are an auditor. You need to consider the inputs from the researcher "
        "and audit assistant to determine what to write for the audit planning "
        "memo and risk assessment"
    ),
    verbose=True,
    allow_delegation=True,
    max_iter=5,
)

# Internet research agent; delegation disabled, max_iter=15.
researcher = Agent(
    role="researcher",
    goal=(
        "From the internet, research and analyse and sieve out information "
        "related to {topic} only."
    ),
    # The trailing space, newline and indentation are part of the original prompt text.
    backstory=(
        "As a researcher, navigating and extracting critical information is crucial. "
        "You are assisting the auditor to research on topics relevant to {topic}. "
        "\n    "
    ),
    verbose=True,
    allow_delegation=False,
    max_iter=15,
)

# PDF findings agent; delegation disabled, max_iter=15.
# NOTE: the variable name is spelled "audit_asssitant"; other cells refer to it by
# this exact name, so the spelling is kept.
audit_asssitant = Agent(
    role="audit assistant",
    goal="From PDF extract the audit findings pertaining to {topic} only.",
    backstory=(
        "You are an audit assistant. Your job is to extract the past audit "
        "findings relating to the {topic}."
    ),
    verbose=True,
    allow_delegation=False,
    max_iter=15,
)

# Prompt strings below keep their original wording, spelling and whitespace;
# {topic} and {file_paths} are placeholders filled from the crew.kickoff() inputs.

# Researcher task: find candidate websites with the search tool.
task_search = Task(
    agent=researcher,
    tools=[tool_search],
    description=(
        "Returns a JSON list of top 5 websites that contain the most relevant "
        "information, in the context of Singapore, that are related to "
        "regulations on {topic} only. Ignore urls that ends with .pdf.\n"
        "    "
    ),
    expected_output=(
        "    Returns top 5 websites that contain most relavant regulations "
        "related information, in the context of Singapore, on {topic} only. "
        "The output must be in JSON format with the description as the key "
        "and url as value."
    ),
)

# Researcher task: scrape and summarise, using the search task's output as context.
task_websitesearch = Task(
    agent=researcher,
    tools=[tool_webscrape],
    context=[task_search],
    description="Scrape the website for all regulations related to {topic} only\n    ",
    expected_output="    Summarise the website for all regulations related to {topic}..",
)

# Audit-assistant task: pull past findings out of the PDFs with the PDF tool.
task_extract_findings = Task(
    agent=audit_asssitant,
    tools=[tool_pdf],
    description=(
        "For each file path in {file_paths}, read the pdf and extract all audit "
        "findings pertaining to {topic}. Provide citation"
    ),
    expected_output="A list of audit findings pertainint to {topic}",
)

# Auditor task: write the memo from the scrape and findings tasks; output file is APM.md.
task_write = Task(
    agent=auditor,
    context=[task_websitesearch, task_extract_findings],
    output_file="APM.md",
    description=(
        "    1. Use the content to craft an audit planning memo and risk assessment based on {topic} .\n"
        "    2. Sections are properly named in an engaging manner.\n"
        "    3. Proofread for grammatical errors and alignment with the common style used in audit planning memos."
    ),
    expected_output=(
        "\n"
        "    A well-written audit planning memo and risk assessment in markdown format. "
        "Each section should have minimum of 2 and maximum of 5 paragraphs."
    ),
)

In [7]:
# Assemble the crew. Tasks are listed as search -> scrape -> PDF findings -> memo.
# No `process=` argument is passed.
# NOTE(review): CrewAI's default process is presumably sequential — confirm for the
# installed version.
crew = Crew(
    verbose=True,
    agents=[
        auditor,
        researcher,
        audit_asssitant,
    ],
    tasks=[
        task_search,
        task_websitesearch,
        task_extract_findings,
        task_write,
    ],
)

In [8]:
# Run the crew. The dict keys fill the {topic} and {file_paths} placeholders used in
# the agent and task prompts; the returned object is inspected in the next cell.
result = crew.kickoff(inputs={"topic": topic, "file_paths": file_paths})

[1m[95m [2024-10-11 22:38:57][DEBUG]: == Working Agent: researcher[00m
[1m[95m [2024-10-11 22:38:57][INFO]: == Starting Task: Returns a JSON list of top 5 websites that contain the most relevant information, in the context of Singapore, that are related to regulations on post seconday education account only. Ignore urls that ends with .pdf.
    [00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mAction: Search the internet  
Action Input: {"search_query": "regulations on post secondary education account Singapore"}  [0m[95m 


Search results: Title: Post-Secondary Education Account (PSEA): Overview - MOE
Link: https://www.moe.gov.sg/financial-matters/psea/overview
Snippet: Aged 17 to 20 in 2023. Not eligible for the Edusave top-up. A PSEA will be automatically created for Singapore Citizens if they are eligible for ...
---
Title: Post-Secondary Education Account (PSEA) - MOE
Link: https://www.moe.gov.sg/financial-matters/psea
Snippet: The Post-Secondary Educat

In [9]:
# Final output of the run and its token accounting.
print(f"Raw Output: {result.raw}")
print(f"Token Usage: {result.token_usage}")

# Separator literal; print() appends one more newline after it.
separator = "-----------------------------------------\n\n"
print(separator)

# Outputs of the first three tasks, each followed by the separator.
for position in range(3):
    print(f"Tasks Output of Task {position + 1}: {result.tasks_output[position]}")
    print(separator)

Raw Output: ```markdown
# Audit Planning Memo for the Post-Secondary Education Account (PSEA)

## 1. Introduction
The purpose of this audit is to evaluate the financial management and compliance of the Post-Secondary Education Account (PSEA) with applicable regulations and standards. This audit aims to ensure that funds are allocated and utilized effectively to support post-secondary education initiatives while identifying areas for improvement in internal controls and compliance mechanisms.

## 2. Overview of PSEA Regulations
The Post-Secondary Education Account operates under specific regulations designed to govern the allocation and use of funds for post-secondary education. Key points include:
- **Eligibility Criteria**: Establishes who can access funds from the PSEA, including criteria related to residency, enrollment status, and program types.
- **Funding Allocation**: Details the processes for distributing funds to educational institutions and students, requiring adherence to sp