In [1]:
import os
from crewai import LLM

# Publish the model name through the process environment.
# NOTE(review): presumably CrewAI uses this as its fallback model when an agent
# does not pass `llm=` explicitly — confirm against the installed version.
os.environ.update({"OPENAI_MODEL_NAME": "gpt-4o"})

In [2]:
from pydantic import BaseModel
from typing import List

# Structured-output schema for the path-search tasks (passed as `output_json`):
# a single JSON object whose "filePath" key holds a list of path strings.
# Kept as `#` comments rather than a docstring so the generated schema is unchanged.
class filePath(BaseModel):
    filePath: List[str]  # one entry per discovered file path

# Structured-output schema for the fileSelect task (passed as its `output_json`).
# Kept as `#` comments rather than a docstring so the generated schema is unchanged.
class associateFilePath(BaseModel):
    mainFile: str  # path of the single document chosen to answer the question
    relatedFiles: List[str]  # markdown files linked from mainFile; may be empty
    imageFiles: List[str]  # image files embedded in mainFile; may be empty

In [3]:
from crewai import Agent
from crewai_tools import FileReadTool, DirectoryReadTool

class Agents:
    """Factory for the CrewAI agents used in this notebook.

    Every public method builds and returns a new ``Agent``; nothing is cached
    on the instance, so each call yields an independent agent.
    """

    # Time budget (seconds) for the main-file agent. The previous value of 1
    # cannot cover a single LLM round-trip plus a file read, so on CrewAI
    # versions that enforce the limit the task would time out immediately.
    MAIN_FILE_SEARCH_TIMEOUT_SECONDS = 120

    def _pathSearcher(self, goal, backstory):
        """Build a directory-listing agent; only goal and backstory vary.

        Args:
            goal: Goal text, including the ``{...}`` placeholder to interpolate.
            backstory: Backstory text for the agent.

        Returns:
            An ``Agent`` with role ``pathSearcher`` and a ``DirectoryReadTool``.
        """
        return Agent(
            role="pathSearcher",
            goal=goal,
            backstory=backstory,
            allow_delegation=False,
            verbose=True,
            llm='gpt-4o-mini',
            tools=[
                DirectoryReadTool(),
            ],
        )

    def markdownPathSearcher(self):
        """Return the agent that lists markdown files under ``{file_path}``.

        The ``{file_path}`` placeholder is expected to be supplied through the
        crew's kickoff inputs.
        """
        return self._pathSearcher(
            goal="Finds the markdown files inside {file_path} path",
            backstory="You are fluent in Korean, and you are very good at finding markdown files.",
        )

    def imgPathSearcher(self):
        """Return the agent that lists image files under ``{img_path}``.

        The ``{img_path}`` placeholder is expected to be supplied through the
        crew's kickoff inputs.
        """
        return self._pathSearcher(
            goal="Finds the img files inside {img_path} path",
            backstory="You are fluent in Korean, and you are very good at finding image files.",
        )

    def mainFilesearcher(self):
        """Return the agent that picks and reads the one document answering ``{question}``.

        The agent gets a ``FileReadTool``, is capped at three iterations, and
        is given ``MAIN_FILE_SEARCH_TIMEOUT_SECONDS`` as its execution budget.
        """
        return Agent(
            role="mainFilesearcher",
            goal="Print out ONLY one document that can answer {question}. To use the Tool, The parameter MUST be file_path = `filepath`.",
            backstory="You are fluent in Korean. You are a bookworm. Read and grasp everything in the document delicately",
            allow_delegation=False,
            verbose=True,
            llm='gpt-4o',
            tools=[
                FileReadTool(),
            ],
            max_iter=3,
            max_execution_time=self.MAIN_FILE_SEARCH_TIMEOUT_SECONDS,
        )

    def fileSelector(self):
        """Return the tool-less agent that maps links in a document to known file paths."""
        return Agent(
            role="fileSelector",
            goal="Find out the path of all other files that correspond to the document and print them out.",
            backstory="You are a file search expert and fluent in Korean. You have a great ability to read and analyze the details of the file.",
            llm='gpt-4o-mini',
            allow_delegation=False,
            verbose=True,
        )

In [4]:
from crewai import Task


class Tasks:
    """Factory for the CrewAI tasks used in this notebook.

    Each method builds a new ``Task`` bound to the given agent. Tasks that
    produce structured output pass a pydantic model as ``output_json`` and
    also write their result to ``output_file``.
    """

    def markdownPathSearch(self, agent):
        """Return the task that lists every markdown file under ``{file_path}``.

        Args:
            agent: Agent that will execute the task.

        Returns:
            A ``Task`` emitting ``filePath`` JSON, saved to ``MarkdownPath.md``.
        """
        return Task(
            description="Finds ALL the markdown files and inside {file_path} path",
            expected_output="Your final answer MUST be markdown file path. The file path symbol must be '/'",
            agent=agent,
            output_json=filePath,
            output_file="MarkdownPath.md",
        )

    def imgPathSearch(self, agent):
        """Return the task that lists every non-SVG image under ``{img_path}``.

        Args:
            agent: Agent that will execute the task.

        Returns:
            A ``Task`` emitting ``filePath`` JSON, saved to ``ImgPath.md``.
        """
        return Task(
            description="Finds ALL the image files and inside {img_path} path. but NOT Include svg Image.",
            expected_output="Your final answer MUST be image path. svg images should NEVER be included. The file path symbol must be '/'",
            agent=agent,
            output_json=filePath,
            output_file="ImgPath.md",
        )

    def mainFileSearch(self, agent, context):
        """Return the task that selects one document for ``{question}`` and prints it.

        Args:
            agent: Agent that will execute the task.
            context: Passed through unchanged as the task's ``context``.

        Returns:
            A free-text ``Task`` whose output is saved to ``mainFileSearch.md``.
        """
        return Task(
            description="""
            Based on the markdownPathSearch, markdownPathSearch is in json format. Document Paths are stored in 'filePath' key value and is in List. Search Only one file path that can solve {question}.
            The path of the image NEVER ends with \\ and /.
            If the end of the path is \\ or /, remove this and explore file path.
            NEVER modify the file path in fileSelect.
            
            Read the entire contents of the file based on the file path and print it out.
            DON'T do this more than once
            """,
            expected_output="""
            Print out the entire contents of the file NEVER MODIFY.
            """,
            agent=agent,
            context=context,
            output_file="mainFileSearch.md",
        )

    def fileSelect(self, agent, context):
        """Return the task that resolves ``[[...]]`` / ``![[...]]`` links to known paths.

        Prompt fixes relative to the earlier version: the ``mainFile`` key is
        spelled correctly, the stray ``",`` and the unbalanced quote are gone,
        and the example answers are valid JSON (no trailing commas), since the
        task output is parsed into ``associateFilePath``.

        Args:
            agent: Agent that will execute the task.
            context: Passed through unchanged as the task's ``context``.

        Returns:
            A ``Task`` emitting ``associateFilePath`` JSON, saved to
            ``associateFilePath.md``.
        """
        return Task(
            description="""
            Based on the mainFileSearch, 
            There are other documents linked by the symbol '[[...]]' and '![[...]]' in that file NOT '[...]'
            '[[...]]' symbol means a markdown file and '![[...]]' means an image file.
            
            Find all of the '[[...]]' and '![[...]]' and print out the ONLY file path associated with the word in it in markdownPathSearch or imgPathSearch. 
            All file paths should EXIST in that markdownPathSearch Output or imgPathSearch Output. 
            DON'T make it up and look for it.
            If the relevant document/image does not exist, JUST Return EMPTY List.
            """,
            expected_output="""
            Your final answer MUST include the path of the first file and the path of other files within that file.
            It doesn't include ANYTHING other than file paths. 

            mainFile and relatedFiles Include ONLY markdown File!

            Example Answer 1
            {
                "mainFile": "./Algorithm/Algorithm Content/Tree/MST(Minimum Spanning Tree).md",
                "relatedFiles": [
                    "./Algorithm/Algorithm Content/Graph Theory/DFS(Depth-First Search).md",
                    "./Algorithm/Algorithm Content/Graph Theory/BFS(Breadth-First Search).md",
                    "./Algorithm/Algorithm Content/Tree/Union Find.md"
                ],
                "imageFiles": ["./Algorithm/Reference/Tree Reference/MST Ref/MST Graph.png"]
            }

            Example Answer 2
            {
                "mainFile": "./Algorithm/Algorithm Content/Graph Theory/BFS(Breadth-First Search).md",
                "relatedFiles": [],
                "imageFiles": [
                    "./Algorithm/Reference/Graph Theory Reference/BASE TREE.png",
                    "./Algorithm/Reference/Graph Theory Reference/BFS Ref/BFS Queue.png"
                ]
            }

            Example Answer 3
            {
                "mainFile": "./c/k.md",
                "relatedFiles": [
                    "./c/g.md",
                    "./c/c.md",
                    "./c/d.md"
                ],
                "imageFiles": []
            }
            """,
            agent=agent,
            context=context,
            output_json=associateFilePath,
            output_file="associateFilePath.md",
        )

In [5]:
from crewai import Crew

# One factory of each kind is shared by every crew built in the cells below.
agent, tasks = Agents(), Tasks()

In [6]:
# Crew 1: list every markdown note under the content directory.
markdownPathSearcher = agent.markdownPathSearcher()

markdownPathSearcher_task = tasks.markdownPathSearch(markdownPathSearcher)

filePathCrew = Crew(
    agents=[markdownPathSearcher],
    tasks=[markdownPathSearcher_task],
    verbose=True,
)

# Raw string: "\A" is not a valid escape sequence, so the plain literal raised
# an invalid-escape warning. The runtime value is unchanged by the r-prefix.
filePathResult = filePathCrew.kickoff(
    inputs={
        "file_path": r".\Algorithm\Algorithm Content",
    }
)

  file_path=".\Algorithm\Algorithm Content",


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Task:[00m [92mFinds ALL the markdown files and inside .\Algorithm\Algorithm Content path[00m


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Thought:[00m [92mI need to list all the files in the directory .\Algorithm\Algorithm Content to find the markdown files.[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \".\\\\Algorithm\\\\Algorithm Content\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-.\Algorithm\Algorithm Content/Array\Binary Search.md
- .\Algorithm\Algorithm Content/Array\MITM(Meet in the middle).md
- .\Algorithm\Algorithm Content/Array\PBS(Parallel Binary Search).md
- .\Algorithm\Algorithm Content/Graph Theory\Articulation Points And Bridges.md
- .\Algorithm\Algorithm Content/Graph Theory\BFS(Breadth-First Search).md
- .\Algorithm\Algorithm Content/Graph Theory\CCW(Counter Clock Wise).md
- .\Algorithm\Algorithm Content/Graph Theory

In [7]:
# Crew 2: list every image under the reference directory (the task prompt
# tells the agent to leave SVG files out).
imgPathSearcher = agent.imgPathSearcher()

imgPathSearcher_task = tasks.imgPathSearch(imgPathSearcher)

imgPathCrew = Crew(
    agents=[imgPathSearcher],
    tasks=[imgPathSearcher_task],
    verbose=True,
)

# Raw string: "\A" and "\R" are not valid escape sequences, so the plain
# literal raised an invalid-escape warning. The runtime value is unchanged.
imgPathResult = imgPathCrew.kickoff(
    inputs={
        "img_path": r".\Algorithm\Reference",
    }
)

  img_path=".\Algorithm\Reference",


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Task:[00m [92mFinds ALL the image files and inside .\Algorithm\Reference path. but NOT Include svg Image.[00m


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Thought:[00m [92mI need to find all image files in the .\Algorithm\Reference directory, excluding SVG images. I'll start by listing the files in the specified directory to gather the necessary information.[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \".\\\\Algorithm\\\\Reference\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-.\Algorithm\Reference/BAEKJOON.png
- .\Algorithm\Reference/Array Reference\Binary Search Ref\Binary Search Flowchart.md
- .\Algorithm\Reference/Array Reference\Binary Search Ref\Binary Search Flowchart.png
- .\Algorithm\Reference/Array Reference\Binary Search Ref\Binary Search Flowchart.svg
- .\Algorithm\Reference/Array Reference\MITM Recursion Ref\MITM Recursio

In [8]:
# Crew 3: choose the document that answers the question, then resolve the
# notes and images it links to.
mainFileSearcher = agent.mainFilesearcher()
fileSelector = agent.fileSelector()

# The main-file task only needs the markdown listing as context.
mainFileSearcher_task = tasks.mainFileSearch(
    agent=mainFileSearcher,
    context=[markdownPathSearcher_task],
)

# The selector sees the chosen document plus both path listings.
fileSelector_task = tasks.fileSelect(
    agent=fileSelector,
    context=[
        mainFileSearcher_task,
        markdownPathSearcher_task,
        imgPathSearcher_task,
    ],
)

fileSelectorCrew = Crew(
    agents=[mainFileSearcher, fileSelector],
    tasks=[mainFileSearcher_task, fileSelector_task],
    verbose=True,
)

fileSelectorResult = fileSelectorCrew.kickoff({"question": "Finweck Tree가 뭐야?"})



[1m[95m# Agent:[00m [1m[92mmainFilesearcher[00m
[95m## Task:[00m [92m
            Based on the markdownPathSearch, markdownPathSearch is in json format. Document Paths are stored in 'filePath' key value and is in List. Search Only one file path that can solve Finweck Tree가 뭐야?.
            The path of the image NEVER ends with \ and /.
            If the end of the path is \ or /, remove this and explore file path.
            NEVER modify the file path in fileSelect.
            
            Read the entire contents of the file based on the file path and print it out.
            DON'T do this more than once
            [00m


[1m[95m# Agent:[00m [1m[92mmainFilesearcher[00m
[95m## Thought:[00m [92mGiven the task of finding a document that can answer "Finweck Tree가 뭐야?" (which translates to "What is Finwick Tree?"), I need to search for the document related to the Fenwick Tree in the provided file paths.
The file path ".\Algorithm\Algorithm Content/Tree/Fenwick Tree.