In [1]:
import os

os.environ["OPENAI_MODEL_NAME"] = "gpt-4o-mini"

In [2]:
from pydantic import BaseModel
from typing import List

class filePath(BaseModel):
    filePath: List[str]

class associateFilePath(BaseModel):
    mainFile: str
    relatedFiles: List[str]
    imageFiles: List[str]

In [3]:
from crewai import Agent
from crewai_tools import FileReadTool, DirectoryReadTool, VisionTool


class Agents:
    def markdownPathSearcher(self):
        return Agent(
            role="pathSearcher",
            goal="Finds the markdown files inside {file_path} path",
            backstory="You are fluent in Korean, and you are very good at finding markdown files.",
            allow_delegation=False,
            verbose=True,
            tools=[
                DirectoryReadTool(),
            ],
        )

    def imgPathSearcher(self):
        return Agent(
            role="pathSearcher",
            goal="Finds the img files inside {img_path} path",
            backstory="You are fluent in Korean, and you are very good at finding image files.",
            allow_delegation=False,
            verbose=True,
            tools=[
                DirectoryReadTool(),
            ],
        )

    def mainFilesearcher(self):
        return Agent(
            role="mainFilesearcher",
            goal="Print out ONLY one document that can answer {question}. To use the Tool, The parameter MUST be file_path = `filepath`.",
            backstory="You are fluent in Korean. You are a bookworm. Read and grasp everything in the document delicately",
            allow_delegation=False,
            verbose=True,
            tools=[
                FileReadTool(),
            ],
            max_iter=3,
            max_execution_time=1,
        )

    def fileSelector(self):
        return Agent(
            role="fileSelector",
            goal="Find out the path of all other files that correspond to the document and print them out.",
            backstory="You are a file search expert and fluent in Korean. You have a great ability to read and analyze the details of the file.",
            allow_delegation=False,
            verbose=True,
        )

    def imgExtracter(self):
        return Agent(
            role="imgExtracter",
            goal="Extract the image files. and Make text file containing the contents of the image. For images performed once, DON'T perform any more. Work only once per image path in imageFiles.",
            backstory="You are fluent in Korean, and You have a good ability to read images and convert them into text.",
            allow_delegation=False,
            verbose=True,
            tools=[
                VisionTool(),
            ],
        )

    def textOrganizer(self):
        return Agent(
            role="textOrganizer",
            goal="Organize documents. Collect the core contents of each document without modification and make one document as detailed as possible. To use the Tool, The parameter MUST be file_path. NOT filepath.",
            backstory="You are fluent in Korean, and You have a great ability to put together several documents.",
            allow_delegation=False,
            verbose=True,
            tools=[
                FileReadTool(),
            ],
        )

    def researcher(self):
        return Agent(
            role="Researcher",
            goal="Finds and summarizes the markdown files that can solve the {question}",
            backstory="You are fluent in Korean, and you are very good at finding and summarizing files. Summarized information is essential for problem solving",
            verbose=True,
            tools=[
                FileReadTool(),
            ],
            max_iter=10,
        )

In [4]:
from crewai import Task


class Tasks:
    def markdownPathSearch(self, agent):
        return Task(
            description="Finds ALL the markdown files and inside {file_path} path",
            expected_output="Your final answer MUST be markdown file path. The file path symbol must be '/'",
            agent=agent,
            output_json=filePath,
            output_file="MarkdownPath.md",
        )

    def imgPathSearch(self, agent):
        return Task(
            description="Finds ALL the image files and inside {img_path} path. but NOT Include svg Image.",
            expected_output="Your final answer MUST be image path. svg images should NEVER be included. The file path symbol must be '/'",
            agent=agent,
            output_json=filePath,
            output_file="ImgPath.md",
        )

    def mainFileSearch(self, agent, context):
        return Task(
            description="""
            Based on the markdownPathSearch, markdownPathSearch is in json format. Document Paths are stored in 'filePath' key value and is in List. Search Only one file path that can solve {question}.
            The path of the image NEVER ends with \\ and /.
            If the end of the path is \\ or /, remove this and explore file path.
            NEVER modify the file path in fileSelect.
            
            Read the entire contents of the file based on the file path and print it out.
            DON'T do this more than once
            """,
            expected_output="""
            Print out the entire contents of the file NEVER MODIFY.
            """,
            agent=agent,
            context=context,
            output_file="mainFileSearch.md",
        )

    def fileSelect(self, agent, context):
        return Task(
            description="""
            Based on the mainFileSearch, 
            There are other documents linked by the symbol '[[...]]' and '![[...]]' in that file NOT '[...]
            '[[...]]' symbol means a markdown file and '![[...]]' means an image file.
            
            Find all of the '[[...]]' and '![[...]]' and print out the ONLY file path associated with the word in it in markdownPathSearch or imgPathSearch. 
            All file paths should EXIST in that markdownPathSearch Output or imgPathSearch Output. 
            DON'T make it up and look for it.
            If the relevant document/image does not exist, JUST Return EMPTY List.",
            """,
            expected_output="""
            Your final answer MUST include the path of the first file and the path of other files within that file.
            It doesn't include ANYTHING other than file paths. 

            minaFile and relatedFiles Include ONLY markdown File!

            Example Answer 1
            {
                "mainFile": "./Algorithm/Algorithm Content/Tree/MST(Minimum Spanning Tree).md",
                "relatedFiles": [
                    "./Algorithm/Algorithm Content/Graph Theory/DFS(Depth-First Search).md",
                    "./Algorithm/Algorithm Content/Graph Theory/BFS(Breadth-First Search).md",
                    "./Algorithm/Algorithm Content/Tree/Union Find.md",
                ],
                "imageFiles": ["./Algorithm/Reference/Tree Reference/MST Ref/MST Graph.png",]
            }

            Example Answer 2
            {
                "mainFile": "./Algorithm/Algorithm Content/Graph Theory/BFS(Breadth-First Search).md",
                "relatedFiles": [],
                "imageFiles": [
                    "./Algorithm/Reference/Graph Theory Reference/BASE TREE.png",
                    "./Algorithm/Reference/Graph Theory Reference/BFS Ref/BFS Queue.png",
                ]
            }

            Example Answer 3
            {
                "mainFile": "./c/k.md",
                "relatedFiles": [
                    "./c/g.md",
                    "./c/c.md",
                    "./c/d.md",
                ],
                "imageFiles": []
            }             
            """,
            agent=agent,
            context=context,
            output_json=associateFilePath,
            output_file="associateFilePath.md",
        )

    def imgExtract(self, agent, context):
        return Task(
            description="""
            Based on the fileSelect, fileSelect is in json format. Image Paths are stored in 'imageFiles' key value and is in List. Read all the img files and Extract them in text.
            The path of the image NEVER ends with \\ and /.
            If the end of the path is \\ or /, remove this and explore file path.
            NEVER modify the file path in fileSelect.

            """,
            expected_output="""
            Your final answer MUST be contents of the image in Korean. Don't summarize the contents of the image, JUST print it out as it is.
            """,
            agent=agent,
            context=context,
            output_file="ImgExtractContent.md",
        )

    def textOrganize(self, agent, context):
        return Task(
            description="""
            Based on the fileSelect, fileSelect is in json format. Document Paths are stored in 'mainFile' and `relatedFiles' key value and is in List. Read all the Document files and Organize them in text.
            The path of the image NEVER ends with \\ and /.
            If the end of the path is \\ or /, remove this and explore file path.
            NEVER modify the file path in fileSelect.

            `mainFile`'s file is the key document, and the documents in related Files are supplements of the key document.
            """,
            expected_output="""
            Your final answer MUST be contents of the document in Korean. Don't summarize the content, The document should be prepared by analyzing the contents in as much DETAIL as possible.
            
            The contents of the main file should include the contents of file name, concept, principle, and example.
            Each contents of Related files must contain the contents of file name, concept, and content associated with the main file.

            Example Answer
            Main file
            - file name
            - concept
            - principle
            - example
            Related files
            file 1
            - file name
            - concept
            - content associated with the main file
            file 2
            - file name
            - concept
            - content associated with the main file            
            file 3
            - file name
            - concept
            - content associated with the main file
            ....
            """,
            agent=agent,
            context=context,
            output_file="textOrganizeContent.md",
        )

    def research(self, agent, context):
        return Task(
            description="Based on the pathSearch, Gather and analyze the most relevant markdown files for troubleshooting {question}. Provide a summary of markdown files and what is an important point in files. If there is a symbol '[[file_name]]' in the markdown file, you should also look into the markdown file with the file_name.",
            expected_output="Your final answer MUST be a detailed summary of the markdown files in Korean. Include Path to all imported files, why the file was imported, what is an important point and If there is an example for {question}, include it. If you can't find the file, JUST Answer that you don't know",
            agent=agent,
            context=context,
            output_file="questionAnswer.md",
        )

In [5]:
from crewai import Crew

agent = Agents()
tasks = Tasks()


markdownPathSearcher = agent.markdownPathSearcher()
imgPathSearcher = agent.imgPathSearcher()
mainFileSearcher = agent.mainFilesearcher()
fileSelector = agent.fileSelector()
imgExtracter = agent.imgExtracter()
textOrganizer = agent.textOrganizer()

markdownPathSearcher_task = tasks.markdownPathSearch(markdownPathSearcher)
imgPathSearcher_task = tasks.imgPathSearch(imgPathSearcher)
mainFileSearcher_task = tasks.mainFileSearch(mainFileSearcher, [markdownPathSearcher_task])
fileSelector_task = tasks.fileSelect(
    fileSelector,
    [mainFileSearcher_task, markdownPathSearcher_task, imgPathSearcher_task],
)
imgExtracter_task = tasks.imgExtract(imgExtracter, [fileSelector_task])
textOrganizer_task = tasks.textOrganize(textOrganizer, [fileSelector_task])

crew = Crew(
    agents=[
        markdownPathSearcher,
        imgPathSearcher,
        mainFileSearcher,
        fileSelector,
        textOrganizer,
        imgExtracter,
    ],
    tasks=[
        markdownPathSearcher_task,
        imgPathSearcher_task,
        mainFileSearcher_task,
        fileSelector_task,
        textOrganizer_task,
        imgExtracter_task,
    ],
    verbose=True,
)

result = crew.kickoff(
    dict(
        file_path=".\Algorithm\Algorithm Content",
        img_path=".\Algorithm\Reference",
        question="HLD가 뭐야?",
    )
)

  file_path=".\Algorithm\Algorithm Content",
  img_path=".\Algorithm\Reference",


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Task:[00m [92mFinds ALL the markdown files and inside .\Algorithm\Algorithm Content path[00m


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Thought:[00m [92mI need to find all the markdown files in the specified directory .\Algorithm\Algorithm Content. To do that, I will list the contents of the directory so I can identify any markdown files present.[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \".\\\\Algorithm\\\\Algorithm Content\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-.\Algorithm\Algorithm Content/Array\Binary Search.md
- .\Algorithm\Algorithm Content/Array\MITM(Meet in the middle).md
- .\Algorithm\Algorithm Content/Array\PBS(Parallel Binary Search).md
- .\Algorithm\Algorithm Content/Graph Theory\Articulation Points And Bridges.md
- .\Algorithm\Algorithm Content/Graph Theory\BFS(Breadth-First Search).md
- .\Algorithm\Algorithm 

  file_path=".\Algorithm\Algorithm Content",
  img_path=".\Algorithm\Reference",


KeyboardInterrupt: 