In [1]:
import os
from crewai import LLM

os.environ["OPENAI_MODEL_NAME"] = "gpt-4o"

In [2]:
from pydantic import BaseModel
from typing import List

class filePath(BaseModel):
    filePath: List[str]

class associateFilePath(BaseModel):
    mainFile: str
    relatedFiles: List[str]
    imageFiles: List[str]

In [12]:
from crewai import Agent
from crewai_tools import FileReadTool, DirectoryReadTool

class Agents:
    def markdownPathSearcher(self):
        return Agent(
            role="pathSearcher",
            goal="Finds the markdown files inside {file_path} path",
            backstory="You are fluent in Korean, and you are very good at finding markdown files.",
            allow_delegation=False,
            verbose=True,
            llm='gpt-4o-mini',
            tools=[
                DirectoryReadTool(),
            ],
        )

    def imgPathSearcher(self):
        return Agent(
            role="pathSearcher",
            goal="Finds the img files inside {img_path} path",
            backstory="You are fluent in Korean, and you are very good at finding image files.",
            allow_delegation=False,
            verbose=True,
            llm='gpt-4o-mini',
            tools=[
                DirectoryReadTool(),
            ],
        )

    def mainFilesearcher(self):
        return Agent(
            role="mainFilesearcher",
            goal="Print out ONLY one document that can answer {question}. To use the Tool, The parameter MUST be file_path = `filepath`.",
            backstory="You are fluent in Korean. You are a bookworm. Read and grasp everything in the document delicately",
            allow_delegation=False,
            verbose=True,
            llm='gpt-4o',
            tools=[
                FileReadTool(),
            ],
            max_iter=3,
            max_execution_time=1,
        )

    def fileSelector(self):
        return Agent(
            role="fileSelector",
            goal="Find out the path of all other files that correspond to the document and print them out.",
            backstory="You are a file search expert and fluent in Korean. You have a great ability to read and analyze the details of the file.",
            llm='gpt-4o',
            allow_delegation=False,
            verbose=True,
        )

In [4]:
from crewai import Task


class Tasks:
    def markdownPathSearch(self, agent):
        return Task(
            description="Finds ALL the markdown files and inside {file_path} path",
            expected_output="Your final answer MUST be markdown file path. The file path symbol must be '/'",
            agent=agent,
            output_json=filePath,
            output_file="MarkdownPath.md",
        )

    def imgPathSearch(self, agent):
        return Task(
            description="Finds ALL the image files and inside {img_path} path. but NOT Include svg Image.",
            expected_output="Your final answer MUST be image path. svg images should NEVER be included. The file path symbol must be '/'",
            agent=agent,
            output_json=filePath,
            output_file="ImgPath.md",
        )

    def mainFileSearch(self, agent, context):
        return Task(
            description="""
            Based on the markdownPathSearch, markdownPathSearch is in json format. Document Paths are stored in 'filePath' key value and is in List. Search Only one file path that can solve {question}.
            The path of the image NEVER ends with \\ and /.
            If the end of the path is \\ or /, remove this and explore file path.
            NEVER modify the file path in fileSelect.
            
            Read the entire contents of the file based on the file path and print it out.
            DON'T do this more than once
            """,
            expected_output="""
            Print out the entire contents of the file NEVER MODIFY.
            """,
            agent=agent,
            context=context,
            output_file="mainFileSearch.md",
        )

    def fileSelect(self, agent, context):
        return Task(
            description="""
            Based on the mainFileSearch, 
            There are other documents linked by the symbol '[[...]]' and '![[...]]' in that file NOT '[...]
            '[[...]]' symbol means a markdown file and '![[...]]' means an image file.
            
            Find all of the '[[...]]' and '![[...]]' and print out the ONLY file path associated with the word in it in markdownPathSearch or imgPathSearch. 
            All file paths should EXIST in that markdownPathSearch Output or imgPathSearch Output. 
            DON'T make it up and look for it.
            If the relevant document/image does not exist, JUST Return EMPTY List.",
            """,
            expected_output="""
            Your final answer MUST include the path of the first file and the path of other files within that file.
            It doesn't include ANYTHING other than file paths. 

            minaFile and relatedFiles Include ONLY markdown File!

            Example Answer 1
            {
                "mainFile": "./Algorithm/Algorithm Content/Tree/MST(Minimum Spanning Tree).md",
                "relatedFiles": [
                    "./Algorithm/Algorithm Content/Graph Theory/DFS(Depth-First Search).md",
                    "./Algorithm/Algorithm Content/Graph Theory/BFS(Breadth-First Search).md",
                    "./Algorithm/Algorithm Content/Tree/Union Find.md",
                ],
                "imageFiles": ["./Algorithm/Reference/Tree Reference/MST Ref/MST Graph.png",]
            }

            Example Answer 2
            {
                "mainFile": "./Algorithm/Algorithm Content/Graph Theory/BFS(Breadth-First Search).md",
                "relatedFiles": [],
                "imageFiles": [
                    "./Algorithm/Reference/Graph Theory Reference/BASE TREE.png",
                    "./Algorithm/Reference/Graph Theory Reference/BFS Ref/BFS Queue.png",
                ]
            }

            Example Answer 3
            {
                "mainFile": "./c/k.md",
                "relatedFiles": [
                    "./c/g.md",
                    "./c/c.md",
                    "./c/d.md",
                ],
                "imageFiles": []
            }
            """,
            agent=agent,
            context=context,
            output_json=associateFilePath,
            output_file="associateFilePath.md",
        )

In [5]:
from crewai import Crew

agent = Agents()
tasks = Tasks()

In [6]:
markdownPathSearcher = agent.markdownPathSearcher()

markdownPathSearcher_task = tasks.markdownPathSearch(markdownPathSearcher)

filePathCrew = Crew(
    agents=[markdownPathSearcher],
    tasks=[markdownPathSearcher_task],
    verbose=True,
)

filePathResult = filePathCrew.kickoff(
    dict(
        file_path=".\Algorithm\Algorithm Content",
    )
)

  file_path=".\Algorithm\Algorithm Content",


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Task:[00m [92mFinds ALL the markdown files and inside .\Algorithm\Algorithm Content path[00m


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Thought:[00m [92mI need to find all the markdown files within the specified directory: .\Algorithm\Algorithm Content.[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \".\\\\Algorithm\\\\Algorithm Content\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-.\Algorithm\Algorithm Content/Array\Binary Search.md
- .\Algorithm\Algorithm Content/Array\MITM(Meet in the middle).md
- .\Algorithm\Algorithm Content/Array\PBS(Parallel Binary Search).md
- .\Algorithm\Algorithm Content/Graph Theory\Articulation Points And Bridges.md
- .\Algorithm\Algorithm Content/Graph Theory\BFS(Breadth-First Search).md
- .\Algorithm\Algorithm Content/Graph Theory\CCW(Counter Clock Wise).md
- .\Algorithm\Algorithm Content/Graph Theory\DF

In [7]:
imgPathSearcher = agent.imgPathSearcher()

imgPathSearcher_task = tasks.imgPathSearch(imgPathSearcher)

imgPathCrew = Crew(
    agents=[imgPathSearcher],
    tasks=[imgPathSearcher_task],
    verbose=True,
)

imgPathResult = imgPathCrew.kickoff(
    dict(
        img_path=".\Algorithm\Reference",
    )
)

  img_path=".\Algorithm\Reference",


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Task:[00m [92mFinds ALL the image files and inside .\Algorithm\Reference path. but NOT Include svg Image.[00m


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Thought:[00m [92mI need to find all image files in the .\Algorithm\Reference directory while excluding any SVG images.[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \".\\\\Algorithm\\\\Reference\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-.\Algorithm\Reference/BAEKJOON.png
- .\Algorithm\Reference/Array Reference\Binary Search Ref\Binary Search Flowchart.md
- .\Algorithm\Reference/Array Reference\Binary Search Ref\Binary Search Flowchart.png
- .\Algorithm\Reference/Array Reference\Binary Search Ref\Binary Search Flowchart.svg
- .\Algorithm\Reference/Array Reference\MITM Recursion Ref\MITM Recursion.md
- .\Algorithm\Reference/Array Reference\MITM Recursion Ref\MITM Recursion.png
- .\

In [13]:
mainFileSearcher = agent.mainFilesearcher()
fileSelector = agent.fileSelector()

mainFileSearcher_task = tasks.mainFileSearch(
    mainFileSearcher, [markdownPathSearcher_task]
)
fileSelector_task = tasks.fileSelect(
    fileSelector,
    [mainFileSearcher_task, markdownPathSearcher_task, imgPathSearcher_task],
)

fileSelectorCrew = Crew(
    agents=[
        mainFileSearcher,
        fileSelector,
    ],
    tasks=[
        mainFileSearcher_task,
        fileSelector_task,
    ],
    verbose=True,
)

fileSelectorResult = fileSelectorCrew.kickoff(
    dict(
        question="Finweck Tree가 뭐야?",
    )
)



[1m[95m# Agent:[00m [1m[92mmainFilesearcher[00m
[95m## Task:[00m [92m
            Based on the markdownPathSearch, markdownPathSearch is in json format. Document Paths are stored in 'filePath' key value and is in List. Search Only one file path that can solve Finweck Tree가 뭐야?.
            The path of the image NEVER ends with \ and /.
            If the end of the path is \ or /, remove this and explore file path.
            NEVER modify the file path in fileSelect.
            
            Read the entire contents of the file based on the file path and print it out.
            DON'T do this more than once
            [00m


[1m[95m# Agent:[00m [1m[92mmainFilesearcher[00m
[95m## Thought:[00m [92mThe task is to find and read a file that can answer the question "Finweck Tree가 뭐야?" from a list of file paths. Given its similarity, it is likely that "Fenwick Tree" is another name for "Finweck Tree". Therefore, I should read the file related to "Fenwick Tree".
The file 

In [24]:
from langchain_openai import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler

llm = ChatOpenAI(
    temperature=0.1,
    model="gpt-4o-mini",
)

In [1]:
from langchain_unstructured import UnstructuredLoader
from langchain.document_loaders import UnstructuredImageLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def img_split(file_path):
    loader = UnstructuredImageLoader(file_path=file_path)
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500,
        chunk_overlap=60,
    )
    docs = loader.load()
    print(docs)

def document_split(file_path, includeCode = True):
    loader = UnstructuredLoader(file_path=file_path)
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500,
        chunk_overlap=60,
    )
    docs = loader.load()

    content = ""
    codeDocs = []
    codeDoc = ""
    flag = False
    for doc in docs:
        if "```" in doc.page_content:
            if flag:
                codeDocs.append(codeDoc)
                doc.page_content.replace("```"," ")
                flag = False
            else:
                codeDoc = ""
                flag = True

        if flag:
            codeDoc += doc.page_content + " "
        else:
            content += doc.page_content + " "
    textDocs = splitter.split_text(content)
    if includeCode:
        textDocs.extend(codeDocs)
    return textDocs

In [None]:
import json
import os.path

fileSelector_Json = json.loads(fileSelectorResult.raw)

relatedDocs = []
mainDocs = []

mainFilePath = fileSelector_Json["mainFile"]

if os.path.isfile(mainFilePath):
    mainDocs = document_split(mainFilePath)


for filePath in fileSelector_Json["relatedFiles"]:
    if os.path.isfile(filePath):
        relatedDocs.extend(document_split(filePath, False))



['Concept Node 값에 배열의 범위를 가진 이진 Tree Struct의 일종이다. 배열의 범위는 말 그대로 배열(Array)의 범위를 말한다.ex) [0:n-1], [2:3], [(n-1)/2+1:n] 보통 배열의 범위 구간의 합, 차, 곱을 빠르게 계산할 때 사용한다. 배열의 길이를 N이라 하고 쿼리(구간 연산을 계산하는 횟수)를 M이라고 할 때, O(logN)의 시간복잡도가 소요된다. Segment Tree 원리 배열의 길이를 N이라 할 때 Segment Tree의 root는 배열 0~n-1에 대한 정보를 가지고 있다. root의 child는 각각 0 ~ N/2, N/2 + 1 ~ N-1까지의 정보를 가지고 있고 그의',
 '0 ~ N/2, N/2 + 1 ~ N-1까지의 정보를 가지고 있고 그의 자식들도 각각 부모의 1 / 2의 정보를 나눠 가지고 있다. 보통, node의 index를 0번부터 시작하지만 Segment Tree의 경우 node의 index가 1번부터 시작하게 된다. root index를 1로 설정할 경우, node의 left child index = node * 2, right child index = node * 2 + 1가 되기 때문에 구현을 보다 편하게 할 수 있다. Segment Tree에 필요한 기능은 크게 3가지가 있다. init() : 배열에 있는 값들을 범위에 맞춰 Segment Tree에 넣는다. update() : 배열 값에 변화가 있는 경우 이에 맞춰 Segmet Tree 값을 바꾼다. calculation() : 배열 범위에 맞는 값을',
 '이에 맞춰 Segmet Tree 값을 바꾼다. calculation() : 배열 범위에 맞는 값을 Segment Tree에서 찾아 연산해 Return 한다. 🖼️그림으로 이해하기 ![[Segment Tree Graph.svg]] Segment Tree CODE (📑2042 - 구간 합 구하기) Tree Height는 log2(n)이고 Tree Size의 경우 (Tree H

In [25]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

first_prompt = ChatPromptTemplate.from_template(
    """
    Your job is to find the right answer to the {question}.
    You are very good at using Korean and English.
    We have provied an existing answer to a certain point : {existing_content}
    We have the opportunity to refine the existing answer (only if needed) with some more context below.
    ------
    {context}
    ------
    Given the new context, refine the original answer.
    If the context ins't useful, RETURN the original answer.    
    """
)

first_chain = first_prompt | llm | StrOutputParser()

answer = ""
question = "Fenwick Tree의 원리에 대해 알려줘"

for doc in mainDocs:
    answer = first_chain.invoke({"question" : question, "existing_content" : answer, "context" : doc})

"Fenwick Tree, 또는 Binary Indexed Tree (BIT)는 구간의 합을 효율적으로 계산할 수 있는 자료구조입니다. 이 구조는 Segment Tree의 변형으로, 시간 복잡도는 O(log N)으로 Segment Tree와 동일하지만, 공간 복잡도는 O(n)으로 더 적습니다. 실제로는 Segment Tree보다 더 빠르게 작동하는 경우가 많습니다. 예를 들어, Lazy Segment Tree는 Segment Tree보다 약 두 배 느리고, Segment Tree는 Fenwick Tree보다 약 두 배 느린 경향이 있습니다.\n\nFenwick Tree의 원리는 다음과 같습니다. Fenwick Tree는 Segment Tree에서 홀수 인덱스만을 사용하여 구간의 합을 계산합니다. 각 노드는 특정 구간의 합을 저장하며, BIT 연산을 통해 0이 아닌 최하위 비트(같은 높이의 맨 좌측 비트)를 이용하여 빠르게 업데이트 및 쿼리를 수행합니다. 특정 비트(i)에 대한 최하위 비트를 구하는 공식은 `i & -i`입니다. 예를 들어, `i = (1101)2`일 때, `~i = (0010)2`, `-i = (0011)2`가 되고, `i & -i = (0001)2`가 됩니다.\n\nFenwick Tree에 필요한 기능은 크게 두 가지가 있습니다. 첫째, `sum(idx)`는 [1~idx] 범위에 있는 값들의 합을 반환합니다. 이때, 구간의 합을 구하기 위해서는 `i -= (i & -i)`를 사용하여 최하위 비트가 0이 될 때까지 반복합니다. 둘째, `update(idx, val)`는 배열의 idx번째와 해당 idx에 해당되는 모든 구간 값을 업데이트합니다. 이 과정은 `i += (i & -i)`를 통해 특정 비트가 특정 값이 될 때까지 진행됩니다.\n\n특정 구간 [l, r]의 합을 계산할 때는 `sum(r) - sum(l-1)`로 구할 수 있으며, 이 과정은 오른쪽 대각선으로 올라가는 것으로 이해할 수 있습니다. 또한, Range Update 즉, [l, 

In [30]:
refine_prompt  = ChatPromptTemplate.from_template(
    """
    Your job is to  add supplementary content to understand the contents of the existing answer.
    You are very good at using Korean and English.
    We have provied an existing answer to a certain point : {existing_content}
    We have the opportunity to refine the existing answer (only if needed) with some more context below.
    ------
    {context}
    ------
    Given the new context, refine the original answer.
    If the context ins't useful, RETURN the original answer.    
    """
)

refine_chain = refine_prompt | llm | StrOutputParser()

for doc in relatedDocs:
    answer = refine_chain.invoke({"existing_content" : answer, "context" : doc})

answer

"The new context provided does not directly relate to the functionalities or operations of the Fenwick Tree (or Binary Indexed Tree, BIT). The original answer focuses on the structure, operations, and efficiency of the Fenwick Tree in handling range sum queries and updates, which is a distinct topic from the context provided.\n\nTherefore, the original answer remains relevant and does not require refinement based on the provided context. \n\nHere is the original answer for reference:\n\nFenwick Tree, 또는 Binary Indexed Tree (BIT)는 구간의 합을 효율적으로 계산할 수 있는 자료구조입니다. 이 구조는 Segment Tree의 변형으로, 시간 복잡도는 O(log N)으로 Segment Tree와 동일하지만, 공간 복잡도는 O(n)으로 더 적습니다. 실제로는 Segment Tree보다 더 빠르게 작동하는 경우가 많습니다. 예를 들어, Lazy Segment Tree는 Segment Tree보다 약 두 배 느리고, Segment Tree는 Fenwick Tree보다 약 두 배 느린 경향이 있습니다.\n\nFenwick Tree의 원리는 다음과 같습니다. Fenwick Tree는 Segment Tree에서 홀수 인덱스만을 사용하여 구간의 합을 계산합니다. 각 노드는 특정 구간의 합을 저장하며, BIT 연산을 통해 0이 아닌 최하위 비트(같은 높이의 맨 좌측 비트)를 이용하여 빠르게 업데이트 및 쿼리를 수행합니다. 특정 비트(i)에 대한 최하위 비트를 구하는 