In [1]:
import os

os.environ["OPENAI_MODEL_NAME"] = "gpt-4o-mini"

In [2]:
from pydantic import BaseModel
from typing import List

class filePath(BaseModel):
    filePath: List[str]

class imgPath(BaseModel):
    imgPath: List[str]

class associateFilePath(BaseModel):
    mainFile: str
    relatedFiles: List[str]
    imageFiles: List[str]

In [3]:
from crewai import Agent
from crewai_tools import DirectoryReadTool


class Agents:
    def markdownPathSearcher(self):
        return Agent(
            role="pathSearcher",
            goal="Finds the markdown files inside {file_path} path",
            backstory="You are fluent in Korean, and you are very good at finding markdown files.",
            allow_delegation=False,
            verbose=True,
            tools=[
                DirectoryReadTool(),
            ],
        )

    def imgPathSearcher(self):
        return Agent(
            role="pathSearcher",
            goal="Finds the img files inside {img_path} path",
            backstory="You are fluent in Korean, and you are very good at finding image files.",
            allow_delegation=False,
            verbose=True,
            tools=[
                DirectoryReadTool(),
            ],
        )

In [4]:
from crewai import Task


class Tasks:
    def markdownPathSearch(self, agent):
        return Task(
            description="Finds ALL the markdown files and inside {file_path} path",
            expected_output="Your final answer MUST be markdown file path. The file path symbol must be '/' Other than that, should NEVER modify the path of the file.",
            agent=agent,
            output_json=filePath,
            output_file="MarkdownPath.md",
        )

    def imgPathSearch(self, agent):
        return Task(
            description="Finds ALL the image files and inside {img_path} path. but NOT Include svg Image.",
            expected_output="Your final answer MUST be image path. svg images should NEVER be included. The file path symbol must be '/' Other than that, should NEVER modify the path of the file.",
            agent=agent,
            output_json=imgPath,
            output_file="ImgPath.md",
        )

In [5]:
from crewai import Crew

agent = Agents()
tasks = Tasks()


markdownPathSearcher = agent.markdownPathSearcher()
imgPathSearcher = agent.imgPathSearcher()

markdownPathSearcher_task = tasks.markdownPathSearch(markdownPathSearcher)
imgPathSearcher_task = tasks.imgPathSearch(imgPathSearcher)

filePathCrew = Crew(
    agents=[
        markdownPathSearcher,
    ],
    tasks=[
        markdownPathSearcher_task,
    ],
    verbose=True,
)

imgPathCrew = Crew(
    agents=[
        imgPathSearcher,
    ],
    tasks=[
        imgPathSearcher_task,
    ],
    verbose=True,
)

filePathResult = filePathCrew.kickoff(
    dict(
        file_path=".\Algorithm\Algorithm Content",
    )
)

imgPathResult = imgPathCrew.kickoff(
    dict(
        img_path=".\Algorithm\Reference",
    )
)

  file_path=".\Algorithm\Algorithm Content",
  img_path=".\Algorithm\Reference",


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Task:[00m [92mFinds ALL the markdown files and inside .\Algorithm\Algorithm Content path[00m


[1m[95m# Agent:[00m [1m[92mpathSearcher[00m
[95m## Thought:[00m [92mI need to find all the markdown files inside the .\Algorithm\Algorithm Content directory. I will start by listing the files in that directory.[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \".\\\\Algorithm\\\\Algorithm Content\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-.\Algorithm\Algorithm Content/Array\Binary Search.md
- .\Algorithm\Algorithm Content/Array\MITM(Meet in the middle).md
- .\Algorithm\Algorithm Content/Array\PBS(Parallel Binary Search).md
- .\Algorithm\Algorithm Content/Graph Theory\Articulation Points And Bridges.md
- .\Algorithm\Algorithm Content/Graph Theory\BFS(Breadth-First Search).md
- .\Algorithm\Algorithm Content/Graph Theory\CCW(Counter Clock Wise).md
- .\A

In [6]:
from langchain_openai import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import ChatPromptTemplate
import json

llm = ChatOpenAI(
    temperature=0.1,
    model="gpt-4o-mini",
    streaming=True,
)

fileChoosePrompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are a helpful assistant that is role playing as a Searcher and You are very good at Korean and English.
            Based ONLY on the following context. find ONLY one path that you think will solve that question.

            Example Input:
            ```json
            {{
                "filePath": [
                    "./Algorithm/Algorithm Content/Array/Binary Search.md",
                    "./Algorithm/Algorithm Content/Array/MITM(Meet in the middle).md",
                    "./Algorithm/Algorithm Content/Array/PBS(Parallel Binary Search).md",
                    "./Algorithm/Algorithm Content/Graph Theory/Articulation Points And Bridges.md",
                    "./Algorithm/Algorithm Content/Graph Theory/BFS(Breadth-First Search).md",
                    "./Algorithm/Algorithm Content/Graph Theory/CCW(Counter Clock Wise).md",
                    "./Algorithm/Algorithm Content/Graph Theory/DFS(Depth-First Search).md",
                    "./Algorithm/Algorithm Content/Graph Theory/Dijkstra's Algorithm.md",
                    "./Algorithm/Algorithm Content/Math/2-SAT(2-Satisfiability).md",
                    "./Algorithm/Algorithm Content/String/Tire.md",
                    "./Algorithm/Algorithm Content/Tree/ETT(Euler Tour Technique).md",
                    "./Algorithm/Algorithm Content/Tree/Fenwick Tree.md",
                    "./Algorithm/Algorithm Content/Tree/HLD(Heavy Light Decomposition).md",
                ],
                "question" : "HLD가 뭐야?"
            }}
            ```

            Example Output:
            ./Algorithm/Algorithm Content/Tree/HLD(Heavy Light Decomposition).md

            Your turn!

            Question : {context}
            """,
        )
    ]
)


filePathData = json.loads(filePathResult.raw)
filePathData.update({"question": "Finwick Tree가 뭐야?"})
filePathDataStr = json.dumps(filePathData)

fileChooseChain = fileChoosePrompt | llm

mainFIlePath = fileChooseChain.invoke({"context" : filePathDataStr}).content

In [7]:
context = ""

with open(mainFIlePath, "r") as f:
    for text in f:
        context += text

context

'# Concept\n- `Binary Indexed Tree(BIT)`라고도 불린다.\n- [[Segment Tree]]의 변형 트리로 구간의 합을 빠르게 구할 수 있다는 특징이 있다.\n- 시간복잡도는 Segment Tree와 같은 `O(logN)`이지만 공간복잡도는 `O(n)`으로 Segment Tree보다 더 적다.\n- 시간복잡도 자체는 Segment Tree와 같다고 해도 실제론 조금 더 빠르게 작동하게 되는데 선형적으로 `Lazy Segment Tree ≒ 2 * Segment Tree / Segment Tree ≒ 2 * Fenwick Tree`이다.\n# Fenwick Tree 원리\n- Fenwick Tree는 Segment Tree에서 홀수 인덱스만 표기한다.(밑 그림 참조)\n- 모든 구간들은  BIT 연산을 통해 0이 아닌 최하위 비트(같은 높이의 맨좌측 비트)를 구함으로써 해결할 수 있다. \n- 특정 비트(I)를 통해 최하위 비트를 구하는 공식은 `i & -i (-i = ~i + 1)`이다.\n- ex) i = (1101)2 -> ~i = (0010)2 -> -i = (0011)2 -> i & -i = (0001)2\n#### 🖼️Segment Tree와 Fenwick Tree 구조 비교\n![[Fenwick Tree Struct Graph.svg]]\n- Fenwick Tree에 필요한 기능은 크게 2가지가 있다.\n\t1. sum(idx) : `[1~idx]` 범위에 있는 값들의 합을 Return 한다.\n\t2. update(idx, val) :  배열의 idx번째와 해당 idx에 해당되는 모든 구간 값을 업데이트 한다.\n- 특정 비트(i)에 최하위 비트가 0이 되기 전까지 빼면 구간의 합을 구할 수 있다. `i -= (i & -i)`\n- 특정 비트(i)에 최하위 비트가 특정 값(m) 될 때까지 더하면 구간을 업데이트  할 수 있다. `i += (i & -i)`\n- 특정 구간 `[l,r]`의 합을 구하기 위해서 **sum(

In [24]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.storage import LocalFileStore
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema.runnable import RunnableLambda


def embed_file():
    loader = UnstructuredFileLoader(mainFIlePath)
    splitter = CharacterTextSplitter.from_tiktoken_encoder(
        separator="\n\n",
        chunk_size=500,
        chunk_overlap=60,
    )
    documents = loader.load_and_split(text_splitter=splitter)
    cache_dir = LocalFileStore(f"./.cache/embeddings/file")
    embedder = OpenAIEmbeddings()
    cache_embedder = CacheBackedEmbeddings.from_bytes_store(embedder, cache_dir)
    vectorStore = Chroma.from_documents(documents, cache_embedder)
    retriever = vectorStore.as_retriever()
    return retriever

def format_doc(documents):
    return "\n\n".join(doc.page_content for doc in documents)



symbolDistinguishPrompt = ChatPromptTemplate.from_messages
(
    [
        (
            "system",
            """
            You are a powerful formatting algorithm.
            You format document into JSON format.

            Based ONLY on the following context. Find symbol [[...]] and ![[...]] and print out the Symbol.
            Don't make it up and Answer a blank if you can't find it.

            Example Output:
            ```json
            {{
                "fileSymbol": [
                    Segment Tree,
                    PBS(Parallel Binary Search),
                    Lazy Segment Tree,
                ],
                "imgSymbol": [
                    Fenwick Tree Struct Graph,
                    Fenwick Tree Partial Sum Graph,
                    Fenwick Tree Range Update & Point Query Graph,
                ],
            }}
            ```

            Your turn!
            
            Question : {context}
            """,
        )
    ]
)

retriever = embed_file()




In [27]:
symbolDistinguishChain = {"context": retriever | format_doc } | symbolDistinguishPrompt | llm

TypeError: unsupported operand type(s) for |: 'dict' and 'method'

In [23]:
symbolDistinguishChain.invoke({"context" : retriever} )

AIMessage(content='It seems like you might be looking for information or clarification on a specific topic. Could you please provide more details or specify what context you are referring to? This will help me assist you better!', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_01aeff40ea'}, id='run-67e94bc3-7cb4-458a-a7ae-4e51775bd1ab-0')