In [24]:
import os
from state.state import ParseState
from module.split_pdf import SplitPDFFilesNode
from module.upstage_parser import DocumentParseNode, PostDocumentParseNode, WorkingQueueNode, continue_parse
from langgraph.graph import StateGraph, START, END


# PDF Parser 테스트 중
## 노드 생성

In [25]:
# Create the graph nodes.
# Every node is constructed with verbose=True (presumably enabling the
# progress logs printed during a run — confirm in the node classes).
split_pdf_node = SplitPDFFilesNode(batch_size=3, test_page=None, verbose=True)
# os.environ[...] raises KeyError when UPSTAGE_API_KEY is not set, so a
# missing key fails here rather than on the first parse request.
document_parse_node = DocumentParseNode(
    api_key=os.environ["UPSTAGE_API_KEY"], verbose=True
)
post_document_parse_node = PostDocumentParseNode(verbose=True)
working_queue_node = WorkingQueueNode(verbose=True)

# Configure the workflow; ParseState is the state schema shared by all nodes.
workflow = StateGraph(ParseState)
workflow.add_node("split_pdf_node", split_pdf_node)
workflow.add_node("document_parse_node", document_parse_node)
workflow.add_node("post_document_parse_node", post_document_parse_node)
workflow.add_node("working_queue_node", working_queue_node)

# Connect the nodes:
#   START -> split_pdf_node -> working_queue_node
#   working_queue_node -> document_parse_node   when continue_parse returns True
#   document_parse_node -> working_queue_node   (loop back for the next item)
#   working_queue_node -> post_document_parse_node   when continue_parse returns False
#   post_document_parse_node -> END
workflow.add_edge(START, "split_pdf_node")
workflow.add_edge("split_pdf_node", "working_queue_node")
workflow.add_conditional_edges(
    "working_queue_node",
    continue_parse,
    {True: "document_parse_node", False: "post_document_parse_node"},
)
workflow.add_edge("document_parse_node", "working_queue_node")
# Make termination explicit instead of leaving the final node without an
# outgoing edge.
workflow.add_edge("post_document_parse_node", END)

# Compile into a runnable graph; the name `test` is used by the cell below.
test = workflow.compile()

In [26]:
# Run the compiled graph on one source PDF. The call is the last expression
# of the cell, so the returned final state is displayed as the cell output.
initial_state = {
    "filepath": "/home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험.pdf"
}
test.invoke(initial_state)

[SplitPDFNode] 파일의 전체 페이지 수: 12 Pages.
[SplitPDFNode] 분할 PDF 생성: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0000_0002.pdf
[SplitPDFNode] 분할 PDF 생성: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0003_0005.pdf
[SplitPDFNode] 분할 PDF 생성: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0006_0008.pdf
[SplitPDFNode] 분할 PDF 생성: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0009_0011.pdf
[DocumentParseNode] Start Parsing: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0000_0002.pdf
[DocumentParseNode] Finished Parsing in 4.05 seconds
[DocumentParseNode] Start Parsing: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0003_0005.pdf
[DocumentParseNode] Finished Parsing in 5.50 seconds
[DocumentParseNode] Start Parsing: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0006_0008.pdf
[DocumentParseNode] Finished Parsing in 8.35 seconds
[DocumentParseNode] Start Parsing: /home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0009_0011.pdf
[DocumentParse

{'filepath': '/home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험.pdf',
 'split_filepaths': ['/home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0000_0002.pdf',
  '/home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0003_0005.pdf',
  '/home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0006_0008.pdf',
  '/home/kevin/projects/langgraph/source/data/(무)아이사랑첫보험_0009_0011.pdf'],
 'working_filepath': '<<FINISHED>>',
 'metadata': [{'api': '2.0',
   'model': 'document-parse-240910',
   'usage': {'pages': 3}},
  {'api': '2.0', 'model': 'document-parse-240910', 'usage': {'pages': 3}},
  {'api': '2.0', 'model': 'document-parse-240910', 'usage': {'pages': 3}},
  {'api': '2.0', 'model': 'document-parse-240910', 'usage': {'pages': 3}}],
 'total_cost': 0.12,
 'raw_elements': [[{'category': 'heading1',
    'content': {'html': "<h1 id='0' style='font-size:20px'>무배당 아이사랑 첫보험</h1>",
     'markdown': '# 무배당 아이사랑 첫보험',
     'text': '무배당 아이사랑 첫보험'},
    'coordinates': [{'x': 0.1004, 'y': 0.1591},
 