In [27]:
import os
from dotenv import load_dotenv
from langchain_core.runnables import RunnableConfig
from langchain_teddynote.messages import random_uuid

import yaml
import json

from graph import DataExtractor
from utils import outputs2pprint

In [2]:
# Load environment variables from the local ".env" file.
load_dotenv(dotenv_path=".env")

# Read the API keys from the environment (None when a variable is not set).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")

# LangSmith tracing settings (optional). The tracing flag is set to "false" here.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "false",
        "LANGCHAIN_PROJECT": "Multi-agent Collaboration",
    }
)

In [88]:
def load_question(question_path:str="./config/questions/250115-SY-question.yaml") -> list:
    """
    Build the four extraction prompts from a question YAML file.

    The YAML mapping is expected to provide ``main_question``, ``example1`` ..
    ``example4`` and, for the third and fourth prompts only,
    ``add_question3`` / ``add_question4``.

    Each prompt has the layout::

        "\\n" + main_question [+ add_question{i}] + "\\n" + <example{i} as JSON> + "\\n"

    where the example is serialised with ``ensure_ascii=False`` and a 4-space
    indent.

    Args:
        question_path (str): Path to the question YAML file.

    Returns:
        list: Four prompt strings, ordered from question 1 to question 4.

    Raises:
        KeyError: If one of the keys listed above is missing from the YAML.
    """
    # Read as UTF-8 explicitly: the prompts contain non-ASCII characters, and the
    # platform default encoding is not UTF-8 everywhere.
    with open(question_path, 'r', encoding='utf-8') as file:
        questions = yaml.safe_load(file)

    question_list = []
    for i in range(1, 5):
        # Only questions 3 and 4 have an extra instruction, appended directly
        # after the main question (no separator in between).
        extra = questions[f"add_question{i}"] if i in (3, 4) else ""
        example = json.dumps(questions[f"example{i}"], ensure_ascii=False, indent=4)
        question_list.append(f"\n{questions['main_question']}{extra}\n{example}\n")

    return question_list

In [89]:
# Build the prompt list from load_question's default YAML path.
question_list = load_question()

In [91]:
# Preview the fourth prompt (index 3).
print(question_list[3])


Below are instructions for filling out items by referring to the examples.
The values shown to the right of the colon (":") are examples;
please delete them after reviewing and rewrite them with the values found in the PDF.
If any item is not mentioned in the PDF, do not remove it—write "None."
List all discharge capacities (mAh/g) at different conditions (e.g., initial capacity, after cycling, different C-rates, cutoff voltages). Include the electrode state and any specified conditions such as current density, cycling stage, or voltage range.
List all discharge capacities (mAh/g) for pristine, doped, and coated electrodes at all specified C-rates (e.g., 0.1C, 5.0C). Include electrode state (e.g., pristine, doped, coated) and any relevant conditions or performance metrics.

[
    {
        "Cathode Performance": {
            "Capacity at all C-rate, mAh/g (with electrode state)": [
                {
                    "200.2 mAh/g": "@0.1C, Pristine"
                },
             

In [None]:
# Run the extraction graph on each listed paper and collect the per-question
# results in total_outputs, keyed by paper number.
total_outputs = {}
for file_num in [56]:
    print(f"#####   {file_num}번째 논문    #####")
    voltai_graph = DataExtractor(
        # folder_path, 
        file_number=file_num
    ).graph

    # Run configuration (recursion limit, thread_id).
    # One config, and therefore one thread_id, is created per paper and reused
    # for every question asked about that paper.
    config = RunnableConfig(
        recursion_limit=20, 
        configurable={"thread_id": random_uuid()}
    )

    # Invoke the graph once per question in question_list and collect the results.
    results = []
    for i, question in enumerate(question_list):
        # inputs = GraphState(question=question)
        print(f"    {i+1}번째 질문")
        # print(question)
        result = voltai_graph.invoke(
            input={"question":question},
            config=config,
        )
        results.append(result)
    
    total_outputs[file_num] = results
    
    # NOTE(review): this call is inside the per-paper loop, so the accumulated
    # total_outputs is passed again on every iteration — confirm this is intended.
    outputs2pprint(total_outputs)

#####   56번째 논문    #####
    vectordb-paper_056을 불러왔습니다.
1번째 질문
        RELEVANCE CHECK : yes
2번째 질문
        RELEVANCE CHECK : yes
3번째 질문
        RELEVANCE CHECK : yes
4번째 질문
        RELEVANCE CHECK : yes
##  56번째 논문 결과
{'CAM (Cathode Active Material)': {'Stoichiometry information': {'NCM-811': {'Li ratio': '1',
                                                                             'Ni ratio': '0.8',
                                                                             'Co ratio': '0.1',
                                                                             'Mn ratio': '0.1',
                                                                             'O ratio': '2'}},
                                   'Commercial NCM': 'Yes',
                                   'Lithium source': 'LiOH',
                                   'Synthesis method': 'co-precipitation',
                                   'Crystallization method': 'None',
                                   'Cr

In [None]:
import time
import pandas as pd

def save_data2output_folder(output_folder: str, data, filename: str):
    """
    Save ``data`` in ``output_folder`` as a timestamped CSV or JSON file.

    * A pandas DataFrame is written as CSV (``utf-8-sig`` encoding, no index
      column) to ``<timestamp>-<filename>.csv``.
    * A dict is written as JSON (UTF-8, non-ASCII characters kept, 4-space
      indent) to ``<filename>-<timestamp>.json``.
    * Any other type is not saved; a message is printed instead.

    The timestamp uses the format ``%y%m%d%H%M%S`` (one-second resolution), so
    two saves with the same ``filename`` within the same second target the same
    path.

    Args:
        output_folder (str): The path to the output folder; created if missing.
        data: The data to save (pandas DataFrame for CSV, dict for JSON).
        filename (str): The base name of the file (without timestamp and extension).

    Returns:
        None
    """
    # exist_ok=True creates any missing parent folders and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Timestamp component of the file name
    timestamp = time.strftime("%y%m%d%H%M%S")

    # Choose the output format from the type of `data`.
    # NOTE(review): the timestamp is a prefix for CSV but a suffix for JSON —
    # confirm the asymmetry is intended before relying on either pattern.
    if isinstance(data, pd.DataFrame):
        file_path = os.path.join(output_folder, f"{timestamp}-{filename}.csv")
        data.to_csv(file_path, index=False, encoding='utf-8-sig')
        print(f"    CSV 파일 {file_path}에 저장되었습니다.")

    elif isinstance(data, dict):
        file_path = os.path.join(output_folder, f"{filename}-{timestamp}.json")
        with open(file_path, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, ensure_ascii=False, indent=4)
        print(f"    JSON 파일 {file_path}에 저장되었습니다.")

    else:
        # Unsupported type: report it and write nothing.
        print("    데이터 형식이 지원되지 않습니다. pandas DataFrame 또는 dict만 저장 가능합니다.")

In [58]:
def outputs2json(total_outputs:dict):
    """
    Merge each paper's per-question answers and save them as one JSON file.

    For every paper number in ``total_outputs``, the first element of each
    output's ``"answer"`` list is merged into a single dict, in question order;
    on duplicate keys the later question wins. The merged dict is passed to
    ``save_data2output_folder`` with the folder ``./output/json/`` and the base
    name ``paper_<NNN>_output``, where ``NNN`` is the paper number zero-padded
    to three digits.

    Args:
        total_outputs (dict): Maps an integer paper number to the list of graph
            outputs for that paper. Each output must expose ``["answer"][0]``
            as a dict.
    """
    for file_num, outputs in total_outputs.items():
        # Merge every question's answer instead of hard-coding four indices.
        answers = {}
        for output in outputs:
            answers |= output["answer"][0]

        # File name: zero-pad the paper number to three digits
        json_name = f"paper_{file_num:03d}_output"

        # The keyword must be `data`; save_data2output_folder has no `df` parameter.
        save_data2output_folder(output_folder="./output/json/", data=answers, filename=json_name)

{56: [{'question': 'Below are instructions for filling out items by referring to the examples.\n    The values shown to the right of the colon (“:”) are examples;\n    please delete them after reviewing and rewrite them with the values found in the PDF.\n    If any item is not mentioned in the PDF, do not remove it—write “None.”\n    [\n        {\n            "CAM (Cathode Active Material)": {\n                "Stoichiometry information": {\n                    "NCM-622" : {\n                        "Li ratio": "1", \n                        "Ni ratio": "0.6", \n                        "Co ratio": "0.2", \n                        "Mn ratio": "0.2", \n                        "O ratio": "2"\n                    }\n                },\n                "Commercial NCM": "No",\n                "Lithium source": "LiOH",\n                "Synthesis method": "co-precipitation",\n                "Crystallization method": "Hydrothermal",\n                "Crystallization temperature": "100°C",\n 

In [57]:
# # 4개의 질문 입력
# ori_question_list = [
#     """Below are instructions for filling out items by referring to the examples.
#     The values shown to the right of the colon (“:”) are examples;
#     please delete them after reviewing and rewrite them with the values found in the PDF.
#     If any item is not mentioned in the PDF, do not remove it—write “None.”
#     [
#         {
#             "CAM (Cathode Active Material)": {
#                 "Stoichiometry information": {
#                     "NCM-622" : {
#                         "Li ratio": "1", 
#                         "Ni ratio": "0.6", 
#                         "Co ratio": "0.2", 
#                         "Mn ratio": "0.2", 
#                         "O ratio": "2"
#                     }
#                 },
#                 "Commercial NCM": "No",
#                 "Lithium source": "LiOH",
#                 "Synthesis method": "co-precipitation",
#                 "Crystallization method": "Hydrothermal",
#                 "Crystallization temperature": "100°C",
#                 "Crystallization time": "12 hr",
#                 "Doping": "Zr4+ doping",
#                 "Coating": "ZrO2 coating",
#                 "Additional treatment": "None"
#             }
#         }
#     ]
#     """,    
    
#     """Below are instructions for filling out items by referring to the examples.
#     The values shown to the right of the colon (“:”) are examples;
#     please delete them after reviewing and rewrite them with the values found in the PDF.
#     If any item is not mentioned in the PDF, do not remove it—write “None.”
#     [
#         {
#             "Electrode (only for coin-cell (half-cell))": {
#                 "Active material : Conductive additive : Binder ratio": "90 : 5 : 5",
#                 "Electrolyte": "LiPF6 (EC, EMC, DEC mixture in a 1:1:1 volume ratio)",
#                 "Additive": "FEC 10% addition",
#                 "Electrode thickness": "100 µm",
#                 "Only Cathode Electrode diameter": "14π",
#                 "Loading density (mass loading of NCM)": "0.005 g/cm^2",
#                 "Additional treatment for electrode": "None"
#             }
#         }
#     ]
#     """,

#     """Below are instructions for filling out items by referring to the examples.
#     The values shown to the right of the colon (“:”) are examples;
#     please delete them after reviewing and rewrite them with the values found in the PDF.
#     If any item is not mentioned in the PDF, do not remove it—write “None.”
#     [
#         {        
#             "Morphological results": {
#                 "Explanation of SEM results": "Fig. 2a, b; the NCM-622 seems to have more or less a spherical morphology with a diameter of 3–5 µm, composed of densely packed primary particles",
#                 "Explanation of TEM results": "None"
#             }    
#         }
#     ]
#     """,

#     """Below are instructions for filling out items by referring to the examples.
#     The values shown to the right of the colon (“:”) are examples;
#     please delete them after reviewing and rewrite them with the values found in the PDF.
#     If any item is not mentioned in the PDF, do not remove it—write “None.”
#     [
#         {            
#             "Cathode Performance": {
#                 "Capacity at all C-rate, mAh/g (with electrode state)": [
#                     {
#                         "214.5 mAh/g": "@0.1C, ZrO2-coated",
#                         "200.8 mAh/g": "@0.5C, ZrO2-coated"
#                     }
#                 ],
#                 "Voltage range": "2.8–4.3 V",
#                 "Temperature": "Room temperature and 55°C"
#             }
#         }
#     ]
#     """
# ]
