In [1]:
from tools import question_generator
from utils import load_config, load_system_prompt
from graph import get_rag_instance

In [3]:
# Generate the category-1 question for the sample name "NCM622"; the returned
# string is shown as the cell's output.
question_generator(category_number=1, sample_names=["NCM622"])

##  ./configs/questions/multiagent.yaml template를 불러왔습니다.
    - NCM622에 대한 question을 생성했습니다.


'Please fill out the following JSON structure by referring to the PDF. Verify accurate values for each field. If the information is not mentioned in the PDF, write `null` (not `None`).\nUse all of the NCM cathode sample names (e.g., "NCM-622") provided in the electrochemical performance section.\nFill out in the `null`, `[]` and `{}` values based on the example format:\n[{"CAM (Cathode Active Material)": {"Stoichiometry information": {"NCM622": {}}, "Commercial NCM used": {"NCM622": {}}, "Lithium source": None, "Synthesis method": None, "Crystallization method": None, "Crystallization final temperature": None, "Crystallization final duration (hours)": None, "Doping": None, "Coating": None}}]'

ModuleNotFoundError: No module named 'tools.tools'

In [1]:
import os
from dotenv import load_dotenv
from pprint import pprint

from langchain_teddynote import logging

from utils import *

In [2]:
# Pull variables from the local .env file into the process environment.
load_dotenv(dotenv_path=".env")

# Look up the API keys (each is None when the variable is not set).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")

# Switch on LangSmith tracing (optional).
os.environ.update({"LANGCHAIN_TRACING_V2": "true"})

In [3]:
def main(
    data_folder:str="./data",
    file_num_list:list=[11],
    category_number:int=1,
    chunk_size:int=500,
    chunk_overlap:int=100,
    search_k:int=10,
    config_folder:str="./config",
    rag_method:str="multiagent-rag",
    model_name:str="gpt-4o",
    save_graph_png:bool=False,
):
    """Run one RAG graph over each requested paper and collect the answers for one category.

    For every entry in ``file_num_list`` a graph is built with
    ``get_rag_instance``, invoked with the loaded question, and the part of the
    result that belongs to the selected category is printed, written out via
    ``save_output2json`` and appended to the returned list.

    Args:
        data_folder: Base data directory; papers are looked up under
            ``f"{data_folder}/input_data/"``.
        file_num_list: Paper numbers to process, in order. The default list is
            only iterated, never mutated.
        category_number: 1-based index of the category to extract
            (1 = CAM, 2 = Electrode, 3 = Morphological Properties,
            4 = Cathode Performance).
        chunk_size: Forwarded unchanged to ``get_rag_instance``.
        chunk_overlap: Forwarded unchanged to ``get_rag_instance``.
        search_k: Forwarded unchanged to ``get_rag_instance``.
        config_folder: Folder handed to ``load_system_prompt`` and
            ``load_invoke_input``.
        rag_method: Name of the RAG variant; passed to the config loaders,
            ``get_rag_instance`` and ``save_output2json``.
        model_name: Forwarded unchanged to ``get_rag_instance``.
        save_graph_png: Forwarded unchanged to ``get_rag_instance``.

    Returns:
        A list with one extracted answer per processed paper, in the order of
        ``file_num_list``.

    Raises:
        ValueError: If ``category_number`` is outside ``1..4``, or if a graph
            result has none of the keys ``"answer"``, ``"discussion"`` or
            ``"messages"`` set to a non-empty value.
    """
    category_names = ["CAM (Cathode Active Material)", "Electrode (half-cell)", "Morphological Properties", "Cathode Performance"]

    # Validate up front: 0 or a negative number would otherwise wrap around via
    # negative indexing and silently select a different category.
    if not 1 <= category_number <= len(category_names):
        raise ValueError(
            f"category_number must be between 1 and {len(category_names)}, got {category_number!r}"
        )
    category_name = category_names[category_number-1]

    ## Load the system prompt and the invoke input
    system_prompt = load_system_prompt(config_folder=config_folder, category_number=category_number, rag_method=rag_method)
    invoke_input = load_invoke_input(config_folder=config_folder, category_number=category_number, rag_method=rag_method)

    total_answer = []

    ## Iterate over every paper
    for file_number in file_num_list:
        print(f"#####    {file_number}번째 논문    #####")
        print(f"##       rag method     : {rag_method}")
        print(f"##       category name  : {category_name}")

        ## Build the graph for this paper
        voltai_graph = get_rag_instance(
            rag_method=rag_method,
            file_folder=f"{data_folder}/input_data/",
            file_number=file_number,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            search_k=search_k,
            system_prompt=system_prompt,
            model_name=model_name,
            save_graph_png=save_graph_png,
        ).graph

        ## The question is unpacked as keyword arguments when it is a dict,
        ## otherwise as positional arguments
        if isinstance(invoke_input, dict):
            result = voltai_graph.invoke(**invoke_input)
        else:
            result = voltai_graph.invoke(*invoke_input)

        ## Pick the answer out of the result; the key that carries it differs
        ## between RAG methods
        if result.get("answer"):
            temp_answer = result["answer"][0][category_name]
        elif result.get("discussion"):
            temp_answer = result["discussion"][category_name]
        elif result.get("messages"):
            temp_answer = result["messages"][-1][category_name]
        else:
            # Without this branch the previous paper's answer would be reused
            # (and saved under this paper's number), or the first paper would
            # fail with an opaque UnboundLocalError.
            raise ValueError(
                f"Graph result for paper {file_number} ({rag_method}) has no "
                "non-empty 'answer', 'discussion' or 'messages' entry"
            )

        print(f"##       print {file_number} result")
        print("------------------------------------")
        pprint(temp_answer, sort_dicts=False)

        ## Persist this paper's answer as JSON
        save_output2json(each_answer=temp_answer,file_num=file_number, rag_method=rag_method, category_number=category_number)

        total_answer.append(temp_answer)

    return total_answer

In [4]:
# Paper numbers passed as file_num_list to every main(...) call below.
file_num_list = [11]

### Relevance RAG

In [5]:
# Relevance RAG, category 1 ("CAM (Cathode Active Material)"); main returns one answer per paper.
relevance_rag_c1_answer = main(file_num_list=file_num_list, category_number=1, rag_method="relevance-rag")

##       ./config/relevance-rag/c1-system-prompt.yaml를 불러왔습니다.
##       ./config/relevance-rag/c1-question.yaml를 불러왔습니다.
#####    8번째 논문    #####
##       rag method     : relevance-rag
##       category name  : CAM (Cathode Active Material)
##       paper_008 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
        RELEVANCE CHECK : yes
##       print 8 result
------------------------------------
{'Stoichiometry information': {'NCM111': {'Li ratio': 1.02,
                                          'Ni ratio': 0.3333333333,
                                          'Co ratio': 0.3333333333,
                                          'Mn ratio': 0.3333333333,
                                          'O ratio': 2.0},
                               'NCM523': {'Li ratio': 1.02,
                                          'Ni ratio': 0.5,
                                          'Co ratio': 0.2,
                              

In [11]:
# Relevance RAG, category 2 ("Electrode (half-cell)"); main returns one answer per paper.
relevance_rag_c2_answer = main(file_num_list=file_num_list, category_number=2, rag_method="relevance-rag")

##       ./config/relevance-rag/c2-system-prompt.yaml를 불러왔습니다.
##       ./config/relevance-rag/c2-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : relevance-rag
##       category name  : Electrode (half-cell)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
        RELEVANCE CHECK : yes
##       print 11 result
------------------------------------
{'Active material to Conductive additive to Binder ratio': '83:7:10',
 'Electrolyte': [{'Salt': 'LiPF6',
                  'Concentration': '1M',
                  'Solvent': 'EC:DMC',
                  'Solvent ratio': '1:1'}],
 'Additive': 'None',
 'Loading density (mass loading of NCM)': 'None',
 'Additional treatment for electrode': 'None'}


In [16]:
# Relevance RAG, category 3 ("Morphological Properties"); main returns one answer per paper.
relevance_rag_c3_answer = main(file_num_list=file_num_list, category_number=3, rag_method="relevance-rag")

##       ./config/relevance-rag/c3-system-prompt.yaml를 불러왔습니다.
##       ./config/relevance-rag/c3-question.yaml를 불러왔습니다.
#####    8번째 논문    #####
##       rag method     : relevance-rag
##       category name  : Morphological Properties
##       paper_008 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
        RELEVANCE CHECK : yes
##       print 8 result
------------------------------------
{'ParticleSize': {'SC-NCM': '-', 'N-NCM': '-'},
 'ParticleShape': {'SC-NCM': '-', 'N-NCM': '-'},
 'ParticleDistribution': {'SC-NCM': '-', 'N-NCM': '-'},
 'CoatingLayerCharacteristics': {'NCM-RGO': '-'},
 'CrystalStructureAndLatticeCharacteristics': {'SC-NCM': '-',
                                               'WN92': 'The NCM materials '
                                                       'exhibit a '
                                                       'well-developed layered '
                                              

In [13]:
# Relevance RAG, category 4 ("Cathode Performance"); main returns one answer per paper.
relevance_rag_c4_answer = main(file_num_list=file_num_list, category_number=4, rag_method="relevance-rag")

##       ./config/relevance-rag/c4-system-prompt.yaml를 불러왔습니다.
##       ./config/relevance-rag/c4-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : relevance-rag
##       category name  : Cathode Performance
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
        RELEVANCE CHECK : yes
##       print 11 result
------------------------------------
[{'Sample Name': 'NR0',
  'Data': [{'Voltage range': '2.5 - 4.3',
            'Temperature': 'None',
            'C-rate and Specific capacity': [{'C-rate': 0.1,
                                              'Capacity': '155.3'},
                                             {'C-rate': 0.2,
                                              'Capacity': 'None'},
                                             {'C-rate': 0.5,
                                              'Capacity': 'None'},
                                             {'C-rate

### Ensemble RAG

In [9]:
# Ensemble RAG, category 4 ("Cathode Performance"); main returns one answer per paper.
ensemble_rag_answer = main(file_num_list=file_num_list, category_number=4, rag_method="ensemble-rag")

##       ./config/ensemble-rag/c4-system-prompt.yaml를 불러왔습니다.
##       ./config/ensemble-rag/c4-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : ensemble-rag
##       category name  : Cathode Performance
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
        RELEVANCE CHECK for ANSWER 2 : yes
        RELEVANCE CHECK for ANSWER 3 : yes
        RELEVANCE CHECK for ANSWER 1 : yes
        Success Discussion!
##       print 11 result
------------------------------------
[{'Sample Name': 'NR0',
  'Data': [{'Voltage range': '2.5 - 4.3',
            'Temperature': 'None',
            'C-rate and Specific capacity': [{'C-rate': 0.1,
                                              'Capacity': '155.3'},
                                             {'C-rate': 0.2,
                                              'Capacity': 'None'},
                                             {'C-rate': 0

### Multiagent RAG

In [7]:
# multiagent_rag_c1_answer = main(file_num_list=file_num_list, category_number=1, rag_method="multiagent-rag")

##       ./config/multiagent-rag/c1-system-prompt.yaml를 불러왔습니다.
##       ./config/multiagent-rag/c1-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : CAM (Cathode Active Material)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'Stoichiometry information': {'LiNi1/3Co1/3Mn1/3O2': {'Li ratio': 1.0,
                                                       'Ni ratio': 0.33,
                                                       'Co ratio': 0.33,
                                                       'Mn ratio': 0.33,
                                                       'O ratio': 2.0}},
 'Commercial NCM used': {'LiNi1/3Co1/3Mn1/3O2': 'no'},
 'Lithium source': 'LiNO3',
 'Synthesis method': 'solution combustion',
 'Crystallization method': 'calcination',
 'Crystallization final

In [None]:
# multiagent_rag_c2_answer = main(file_num_list=file_num_list, category_number=2, rag_method="multiagent-rag")

##       ./config/multiagent-rag/c2-system-prompt.yaml를 불러왔습니다.
##       ./config/multiagent-rag/c2-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Electrode (half-cell)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'Active material to Conductive additive to Binder ratio': '87:3:10',
 'Electrolyte': [{'Salt': 'LiPF6',
                  'Concentration': '1M',
                  'Solvent': 'EC:DMC',
                  'Solvent ratio': '1:1'}],
 'Additive': 'RGO, 5%',
 'Loading density (mass loading of NCM)': 'None',
 'Additional treatment for electrode': 'None'}


In [5]:
# Multiagent RAG, category 3 ("Morphological Properties"); main returns one answer per paper.
multiagent_rag_c3_answer = main(file_num_list=file_num_list, category_number=3, rag_method="multiagent-rag")

## ./config/multiagent-rag/c3-system-prompt.yaml를 불러왔습니다.
## ./config/multiagent-rag/c3-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Morphological Properties
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'ParticleSize': {'NCM': '200-300 nm'},
 'ParticleShape': {'NCM': 'faceted morphology, regular polyhedrons with smooth '
                          'surfaces'},
 'ParticleDistribution': {'NCM': 'uniform distribution with no significant '
                                 'agglomeration'},
 'CoatingLayerCharacteristics': {'NCMeRGO': 'RGO sheets wrapped around NCM '
                                            'nanoparticles forming a '
                                            'core-shell-like structure'},
 'CrystalStructureAndLatticeCharacteristics': {'NCM': 'hexagona

In [7]:
# Multiagent RAG, category 4 ("Cathode Performance"); main returns one answer per paper.
multiagent_rag_c4_answer = main(file_num_list=file_num_list, category_number=4, rag_method="multiagent-rag")

## ./config/multiagent-rag/c4-system-prompt.yaml를 불러왔습니다.
## ./config/multiagent-rag/c4-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Cathode Performance
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'NR0': [{'Voltage range': '2.5 - 4.3',
          'Temperature': 25,
          'C-rate and Specific capacity': [{'C-rate': 0.1, 'Capacity': 155.3},
                                           {'C-rate': 0.2, 'Capacity': 'None'},
                                           {'C-rate': 0.5, 'Capacity': 'None'},
                                           {'C-rate': 1.0, 'Capacity': 123},
                                           {'C-rate': 2.0, 'Capacity': 'None'},
                                           {'C-rate': 4.0, 'Capacity': 'None'},
                                 