In [16]:
import os
from dotenv import load_dotenv
from pprint import pprint

from langchain_teddynote import logging

from graph_relevancerag import RelevanceRAG
from graph_ensemblerag import EnsembleRAG
from graph_multiagentrag import MultiAgentRAG

from utils import load_system_prompt, load_invoke_input

In [17]:
# Load environment variables from the local .env file.
load_dotenv(dotenv_path=".env")

# Read the API keys from the environment (None when a key is not defined).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")

# Enable LangSmith tracing (optional).
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "Multi-agent Collaboration",
    }
)

# Register the LangSmith project name.
# NOTE(review): the recorded output of this cell reports the project as
# "RAG Experiment", so the LANGCHAIN_PROJECT value set above appears to be
# superseded by this call — confirm which project name is intended.
logging.langsmith("RAG Experiment")

LangSmith 추적을 시작합니다.
[프로젝트명]
RAG Experiment


## 작성 시 고려사항
1. 질문 경로 및 번호 설정
2. rag_method 설정: str로 받으면 해당 객체를 불러오기

In [18]:
def get_rag_instance(
    rag_method,
    file_folder,
    file_number,
    # db_folder,
    chunk_size,
    chunk_overlap,
    search_k,
    system_prompt,
    model_name,
    save_graph_png
):
    """
    Build the RAG object that corresponds to ``rag_method``.

    Params:
        rag_method: RAG variant ("relevance-rag", "ensemble-rag", "multiagent-rag")
        file_folder: folder path where the paper files are located
        file_number: number of the paper to process
        chunk_size: forwarded unchanged to the RAG class constructor
        chunk_overlap: forwarded unchanged to the RAG class constructor
        search_k: forwarded unchanged to the RAG class constructor
        system_prompt: system prompt
        model_name: LLM model name ("gpt-4o", "gpt-4o-mini")
        save_graph_png: whether to save the graph as a PNG

    Return:
        The created RAG instance (RelevanceRAG, EnsembleRAG or MultiAgentRAG).

    Raises:
        ValueError: if ``rag_method`` is not one of the supported names.
    """

    # Pick the class first so the constructor call is written only once.
    if rag_method == "relevance-rag":
        rag_class = RelevanceRAG
    elif rag_method == "ensemble-rag":
        rag_class = EnsembleRAG
    elif rag_method == "multiagent-rag":
        rag_class = MultiAgentRAG
    else:
        # Fail loudly here; silently returning None would only surface later
        # as an AttributeError when the caller accesses ``.graph``.
        raise ValueError(
            f"Unsupported rag_method: {rag_method!r}. "
            "Expected one of: 'relevance-rag', 'ensemble-rag', 'multiagent-rag'."
        )

    return rag_class(
        file_folder,
        file_number,
        chunk_size,
        chunk_overlap,
        search_k,
        system_prompt,
        model_name,
        save_graph_png,
    )


def main(
    data_folder:str="./data",
    file_num_list:list=[11],
    category_number:int=1,
    chunk_size:int=500,
    chunk_overlap:int=100,
    search_k:int=10,
    config_folder:str="./config",
    rag_method:str="multiagent-rag",
    model_name:str="gpt-4o",
    save_graph_png:bool=False,
):
    """
    Run one RAG extraction for a category and print/return the answer.

    Params:
        data_folder: base data folder; papers are read from "<data_folder>/input_data/"
        file_num_list: paper numbers to process (never mutated by this function)
        category_number: 1-based index into the category list
            (1: CAM, 2: Electrode, 3: Morphological Properties, 4: Samples)
        chunk_size: forwarded to get_rag_instance
        chunk_overlap: forwarded to get_rag_instance
        search_k: forwarded to get_rag_instance
        config_folder: folder passed to load_system_prompt / load_invoke_input
        rag_method: RAG variant ("relevance-rag", "ensemble-rag", "multiagent-rag")
        model_name: LLM model name
        save_graph_png: whether to save the graph as a PNG

    Return:
        The answer extracted for the first paper in ``file_num_list``,
        or None when ``file_num_list`` is empty.

    Raises:
        ValueError: if ``category_number`` is out of range, or if the graph
            result has none of the "answer", "discussion", "messages" entries.
    """
    category_names = ["CAM (Cathode Active Material)", "Electrode (half-cell)", "Morphological Properties", "Samples"]

    # Validate up front: 0 or a negative number would otherwise wrap around
    # through negative indexing and silently select the wrong category.
    if not 1 <= category_number <= len(category_names):
        raise ValueError(
            f"category_number must be between 1 and {len(category_names)}, got {category_number}"
        )
    category_name = category_names[category_number - 1]

    ## Load the system prompt and the invoke input
    system_prompt = load_system_prompt(config_folder=config_folder, category_number=category_number, rag_method=rag_method)
    invoke_input = load_invoke_input(config_folder=config_folder, category_number=category_number, rag_method=rag_method)

    ## Iterate over the papers
    for file_number in file_num_list:
        print(f"#####    {file_number}번째 논문    #####")
        print(f"##       rag method     : {rag_method}")
        print(f"##       category name  : {category_name}")

        ## Build the graph
        voltai_graph = get_rag_instance(
            rag_method=rag_method,
            file_folder=f"{data_folder}/input_data/",
            file_number=file_number,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            search_k=search_k,
            system_prompt=system_prompt,
            model_name=model_name,
            save_graph_png=save_graph_png,
        ).graph

        ## A dict is expanded into keyword arguments, anything else into positional ones
        if isinstance(invoke_input, dict):
            result = voltai_graph.invoke(**invoke_input)
        else:
            result = voltai_graph.invoke(*invoke_input)

        ## Each RAG method stores its answer under a different key
        if result.get("answer"):
            temp_answer = result["answer"][0][category_name]
        elif result.get("discussion"):
            temp_answer = result["discussion"][category_name]
        elif result.get("messages"):
            temp_answer = result["messages"][-1][category_name]
        else:
            # Without this branch temp_answer would be unbound below.
            raise ValueError(
                "Graph result has none of 'answer', 'discussion', 'messages'; "
                f"available keys: {list(result)}"
            )

        print(f"##       print {file_number} result")
        print("------------------------------------")
        pprint(temp_answer, sort_dicts=False)

        # NOTE(review): this return sits inside the loop, so only the first
        # paper in file_num_list is ever processed. Kept as-is to preserve the
        # return shape callers rely on — confirm whether all papers should be
        # processed and their answers collected instead.
        return temp_answer

In [19]:
# Paper numbers to process; passed as `file_num_list` to main() in the cells below.
file_num_list = [11]

In [20]:
# Load the invoke input for category 1 of the ensemble-rag configuration so
# its question text can be inspected before running the graph.
invoke_input = load_invoke_input(config_folder="./config", category_number=1, rag_method="ensemble-rag")


##       ./config/ensemble-rag/c1-question.yaml를 불러왔습니다.


In [22]:
# Show the question text; the loaded input is indexed as a nested mapping
# via ["input"]["question"].
print(invoke_input["input"]["question"])

Please fill out the following JSON structure by referring to the PDF. Verify accurate values for each field, replacing the placeholders. If the information is not mentioned in the PDF, write "None".

[
  {
    "CAM (Cathode Active Material)": {
      "Stoichiometry information": {
      },
      "Commercial NCM used": {
      },
      "Lithium source": ,
      "Synthesis method": ,
      "Crystallization method": ,
      "Crystallization final temperature": ,
      "Crystallization final duration (hours)": ,
      "Doping": ,
      "Coating": ,
      "Additional treatment": 
    }
  }
]


### Relevance RAG

In [7]:
# relevance_rag_answer = main(file_num_list=file_num_list, category_number=1, rag_method="relevance-rag")

In [8]:
# relevance_rag_answer = main(file_num_list=file_num_list, category_number=1, rag_method="relevance-rag")

In [9]:
# relevance_rag_answer = main(file_num_list=file_num_list, category_number=1, rag_method="relevance-rag")

### Ensemble RAG

In [10]:
# Run the ensemble-rag pipeline for category 1.
# NOTE(review): the recorded run of this cell fails with a KeyError from
# PromptTemplate: the JSON braces around "CAM (Cathode Active Material)" are
# parsed as a template variable. Per the error message, literal braces must be
# escaped as {{ ... }}; presumably the fix belongs in the ensemble-rag c1
# system-prompt YAML — confirm.
ensemble_rag_answer = main(file_num_list=file_num_list, category_number=1, rag_method="ensemble-rag")

##       ./config/ensemble-rag/c1-system-prompt.yaml를 불러왔습니다.
##       ./config/ensemble-rag/c1-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : ensemble-rag
##       category name  : CAM (Cathode Active Material)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10


KeyError: 'Input to PromptTemplate is missing variables {\'\\n        "CAM (Cathode Active Material)"\'}.  Expected: [\'\\n        "CAM (Cathode Active Material)"\', \'context\', \'question\'] Received: [\'question\', \'context\', \'chat_history\']\nNote: if you intended {\n        "CAM (Cathode Active Material)"} to be part of the string and not a variable, please escape it with double curly braces like: \'{{\n        "CAM (Cathode Active Material)"}}\'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT '

### Multiagent RAG

In [7]:
# multiagent_rag_c1_answer = main(file_num_list=file_num_list, category_number=1, rag_method="multiagent-rag")

##       ./config/multiagent-rag/c1-system-prompt.yaml를 불러왔습니다.
##       ./config/multiagent-rag/c1-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : CAM (Cathode Active Material)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'Stoichiometry information': {'LiNi1/3Co1/3Mn1/3O2': {'Li ratio': 1.0,
                                                       'Ni ratio': 0.33,
                                                       'Co ratio': 0.33,
                                                       'Mn ratio': 0.33,
                                                       'O ratio': 2.0}},
 'Commercial NCM used': {'LiNi1/3Co1/3Mn1/3O2': 'no'},
 'Lithium source': 'LiNO3',
 'Synthesis method': 'solution combustion',
 'Crystallization method': 'calcination',
 'Crystallization final

In [None]:
# multiagent_rag_c2_answer = main(file_num_list=file_num_list, category_number=2, rag_method="multiagent-rag")

##       ./config/multiagent-rag/c2-system-prompt.yaml를 불러왔습니다.
##       ./config/multiagent-rag/c2-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Electrode (half-cell)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'Active material to Conductive additive to Binder ratio': '87:3:10',
 'Electrolyte': [{'Salt': 'LiPF6',
                  'Concentration': '1M',
                  'Solvent': 'EC:DMC',
                  'Solvent ratio': '1:1'}],
 'Additive': 'RGO, 5%',
 'Loading density (mass loading of NCM)': 'None',
 'Additional treatment for electrode': 'None'}


In [11]:
# multiagent_rag_c3_answer = main(file_num_list=file_num_list, category_number=3, rag_method="multiagent-rag")

##       ./config/multiagent-rag/c3-system-prompt.yaml를 불러왔습니다.
##       ./config/multiagent-rag/c3-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : MorphologicalProperties
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'ParticleSize': {'NCM': 'narrow size range of 200-300 nm'},
 'ParticleShape': {'NCM': 'faceted morphology, like regular polyhedrons with '
                          'smooth surfaces'},
 'ParticleDistribution': {'NCM': 'uniform distribution with no significant '
                                 'agglomeration'},
 'CoatingLayerCharacteristics': {'NCM-RGO': 'RGO sheets wrapped around NCM '
                                            'nanoparticles forming a '
                                            'core-shell-like structure'},
 'CrystalStructureAndLatti

In [None]:
# multiagent_rag_c4_answer = main(file_num_list=file_num_list, category_number=4, rag_method="multiagent-rag")

##       ./config/multiagent-rag/c4-system-prompt.yaml를 불러왔습니다.
##       ./config/multiagent-rag/c4-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Samples
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
[{'Name': 'NR0',
  'Voltage ranges': [{'Range': '2.5 - 4.3',
                      'Temperature': 25,
                      'Data': [{'C-rate': 0.1, 'Capacity': 155.3},
                               {'C-rate': 0.2, 'Capacity': 'None'},
                               {'C-rate': 0.5, 'Capacity': 'None'},
                               {'C-rate': 1, 'Capacity': 123},
                               {'C-rate': 2, 'Capacity': 'None'},
                               {'C-rate': 4, 'Capacity': 'None'},
                               {'C-rate': 6, 'Capacity': 'None'},
            