In [1]:
import os
from dotenv import load_dotenv
from pprint import pprint

from langchain_teddynote import logging
from models import sample_name_searcher, get_rag_instance

from utils import save_output2json
from prompt import load_system_prompt, load_invoke_input

In [2]:
# Load environment variables from the local .env file.
load_dotenv(dotenv_path=".env")

# Read the API keys from the environment (each is None when the variable is unset).
OPENAI_API_KEY, LANGCHAIN_API_KEY = (
    os.environ.get("OPENAI_API_KEY"),
    os.environ.get("LANGCHAIN_API_KEY"),
)

# Enable LangSmith tracing (optional).
os.environ.update(LANGCHAIN_TRACING_V2="true")

In [8]:
def _extract_category_answer(result, category_name: str):
    """Pull the answer for ``category_name`` out of a graph result.

    The payload key depends on the RAG method (per the original notebook
    comment), so the keys are probed in a fixed order:
    ``"answer"`` (first element), ``"discussion"`` (first element),
    ``"messages"`` (last element).

    Raises:
        ValueError: if none of the keys holds a truthy value. Without this
            check the caller would either hit an UnboundLocalError or silently
            reuse the answer of the previous category.
    """
    if result.get("answer"):
        return result["answer"][0][category_name]
    if result.get("discussion"):
        return result["discussion"][0][category_name]
    if result.get("messages"):
        return result["messages"][-1][category_name]
    raise ValueError(
        f"Graph result for category '{category_name}' has none of the expected "
        f"keys ('answer', 'discussion', 'messages')."
    )


def main(
    data_folder:str="./data",
    file_num_list:list=[11],
    chunk_size:int=500, 
    chunk_overlap:int=100, 
    search_k:int=10,       
    config_folder:str="./config",
    rag_method:str="multiagent-rag", 
    model_name:str="gpt-4o", 
    save_graph_png:bool=False, 
) -> dict:
    """Run the sample-name search and the per-category RAG graph for each paper.

    For every file number, the sample names are retrieved once; then, for each
    of the four categories, the prompts are loaded, a RAG graph is built and
    invoked, and the category answer is printed, saved via ``save_output2json``
    and collected.

    Args:
        data_folder: Base data folder; ``f"{data_folder}/raw/"`` is passed as
            ``file_folder`` to the searcher and the RAG instance.
        file_num_list: Paper/file numbers to process. The default list is only
            iterated, never mutated.
        chunk_size: Forwarded to ``sample_name_searcher`` and ``get_rag_instance``.
        chunk_overlap: Forwarded to ``sample_name_searcher`` and ``get_rag_instance``.
        search_k: Forwarded to ``sample_name_searcher`` and ``get_rag_instance``.
        config_folder: Forwarded to ``load_system_prompt`` and ``load_invoke_input``.
        rag_method: RAG method name; forwarded to the prompt loaders,
            ``get_rag_instance`` and ``save_output2json``.
        model_name: Forwarded to ``sample_name_searcher`` and ``get_rag_instance``.
        save_graph_png: Forwarded to ``get_rag_instance``.

    Returns:
        ``{"paper<file_number>": {<category name>: <answer>, ...}, ...}``.

    Raises:
        ValueError: if a graph result contains none of the expected answer keys.
    """
    category_names = ["CAM (Cathode Active Material)", "Electrode (half-cell)", "Morphological Properties", "Cathode Performance"]
    raw_folder = f"{data_folder}/raw/"  # loop-invariant, shared by searcher and graph

    total_outputs = {}

    ## Iterate over each paper
    for file_number in file_num_list:
        paper_outputs = total_outputs[f"paper{file_number}"] = {}
        print(f"#####    {file_number}번째 논문    #####")
        print(f"##       rag method     : {rag_method}")

        ## Sample Name Searcher
        sample_name_searcher_chain = sample_name_searcher(
            file_folder=raw_folder,
            file_number=file_number,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            search_k=search_k,
            model_name=model_name,
        )
        sample_names = sample_name_searcher_chain.invoke("""Use all of the NCM cathode sample names (e.g., 'NCM-622', 'pristine NCM', 'M-NCM') provided in the electrochemical performance section. You just output sample names. Do Not output like '- NCM622' , just output 'NCM622.""")
        # The searcher has been observed to return names with stray whitespace
        # (e.g. 'N92  '); normalise them before they are injected into prompts.
        if isinstance(sample_names, list):
            sample_names = [
                name.strip() if isinstance(name, str) else name
                for name in sample_names
            ]
        print(f"##       Sample Names    : {sample_names}")

        for category_number, category_name in enumerate(category_names, start=1):
            print(f"##       Category Name   : {category_name}")

            ## Load the system prompt and invoke input from the config folder
            ## (original note: the config folder name needs to be adjusted)
            system_prompt = load_system_prompt(config_folder=config_folder, category_number=category_number, rag_method=rag_method)
            invoke_input = load_invoke_input(config_folder=config_folder, category_number=category_number, rag_method=rag_method, sample_names=sample_names)

            ## Build the graph
            voltai_graph = get_rag_instance(
                rag_method=rag_method,
                file_folder=raw_folder,
                file_number=file_number,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                search_k=search_k,
                system_prompt=system_prompt,
                model_name=model_name,
                save_graph_png=save_graph_png,
            ).graph

            ## A dict input is expanded as keyword arguments, anything else positionally
            if isinstance(invoke_input, dict):
                result = voltai_graph.invoke(**invoke_input)
            else:
                result = voltai_graph.invoke(*invoke_input)

            ## Pick the answer out of the result; the key depends on the RAG method
            category_answer = _extract_category_answer(result, category_name)

            pprint(category_answer, sort_dicts=False)

            ## Persist the answer as JSON
            save_output2json(each_answer=category_answer,file_num=file_number, rag_method=rag_method, category_number=category_number)

            paper_outputs[category_name] = category_answer

    return total_outputs

In [9]:
# Paper number(s) to process; passed to main() as `file_num_list` by the run cells.
file_num_list = [39]

In [5]:
# Run the pipeline on the selected papers with the "multiagent-rag" method.
multiagent_rag_output = main(file_num_list=file_num_list, rag_method="multiagent-rag")

#####    39번째 논문    #####
##       rag method     : multiagent-rag
##       Sample Names    : ['N92  ', 'WN92']
##          Category Name   : CAM (Cathode Active Material)
##          ./config/multiagent-rag/c1-question.yaml를 불러왔습니다.
##       print 39 result
------------------------------------
{'Stoichiometry information': {'N92': {'Li ratio': 1.0,
                                       'Ni ratio': 0.92,
                                       'Co ratio': 0.04,
                                       'Mn ratio': 0.04,
                                       'O ratio': 2.0},
                               'WN92': {'Li ratio': 1.0,
                                        'Ni ratio': 0.92,
                                        'Co ratio': 0.04,
                                        'Mn ratio': 0.04,
                                        'W ratio': 0.01,
                                        'O ratio': 2.0}},
 'Commercial NCM used': {'N92': 'no', 'WN92': 'no'},
 'Lithium source': 'Li

In [6]:
# Run the pipeline on the selected papers with the "relevance-rag" method.
relevance_rag_output = main(file_num_list=file_num_list, rag_method="relevance-rag")

#####    39번째 논문    #####
##       rag method     : relevance-rag
##       Sample Names    : ['N92', 'WN92']
##          Category Name   : CAM (Cathode Active Material)
##          ./config/relevance-rag/c1-question.yaml를 불러왔습니다.
        RELEVANCE CHECK : yes
##       print 39 result
------------------------------------
{'Stoichiometry information': {'N92': {'Li ratio': 1.0,
                                       'Ni ratio': 0.92,
                                       'Co ratio': 0.04,
                                       'Mn ratio': 0.04,
                                       'W ratio': 0.0,
                                       'O ratio': 2.0},
                               'WN92': {'Li ratio': 1.0,
                                        'Ni ratio': 0.92,
                                        'Co ratio': 0.04,
                                        'Mn ratio': 0.04,
                                        'W ratio': 0.01,
                                        'O ratio': 2

In [11]:
# Run the pipeline on the selected papers with the "ensemble-rag" method.
ensemble_rag_output = main(file_num_list=file_num_list, rag_method="ensemble-rag")

#####    39번째 논문    #####
##       rag method     : ensemble-rag
##       Sample Names    : ['N92  ', 'WN92']
##       Category Name   : CAM (Cathode Active Material)
##          ./config/ensemble-rag/c1-question.yaml를 불러왔습니다.
        RELEVANCE CHECK for ANSWER 3 : yes
        RELEVANCE CHECK for ANSWER 1 : yes
        RELEVANCE CHECK for ANSWER 2 : yes
{'Stoichiometry information': {'N92': {'Li ratio': 1.0,
                                       'Ni ratio': 0.92,
                                       'Co ratio': 0.04,
                                       'Mn ratio': 0.04,
                                       'W ratio': 0.0,
                                       'O ratio': 2.0},
                               'WN92': {'Li ratio': 1.0,
                                        'Ni ratio': 0.9198,
                                        'Co ratio': 0.04,
                                        'Mn ratio': 0.04,
                                        'W ratio': 0.001,
               