In [1]:
import sys
sys.path.append("../../")

import os
from dotenv import load_dotenv
from pprint import pprint

from graph_relevancerag import RelevanceRAG
from graph_ensemblerag import EnsembleRAG
from graph_multiagentrag import MultiAgentRAG

from utils import *

In [2]:
# Load environment variables from the local .env file.
load_dotenv(dotenv_path=".env")

# Read the API keys from the environment (os.getenv returns None when a key is unset).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")

# Enable LangSmith tracing (optional).
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [3]:
def get_rag_instance(
    rag_method, 
    file_folder, 
    file_number, 
    chunk_size, 
    chunk_overlap,
    search_k,
    system_prompt, 
    model_name, 
    save_graph_png
):
    """
    Create the RAG instance that corresponds to ``rag_method``.

    Every supported class is constructed with the same positional arguments,
    in the same order as this function's parameters after ``rag_method``.

    Params:
        rag_method: RAG variant to build ("relevance-rag", "ensemble-rag", "multiagent-rag")
        file_folder: path of the folder that holds the paper files
        file_number: number of the paper to process
        chunk_size: forwarded to the RAG class (presumably the text-splitter chunk size - confirm in the class)
        chunk_overlap: forwarded to the RAG class (presumably the overlap between chunks - confirm in the class)
        search_k: forwarded to the RAG class (presumably the number of retrieved chunks - confirm in the class)
        system_prompt: system prompt
        model_name: LLM model name ("gpt-4o", "gpt-4o-mini")
        save_graph_png: whether to save the graph as a PNG

    Return:
        The newly created RAG instance.

    Raises:
        ValueError: if ``rag_method`` is not one of the supported names.
    """
    # All three classes share one constructor signature, so build the arguments once.
    ctor_args = (
        file_folder,
        file_number,
        chunk_size,
        chunk_overlap,
        search_k,
        system_prompt,
        model_name,
        save_graph_png,
    )

    if rag_method == "relevance-rag":
        return RelevanceRAG(*ctor_args)

    if rag_method == "ensemble-rag":
        return EnsembleRAG(*ctor_args)

    if rag_method == "multiagent-rag":
        return MultiAgentRAG(*ctor_args)

    # Fail loudly instead of returning None, which would only surface later
    # as an unrelated AttributeError when the caller accesses `.graph`.
    raise ValueError(
        f"Unknown rag_method {rag_method!r}; expected one of "
        "'relevance-rag', 'ensemble-rag', 'multiagent-rag'."
    )


def main(
    data_folder:str="./data",
    file_num_list:list=[11],
    category_number:int=1, 
    chunk_size:int=500, 
    chunk_overlap:int=100, 
    search_k:int=10,       
    config_folder:str="./config",
    rag_method:str="multiagent-rag", 
    model_name:str="gpt-4o", 
    save_graph_png:bool=False, 
):
    """
    Run one RAG method for one category over a list of papers.

    For each paper number the function builds the RAG graph, invokes it with
    the loaded question input, extracts the answer for the selected category,
    pretty-prints it, saves it through ``save_output2json`` and collects it.

    Params:
        data_folder: base data folder; papers are read from ``{data_folder}/input_data/``
        file_num_list: paper numbers to process (the default list is never mutated here)
        category_number: 1-based index into the four category names
        chunk_size: forwarded to ``get_rag_instance``
        chunk_overlap: forwarded to ``get_rag_instance``
        search_k: forwarded to ``get_rag_instance``
        config_folder: folder from which the system prompt and invoke input are loaded
        rag_method: RAG variant ("relevance-rag", "ensemble-rag", "multiagent-rag")
        model_name: LLM model name
        save_graph_png: whether to save the graph as a PNG

    Return:
        List with one extracted answer per paper, in ``file_num_list`` order.

    Raises:
        ValueError: if ``category_number`` is outside 1..4.
        KeyError: if a graph result has none of "answer", "discussion", "messages".
    """
    category_names = ["CAM (Cathode Active Material)", "Electrode (half-cell)", "Morphological Properties", "Cathode Performance"]

    # Validate before indexing: 0 or a negative number would otherwise wrap
    # around through negative indexing and silently select the wrong category.
    if not 1 <= category_number <= len(category_names):
        raise ValueError(
            f"category_number must be between 1 and {len(category_names)}, got {category_number!r}"
        )
    category_name = category_names[category_number - 1]

    ## Load system_prompt and invoke_input
    system_prompt = load_system_prompt(config_folder=config_folder, category_number=category_number, rag_method=rag_method)
    invoke_input = load_invoke_input(config_folder=config_folder, category_number=category_number, rag_method=rag_method)

    total_answer = []

    ## Iterate over each paper
    for file_number in file_num_list:
        print(f"#####    {file_number}번째 논문    #####")
        print(f"##       rag method     : {rag_method}")
        print(f"##       category name  : {category_name}")

        ## Build the graph
        voltai_graph = get_rag_instance(
            rag_method=rag_method, 
            file_folder=f"{data_folder}/input_data/", 
            file_number=file_number, 
            chunk_size=chunk_size, 
            chunk_overlap=chunk_overlap, 
            search_k=search_k, 
            system_prompt=system_prompt,
            model_name=model_name, 
            save_graph_png=save_graph_png,
        ).graph

        ## Handle both a dict-shaped and a sequence-shaped invoke input
        if isinstance(invoke_input, dict):
            result = voltai_graph.invoke(**invoke_input)
        else:
            result = voltai_graph.invoke(*invoke_input)

        ## The key that carries the answer differs between RAG methods
        if result.get("answer"):
            temp_answer = result["answer"][0][category_name]
        elif result.get("discussion"):
            temp_answer = result["discussion"][category_name]
        elif result.get("messages"):
            temp_answer = result["messages"][-1][category_name]
        else:
            # Without this branch temp_answer would be unbound on the first
            # paper, or would silently keep the previous paper's answer and
            # be saved under the current paper number.
            raise KeyError(
                f"Graph result for paper {file_number} has no non-empty "
                "'answer', 'discussion' or 'messages' entry"
            )

        print(f"##       print {file_number} result")
        print("------------------------------------")
        pprint(temp_answer, sort_dicts=False)

        ## Save the answer as JSON
        save_output2json(each_answer=temp_answer,file_num=file_number, rag_method=rag_method, category_number=category_number)

        total_answer.append(temp_answer)

    return total_answer

In [4]:
# Paper numbers to process; passed to main() as file_num_list by the run cells.
file_num_list = [11, 16, 22]

In [5]:
# Run multiagent-rag for category 1 ("CAM (Cathode Active Material)") on every paper in file_num_list.
multiagent_rag_c1_answer = main(    
    data_folder="../../data",
    file_num_list=file_num_list,
    category_number=1, 
    chunk_size=500, 
    chunk_overlap=100, 
    search_k=10,       
    config_folder="../../config",
    rag_method="multiagent-rag", 
    model_name="gpt-4o", 
    save_graph_png=False
)

## ../../config/multiagent-rag/c1-system-prompt.yaml를 불러왔습니다.
## ../../config/multiagent-rag/c1-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : CAM (Cathode Active Material)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'Stoichiometry information': {'LiNi1/3Co1/3Mn1/3O2': {'Li ratio': 1.0,
                                                       'Ni ratio': 0.33,
                                                       'Co ratio': 0.33,
                                                       'Mn ratio': 0.33,
                                                       'O ratio': 2.0}},
 'Commercial NCM used': {'LiNi1/3Co1/3Mn1/3O2': 'no'},
 'Lithium source': 'LiNO3',
 'Synthesis method': 'solution combustion',
 'Crystallization method': 'calcination',
 'Crystallization final tem

In [6]:
# Run multiagent-rag for category 2 ("Electrode (half-cell)") on every paper in file_num_list.
multiagent_rag_c2_answer = main(    
    data_folder="../../data",
    file_num_list=file_num_list,
    category_number=2, 
    chunk_size=500, 
    chunk_overlap=100, 
    search_k=10,       
    config_folder="../../config",
    rag_method="multiagent-rag", 
    model_name="gpt-4o", 
    save_graph_png=False
)

## ../../config/multiagent-rag/c2-system-prompt.yaml를 불러왔습니다.
## ../../config/multiagent-rag/c2-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Electrode (half-cell)
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'Active material to Conductive additive to Binder ratio': 'null',
 'Electrolyte': [{'Salt': 'LiPF6',
                  'Concentration': '1M',
                  'Solvent': 'EC:DMC',
                  'Solvent ratio': '1:1'}],
 'Additive': 'RGO, 5%',
 'Loading density (mass loading of NCM)': 'null',
 'Additional treatment for electrode': 'null'}
##       ./output/json/multiagent-rag/paper_011_output/category-2-paper_011_output-250204162839.json를 저장했습니다.
#####    16번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Electrode (half-ce

In [7]:
# Run multiagent-rag for category 3 ("Morphological Properties") on every paper in file_num_list.
multiagent_rag_c3_answer = main(    
    data_folder="../../data",
    file_num_list=file_num_list,
    category_number=3, 
    chunk_size=500, 
    chunk_overlap=100, 
    search_k=10,       
    config_folder="../../config",
    rag_method="multiagent-rag", 
    model_name="gpt-4o", 
    save_graph_png=False
)

## ../../config/multiagent-rag/c3-system-prompt.yaml를 불러왔습니다.
## ../../config/multiagent-rag/c3-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Morphological Properties
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'ParticleSize': {'NCM': '200-300 nm'},
 'ParticleShape': {'NCM': 'faceted morphology, like regular polyhedrons with '
                          'smooth surfaces'},
 'ParticleDistribution': {'NCM': 'uniform distribution with no significant '
                                 'agglomeration'},
 'CoatingLayerCharacteristics': {'NCM-RGO': 'thin RGO sheets wrapped around '
                                            'NCM nanoparticles forming a '
                                            'core-shell-like structure'},
 'CrystalStructureAndLatticeCharacteristics':

In [8]:
# Run multiagent-rag for category 4 ("Cathode Performance") on every paper in file_num_list.
multiagent_rag_c4_answer = main(    
    data_folder="../../data",
    file_num_list=file_num_list,
    category_number=4, 
    chunk_size=500, 
    chunk_overlap=100, 
    search_k=10,       
    config_folder="../../config",
    rag_method="multiagent-rag", 
    model_name="gpt-4o", 
    save_graph_png=False
)

## ../../config/multiagent-rag/c4-system-prompt.yaml를 불러왔습니다.
## ../../config/multiagent-rag/c4-question.yaml를 불러왔습니다.
#####    11번째 논문    #####
##       rag method     : multiagent-rag
##       category name  : Cathode Performance
##       paper_011 retriever를 생성했습니다.
##          - chunk_size    :500
##          - chunk_overlap :100
##          - retrieve_k    :10
##       print 11 result
------------------------------------
{'NR0': [{'Voltage range': '2.5-4.3',
          'Temperature': 25,
          'C-rate and Specific capacity': [{'C-rate': 0.1, 'Capacity': 155.3},
                                           {'C-rate': 0.2, 'Capacity': None},
                                           {'C-rate': 0.5, 'Capacity': None},
                                           {'C-rate': 1.0, 'Capacity': 123},
                                           {'C-rate': 2.0, 'Capacity': None},
                                           {'C-rate': 4.0, 'Capacity': None},
                                   

In [9]:
import json
import pandas as pd

### Category1 전처리

In [17]:
rag_method = "multiagent-rag"

# One JSON output folder per paper, e.g. "./output/json/multiagent-rag/paper_011_output".
# zfill pads the paper number to at least three digits without truncating longer
# numbers (slicing the last three characters would map 1011 onto paper_011).
output_folder_path = [
    f"./output/json/{rag_method}/paper_{str(file_num).zfill(3)}_output"
    for file_num in file_num_list
]

In [19]:
# Show the output folder built for the first paper in file_num_list.
output_folder_path[0]

'./output/json/multiagent-rag/paper_011_output'

In [21]:
# Merge every JSON file in the first paper's output folder into one flat dict.
# os.listdir returns entries in arbitrary order, so the filenames are sorted:
# when two files share a key, the file that sorts last wins deterministically.
total_data = {}
for filename in sorted(os.listdir(output_folder_path[0])):
    if not filename.endswith(".json"):
        continue
    json_file_path = os.path.join(output_folder_path[0], filename)
    with open(json_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Later files override earlier ones on duplicate keys.
    total_data = total_data | data
        

In [24]:
# Display the merged dictionary.
total_data

{'Stoichiometry information': {'LiNi1/3Co1/3Mn1/3O2': {'Li ratio': 1.0,
   'Ni ratio': 0.33,
   'Co ratio': 0.33,
   'Mn ratio': 0.33,
   'O ratio': 2.0}},
 'Commercial NCM used': {'LiNi1/3Co1/3Mn1/3O2': 'no'},
 'Lithium source': 'LiNO3',
 'Synthesis method': 'solution combustion',
 'Crystallization method': 'calcination',
 'Crystallization final temperature': 850,
 'Crystallization final duration (hours)': 15,
 'Doping': None,
 'Coating': 'RGO',
 'Additional treatment': None,
 'Active material to Conductive additive to Binder ratio': 'null',
 'Electrolyte': [{'Salt': 'LiPF6',
   'Concentration': '1M',
   'Solvent': 'EC:DMC',
   'Solvent ratio': '1:1'}],
 'Additive': 'RGO, 5%',
 'Loading density (mass loading of NCM)': 'null',
 'Additional treatment for electrode': 'null',
 'ParticleSize': {'NCM': '200-300 nm'},
 'ParticleShape': {'NCM': 'faceted morphology, like regular polyhedrons with smooth surfaces'},
 'ParticleDistribution': {'NCM': 'uniform distribution with no significant agglo

In [26]:
# NOTE(review): json_data_dict is not defined in any visible cell; it presumably
# comes from a deleted or out-of-order cell. Confirm before Restart & Run All.
pd.DataFrame(json_data_dict["category1"])

Unnamed: 0,paper_011,paper_016,paper_022
Stoichiometry information,"{'Li(Ni1/3Co1/3Mn1/3)O2': {'Li ratio': 1.0, 'N...","{'NCM': {'Li ratio': 1.0, 'Ni ratio': 0.33, 'C...","{'LiNi0.84Co0.10Mn0.06O2': {'Li ratio': 1.0, '..."
Commercial NCM used,{'Li(Ni1/3Co1/3Mn1/3)O2': 'no'},"{'NCM': 'no', 'NCM/C': 'no'}","{'LiNi0.84Co0.10Mn0.06O2': 'yes', 'V-doped LiN..."
Lithium source,LiNO3,LiAc,LiOH
Synthesis method,solution combustion,sol-gel,solid-state reaction
Crystallization method,calcination,,calcination
Crystallization final temperature,850,850,760
Crystallization final duration (hours),15,10,15
Doping,,,V5+
Coating,RGO,carbon,
Additional treatment,microwave irradiation,,


In [53]:
# Build a DataFrame from the first entry of json_data_list.
# NOTE(review): json_data_list is not defined in any visible cell; confirm where it is created.
temp_df = pd.DataFrame(json_data_list[0])

In [54]:
# Display the frame.
temp_df

Unnamed: 0,Stoichiometry information,Commercial NCM used,Lithium source,Synthesis method,Crystallization method,Crystallization final temperature,Crystallization final duration (hours),Doping,Coating,Additional treatment
NR0,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
NR1,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
NR3,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
NR5,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition


In [46]:
# Expand the "Stoichiometry information" cells into one column per key, join the
# remaining columns back on the index, then move the index into a "Sample name" column.
temp_c1_df = (
    temp_df["Stoichiometry information"]
    .apply(pd.Series)
    .join(temp_df.drop(columns=["Stoichiometry information"]))
    .reset_index(names="Sample name")
)

Unnamed: 0,Sample name,Li ratio,Ni ratio,Co ratio,Mn ratio,O ratio,Commercial NCM used,Lithium source,Synthesis method,Crystallization method,Crystallization final temperature,Crystallization final duration (hours),Doping,Coating,Additional treatment
0,NR0,1.0,0.33,0.33,0.33,2.0,no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
1,NR1,1.0,0.33,0.33,0.33,2.0,no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
2,NR3,1.0,0.33,0.33,0.33,2.0,no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
3,NR5,1.0,0.33,0.33,0.33,2.0,no,LiNO3,solution combustion,calcination,850,15,,,RGO addition


In [26]:
# Preview only the "Stoichiometry information" column after applying pd.Series to each cell.
pd.DataFrame(json_data_list[0])["Stoichiometry information"].apply(pd.Series)

Unnamed: 0,Li ratio,Ni ratio,Co ratio,Mn ratio,O ratio
NR0,1.0,0.33,0.33,0.33,2.0
NR1,1.0,0.33,0.33,0.33,2.0
NR3,1.0,0.33,0.33,0.33,2.0
NR5,1.0,0.33,0.33,0.33,2.0


In [27]:
# Preview the unmodified frame built from json_data_list[0].
pd.DataFrame(json_data_list[0])

Unnamed: 0,Stoichiometry information,Commercial NCM used,Lithium source,Synthesis method,Crystallization method,Crystallization final temperature,Crystallization final duration (hours),Doping,Coating,Additional treatment
NR0,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
NR1,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
NR3,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
NR5,"{'Li ratio': 1.0, 'Ni ratio': 0.33, 'Co ratio'...",no,LiNO3,solution combustion,calcination,850,15,,,RGO addition
