## Multi Agent RAG
- prompt 업데이트 

In [1]:
from dotenv import load_dotenv
import getpass
import os
from langchain_core.messages import HumanMessage
from langchain_teddynote import logging

from graph_multiagentrag import MultiAgentRAG

In [2]:
def _set_if_undefined(var: str) -> None:
    """Ensure the environment variable named ``var`` has a non-empty value.

    When ``os.environ`` has no value (or an empty value) for ``var``, the
    user is prompted through ``getpass.getpass`` and the entered text is
    stored in ``os.environ[var]``. An existing non-empty value is left
    untouched.

    Args:
        var: Name of the environment variable to check.
    """
    # Guard clause: nothing to do when a non-empty value is already present.
    if os.environ.get(var):
        return
    os.environ[var] = getpass.getpass(f"Please provide your {var}")


# Prompt for each required API key that is not already set in the
# environment (OpenAI first, then LangChain).
for required_key in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY"):
    _set_if_undefined(required_key)

# Enable LangSmith tracing (optional) and name the tracing project.
# NOTE(review): a later cell calls logging.langsmith("Multi-Agent-RAG");
# that call presumably replaces this project name - confirm which is intended.
os.environ.update(
    LANGCHAIN_TRACING_V2="true",
    LANGCHAIN_PROJECT="Multi-agent Collaboration",
)

# Key check (kept disabled)
# os.environ["OPENAI_API_KEY"]

In [3]:
# Pass the LangSmith project name; the cell output below echoes it back
# together with a "tracing started" message.
logging.langsmith("Multi-Agent-RAG")

LangSmith 추적을 시작합니다.
[프로젝트명]
Multi-Agent-RAG


In [4]:
# System prompt for the researcher agent (passed to MultiAgentRAG as
# `researcher_system_prompt`). It asks for morphology data of NCM samples in a
# fixed JSON layout and supplies one worked example of that layout.
#
# The placeholder for information missing from the PDF is "None". It must stay
# identical to the placeholder named in `verifier_system_prompt` and in the
# user request sent to the graph; otherwise the agents receive contradictory
# instructions about how to mark missing fields.
q3_researcher_system_prompt = """You are an expert assistant specializing in extracting information from research papers related to battery technology. Your role is to carefully analyze the provided PDF and extract key data in a structured JSON format. Follow these instructions strictly:

1. **Domain-Specific Focus**:
    - Focus exclusively on content related to battery technology (e.g., materials, synthesis methods, properties, performance metrics).
    - Ignore irrelevant sections or general references outside the battery-related content.

2. **Extraction Guidelines**:
    - Use the JSON structure provided as a template.
    - Replace placeholders with values found in the PDF.
    - If a field is not mentioned in the PDF, write "None" instead of removing it.

3. **Clarity and Precision**:
    - Extract numerical data (e.g., ratios, temperatures, durations) with maximum precision.
    - For descriptive fields, summarize the relevant information concisely without adding interpretations.

4. **Structure Adherence**:
    - Maintain the given JSON structure and formatting exactly.
    - Do not modify or rearrange the JSON schema.

5. **External Reference Exclusion**:
    - Only use information from the provided PDF.
    - Ignore any supplementary information or external references not contained in the PDF.

6. **Morphology Analysis**:
    - You must find the morphology results of NCM from the document and respond to the given questions.
    - Review all parts of the document related to SEM and TEM.
    - Answer about the NCM particle’s size, distribution, shape, and coating layer characteristics, crystal structure, and lattice characteristics.
    - Ensure to specify which sample the explanation is for and explicitly exclude any precursor-related details.
    - If no answer can be found despite thorough review, use the value "None".

7. **Example Answer Guidance**:
    - The given question provides an example, and its format consists of keys and values.
    - When generating answers, the values must be rewritten as responses based on the document, ensuring that the example answers are not directly reproduced in the output.

Your task is to ensure that the extracted data is complete, accurate, and formatted according to the requirements.

Below are instructions for filling out items by referring to the examples.
[ 
    {
        "MorphologicalProperties": {
            "ParticleSize": {
                "SC-NCM": "micron-sized particles, which are about 2μm in diameter",
                "N-NCM": "secondary particles are 10-13μm in size"
            },
            "ParticleShape": {
                "SC-NCM": "smooth surfaces",
                "N-NCM": "None"
            },
            "ParticleDistribution": {
                "SC-NCM": "None",
                "N-NCM": "composed of agglomerated primary particles"
            },
            "CoatingLayerCharacteristics": {
                "NCM-RGO": "RGO appears as thin carbon sheets with wrinkled surfaces and folded structures"
            },
            "CrystalStructureAndLatticeCharacteristics": {
                "SC-NCM": "uniform and clear lattice stripes with a crystal plane spacing of about 0.474 nm, corresponding to the (003) crystal plane of the layered structure",
                "WN92": "a layered structure with a space group of R-3m"
            }
        }
    }
]
"""

In [5]:
# System prompt for the verifier agent (passed to MultiAgentRAG as
# `verifier_system_prompt`). It instructs the agent to check the extracted JSON
# against the PDF for accuracy, completeness and schema consistency, to report
# corrections per field, and to print the validated JSON under a
# `### Final Output` heading. Missing fields are expected to be marked "None".
verifier_system_prompt = """You are a meticulous verifier agent specializing in the domain of battery technology.
Your primary task is to check the accuracy of information extracted from research papers on batteries, formatted into JSON by another agent. Your responsibilities include validating the following:

Accuracy:
Cross-check the extracted values against the provided PDF. Ensure every field matches the battery-related content in the PDF accurately.

Completeness:
Confirm that all fields in the JSON structure are either filled with accurate values from the battery-related sections of the PDF or marked as "None" if not mentioned in the document.

Consistency:
Verify that the JSON structure, format, and data types adhere strictly to the required schema for battery-related research data.

Corrections:
Identify and highlight any errors, including inaccurate values, missing data, or structural inconsistencies, providing clear and actionable feedback for correction.
For any issues found, specify:

The field in question.
The nature of the issue (e.g., incorrect value, missing data, formatting error).
Suggestions or corrections to resolve the issue.

Final Output:
If the JSON is entirely correct, confirm its validity and output the JSON structure exactly as provided.
After confirming, you should include the phrase `### Final Output` as a heading before printing the JSON. This ensures the output is clearly marked and easy to locate.

Focus exclusively on battery-related content extracted from the PDF.
Ignore any reference content or information outside the provided document."""

### response

In [6]:
# User request handed to the graph: an empty MorphologicalProperties template
# that the agents are asked to fill from the PDF.
q3_request = """Please fill out the following JSON structure by referring to the PDF. Verify accurate values for each field, replacing the placeholders. If the information is not mentioned in the PDF, write "None".

[ 
    {
        "MorphologicalProperties": {
            "ParticleSize": {
            
            },
            "ParticleShape": {
            
            },
            "ParticleDistribution": {
            
            },
            "CoatingLayerCharacteristics": {
                
            },
            "CrystalStructureAndLatticeCharacteristics": {
            
            }
        }
    }
]"""

# Take the `.graph` attribute of a MultiAgentRAG configured for input file 33,
# the gpt-4o model, and the two system prompts defined in earlier cells.
q3_graph = MultiAgentRAG(
    file_folder="../../data/input_data",
    file_number=33,
    model_name="gpt-4o",
    researcher_system_prompt=q3_researcher_system_prompt,
    verifier_system_prompt=verifier_system_prompt,
).graph

# Invoke the graph with a single human message (name must be valid; "Researcher"
# is used here) and a run config that sets the recursion limit to 30.
q3_result = q3_graph.invoke(
    {"messages": [HumanMessage(content=q3_request, name="Researcher")]},
    {"recursion_limit": 30},
)

  functions = [format_tool_to_openai_function(t) for t in tools]
  return prompt | llm.bind_functions(functions)


In [7]:
## Answer to question 1 for paper 8
# NOTE(review): the label above is translated from the original comment, but the
# graph in the previous cell was built with file_number=33 and the q3 prompt -
# confirm which paper/question this output belongs to.
import pprint

# pprint.pp keeps dict insertion order by default (sort_dicts=False).
pprint.pp(q3_result["messages"][-1])

{'MorphologicalProperties': {'ParticleSize': {'SC-NCM': 'None',
                                              'N-NCM': 'secondary particles '
                                                       'are spherical in shape '
                                                       'having a diameter of '
                                                       '5−15 μm'},
                             'ParticleShape': {'SC-NCM': 'None',
                                               'N-NCM': 'spherical'},
                             'ParticleDistribution': {'SC-NCM': 'None',
                                                      'N-NCM': 'composed of '
                                                               'submicron and '
                                                               'nanosized '
                                                               'primary '
                                                               'particles'},
                             'CoatingLa