## Multi Agent RAG
- prompt 업데이트 

In [1]:
from dotenv import load_dotenv
import getpass
import os
from langchain_core.messages import HumanMessage
from langchain_teddynote import logging

from graph_multiagentrag import MultiAgentRAG

In [2]:
def _set_if_undefined(var: str):
    """Make sure the environment variable ``var`` has a non-empty value.

    When ``var`` is already present in ``os.environ`` with a truthy value,
    nothing happens. Otherwise the user is asked for it through ``getpass``
    (so the typed value is not echoed) and the answer is stored in
    ``os.environ`` under ``var``.

    Args:
        var: Name of the environment variable to check and, if needed, fill.
    """
    # Guard clause: an existing non-empty value is left untouched.
    if os.environ.get(var):
        return
    os.environ[var] = getpass.getpass(f"Please provide your {var}")


# Ask for each API key only when it is not already set in the environment.
_set_if_undefined("OPENAI_API_KEY")
_set_if_undefined("LANGCHAIN_API_KEY")

# Enable LangSmith tracing (optional) and set the project name for the traces.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "Multi-agent Collaboration",
    }
)

# Uncomment to check the key:
# os.environ["OPENAI_API_KEY"]

In [3]:
# Pass the project name to the langchain_teddynote logging helper; the cell
# output reports that LangSmith tracing starts under this project name.
logging.langsmith("Multi-Agent-RAG")

LangSmith 추적을 시작합니다.
[프로젝트명]
Multi-Agent-RAG


In [4]:
# researcher_system_prompt = """You are an expert assistant specializing in extracting information from research papers related to battery technology. Your role is to carefully analyze the provided PDF and extract key data in a structured JSON format. Follow these instructions strictly:

# 1. **Domain-Specific Focus**:
#    - Focus exclusively on content related to battery technology (e.g., materials, synthesis methods, properties, performance metrics).
#    - Ignore irrelevant sections or general references outside the battery-related content.

# 2. **Extraction Guidelines**:
#    - Use the JSON structure provided as a template.
#    - Replace placeholders with values found in the PDF.
#    - If a field is not mentioned in the PDF, write "None" instead of removing it.

# 3. **Clarity and Precision**:
#    - Extract numerical data (e.g., ratios, temperatures, durations) with maximum precision.
#    - For descriptive fields, summarize the relevant information concisely without adding interpretations.

# 4. **Structure Adherence**:
#    - Maintain the given JSON structure and formatting exactly.
#    - Do not modify or rearrange the JSON schema.

# 5. **External Reference Exclusion**:
#    - Only use information from the provided PDF.
#    - Ignore any supplementary information or external references not contained in the PDF.

# Your task is to ensure that the extracted data is complete, accurate, and formatted according to the requirements.

# Below are instructions for filling out items by referring to the examples.
# [ 
#     {
#         "CAM (Cathode Active Material)": {
#             "Stoichiometry information": {
#                 "NCM-622": {
#                     "Li ratio": "1",
#                     "Ni ratio": "0.6",
#                     "Co ratio": "0.2",
#                     "Mn ratio": "0.2",
#                     "O ratio": "2"
#                 },
#                 "ZrO2-NCM-622 (Z622)": {
#                     "Li ratio": "0.98",
#                     "Ni ratio": "0.6",
#                     "Co ratio": "0.2",
#                     "Mn ratio": "0.2",
#                     "O ratio": "2"
#                 }
#             },
#             "Whether or not commercial NCM was used for each sample (Stoichiometry information in order)": [
#                 "yes",
#                 "no"
#             ],
#             "Lithium source": "LiOH",
#             "Synthesis method": "co-precipitation",
#             "Describe the crystallization method, such as Hydrothermal, Sintering, or any other technique used during the process.": "Hydrothermal",
#             "What is the Crystallization final temperature in degrees Celsius used in the process? (e.g., calcination or sintering) mentioned for the crystallization stage.": "100",
#             "What is the time duration for the final crystallization process, including any calcination or sintering stages? Specify the hours.": "12",
#             "Doping": "Zr4+",
#             "Coating": "ZrO2",
#             "Additional treatment": "None"
#         }
#     }
# ]
# """

In [5]:
# System prompt for the researcher agent (question 1: cathode active material).
# It is handed to MultiAgentRAG as ``researcher_system_prompt``. The text tells
# the model to extract battery-related data from the provided PDF into the JSON
# layout shown at the end of the prompt, writing "None" for fields the document
# does not mention. The values in the trailing JSON are examples only; the
# prompt itself instructs the model not to reproduce them.
# NOTE: this string is sent to the model verbatim, so any edit (including
# whitespace) changes runtime behaviour.
q1_researcher_system_prompt = """You are an expert assistant specializing in extracting information from research papers related to battery technology. Your role is to carefully analyze the provided PDF and extract key data in a structured JSON format. Follow these instructions strictly:

1. **Domain-Specific Focus**:
   - Focus exclusively on content related to battery technology (e.g., materials, synthesis methods, properties, performance metrics).
   - Ignore irrelevant sections or general references outside the battery-related content.

2. **Extraction Guidelines**:
   - Use the JSON structure provided as a template.
   - Replace placeholders with values found in the PDF.
   - If a field is not mentioned in the PDF, write "None" instead of removing it.

3. **Clarity and Precision**:
   - Extract numerical data (e.g., ratios, temperatures, durations) with maximum precision.
   - For descriptive fields, summarize the relevant information concisely without adding interpretations.

4. **Structure Adherence**:
   - Maintain the given JSON structure and formatting exactly.
   - Do not modify or rearrange the JSON schema.

5. **External Reference Exclusion**:
   - Only use information from the provided PDF.
   - Ignore any supplementary information or external references not contained in the PDF.

You are an answer generator that receives a battery research article document and answers the given questions.
The question provides the content and format that must be included in the answer.
In the question, the values displayed to the right of the colon (":") serve as examples. After reviewing them, delete these examples and rewrite them using the values found in the document. If any item is not mentioned in the document, retain it and write "None" as the value.

Additional Instructions:
Do not directly reproduce the examples in the output.
Strictly follow the prescribed format (JSON).
Do not include titles or information from reference papers mentioned in the document.
Provide information for every NCM sample mentioned in the document.
For "Stoichiometry information":

The ratio of each element must be written as a float type, including decimal points.
If provided in fractional form, convert it to a float type before writing it.
For lithium sources:

If the lithium source used is in hydrate form (e.g., LiOH · H2O), remove the hydrate information and record it in its base form (e.g., LiOH).
For crystallization process:

If there are two or more heat treatment temperatures, record only the final temperature used.

If there are two or more heat treatment durations, record only the final duration used.

Avoid including units in the values.

Do not repeat words like "doping" or "coating" that duplicate the key names.

Below are instructions for filling out items by referring to the examples.
[
  {
    "CAM": {
      "Stoichiometry information": {
        "NCM-622": {
          "Li ratio": 1.0,
          "Ni ratio": 0.6,
          "Co ratio": 0.2,
          "Mn ratio": 0.2,
          "O ratio": 2.0
        },
        "ZrO2-NCM-622 (Z622)": {
          "Li ratio": 0.98,
          "Ni ratio": 0.6,
          "Co ratio": 0.2,
          "Mn ratio": 0.2,
          "O ratio": 2.0
        }
      },
      "Commercial NCM used": {
        "NCM-622": "yes",
        "ZrO2-NCM-622 (Z622)": "no"
      },
      "Lithium source": "LiOH",
      "Synthesis method": "co-precipitation",
      "Crystallization method": "Hydrothermal",
      "Crystallization final temperature": 100,
      "Crystallization final duration (hours)": 12,
      "Doping": "Zr4+",
      "Coating": "ZrO2",
      "Additional treatment": "None"
    }
  }
]
"""

# System prompt for the verifier agent; handed to MultiAgentRAG as
# ``verifier_system_prompt``. The text asks the model to check the researcher's
# JSON against the PDF for accuracy, completeness and schema consistency, to
# report any issue per field, and - once the JSON is correct - to print it
# under a `### Final Output` heading.
# NOTE(review): the `### Final Output` heading is presumably what downstream
# code looks for to detect completion - confirm in graph_multiagentrag.
verifier_system_prompt = """You are a meticulous verifier agent specializing in the domain of battery technology.
Your primary task is to check the accuracy of information extracted from research papers on batteries, formatted into JSON by another agent. Your responsibilities include validating the following:

Accuracy:
Cross-check the extracted values against the provided PDF. Ensure every field matches the battery-related content in the PDF accurately.

Completeness:
Confirm that all fields in the JSON structure are either filled with accurate values from the battery-related sections of the PDF or marked as "None" if not mentioned in the document.

Consistency:
Verify that the JSON structure, format, and data types adhere strictly to the required schema for battery-related research data.

Corrections:
Identify and highlight any errors, including inaccurate values, missing data, or structural inconsistencies, providing clear and actionable feedback for correction.
For any issues found, specify:

The field in question.
The nature of the issue (e.g., incorrect value, missing data, formatting error).
Suggestions or corrections to resolve the issue.

Final Output:
If the JSON is entirely correct, confirm its validity and output the JSON structure exactly as provided.
After confirming, you should include the phrase `### Final Output` as a heading before printing the JSON. This ensures the output is clearly marked and easy to locate.

Focus exclusively on battery-related content extracted from the PDF.
Ignore any reference content or information outside the provided document."""

### response

In [6]:
# Build the multi-agent graph from the project-local MultiAgentRAG class,
# wiring in the question-1 researcher prompt and the verifier prompt.
# NOTE(review): file_number=8 presumably selects paper #8 inside file_folder
# (a later cell labels its output as paper 8) - confirm in graph_multiagentrag.
graph = MultiAgentRAG(
    file_folder="../../data/input_data", 
    file_number=8, 
    model_name="gpt-4o", 
    researcher_system_prompt=q1_researcher_system_prompt, 
    verifier_system_prompt=verifier_system_prompt
).graph

# Run the graph once. The single input message carries the empty CAM template
# the agents must fill in; its keys match the example JSON in the researcher
# prompt. The second argument is the run config: recursion_limit is LangGraph's
# cap on how many steps the graph may take before it aborts the run.
result = graph.invoke(
    {
        "messages": [
            HumanMessage(
                content="""Please fill out the following JSON structure by referring to the PDF. Verify accurate values for each field, replacing the placeholders. If the information is not mentioned in the PDF, write "None".

[
  {
    "CAM": {
      "Stoichiometry information": {
        "": {
        
        },
        "": {
        
        }
      },
      "Commercial NCM used": {
        "": "",
        "": ""
      },
      "Lithium source": ,
      "Synthesis method": ,
      "Crystallization method": ,
      "Crystallization final temperature": ,
      "Crystallization final duration (hours)": ,
      "Doping": ,
      "Coating": ,
      "Additional treatment": 
    }
  }
]""",
                name="Researcher"  # Ensure the name is valid, here "Researcher" is used
            )
        ]
    }, {"recursion_limit": 30}
)

  functions = [format_tool_to_openai_function(t) for t in tools]
  return prompt | llm.bind_functions(functions)


In [10]:
## Answer for paper 8, question 4
# Pretty-print the last entry of the run's message list, keeping dict keys in
# their original insertion order (sort_dicts=False).
# NOTE(review): this cell is labelled "question 4" and the output shown is a
# "Cathode Performance" result, while the invoke cell visible in this notebook
# sends the CAM (question 1) template - presumably the output came from a
# different run; confirm before relying on it.
import pprint 
pprint.pprint(result["messages"][-1], sort_dicts=False)

{'Cathode Performance(All units should be standardized to mAh/g)': {'0.1C': {'NCM111': '150'},
                                                                    '0.2C': {'NCM811': '200'},
                                                                    '0.5C': {'NCM622': '180'},
                                                                    '1.0C': {'NCM523': 'None'},
                                                                    '2.0C': {'NCM721': 'None'},
                                                                    '4.0C': {'NCM851005': 'None'},
                                                                    '6.0C': {'NCM811': 'None'},
                                                                    'Other C-rates and performance': {'C/5': {'NCM811': '200'}}}}


In [8]:
# Display the full list of messages stored under "messages" in the run result.
result["messages"]

[HumanMessage(content='Please fill out the following JSON structure by referring to the PDF. Verify accurate values for each field, replacing the placeholders. If the information is not mentioned in the PDF, write "None".\n\n[ \n  {\n    "Cathode Performance(All units should be standardized to mAh/g)": {\n      {\n        "0.1C": {"": },\n        "0.2C": {"": },\n        "0.5C": {"": },\n        "1.0C": {"": },\n        "2.0C": {"": },\n        "4.0C": {"": },\n        "6.0C": {"": },\n        "Other C-rates and performance": {"": }\n      }\n    }\n  }\n]', additional_kwargs={}, response_metadata={}, name='Researcher'),
 HumanMessage(content='', additional_kwargs={'function_call': {'arguments': '{"__arg1":"battery technology discharge capacity C-rate PDF"}', 'name': 'retriever'}, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 1164, 'total_tokens': 1188, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 're