## Multi Agent RAG
- prompt 업데이트 

In [1]:
from dotenv import load_dotenv
import getpass
import os
from langchain_core.messages import HumanMessage
from langchain_teddynote import logging

from graph_multiagentrag import MultiAgentRAG

In [2]:
def _set_if_undefined(var: str):
    """Make sure the environment variable named *var* holds a non-empty value.

    An existing non-empty value is left untouched. When the variable is
    missing or empty, the user is prompted via ``getpass.getpass`` and the
    answer is stored in ``os.environ`` under *var*.
    """
    current_value = os.environ.get(var)
    if current_value:
        # Already configured; nothing to ask for.
        return
    os.environ[var] = getpass.getpass(f"Please provide your {var}")


# Ask the user for each API key that is not already set in the environment.
for _required_key in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY"):
    _set_if_undefined(_required_key)

# Enable LangSmith tracing (optional) and name the project for the traces.
# NOTE(review): a later cell calls logging.langsmith("Multi-Agent-RAG") and its
# output reports that project name, so the value set here may be superseded —
# confirm which project name is intended.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "Multi-agent Collaboration",
    }
)

# key 확인
# os.environ["OPENAI_API_KEY"]

In [3]:
# Pass the LangSmith project name; the cell output reports that tracing has
# started for this project.
logging.langsmith("Multi-Agent-RAG")

LangSmith 추적을 시작합니다.
[프로젝트명]
Multi-Agent-RAG


In [4]:
# researcher_system_prompt = """You are an expert assistant specializing in extracting information from research papers related to battery technology. Your role is to carefully analyze the provided PDF and extract key data in a structured JSON format. Follow these instructions strictly:

# 1. **Domain-Specific Focus**:
#    - Focus exclusively on content related to battery technology (e.g., materials, synthesis methods, properties, performance metrics).
#    - Ignore irrelevant sections or general references outside the battery-related content.

# 2. **Extraction Guidelines**:
#    - Use the JSON structure provided as a template.
#    - Replace placeholders with values found in the PDF.
#    - If a field is not mentioned in the PDF, write "None" instead of removing it.

# 3. **Clarity and Precision**:
#    - Extract numerical data (e.g., ratios, temperatures, durations) with maximum precision.
#    - For descriptive fields, summarize the relevant information concisely without adding interpretations.

# 4. **Structure Adherence**:
#    - Maintain the given JSON structure and formatting exactly.
#    - Do not modify or rearrange the JSON schema.

# 5. **External Reference Exclusion**:
#    - Only use information from the provided PDF.
#    - Ignore any supplementary information or external references not contained in the PDF.

# Your task is to ensure that the extracted data is complete, accurate, and formatted according to the requirements.

# Below are instructions for filling out items by referring to the examples.
# [ 
#     {
#         "CAM (Cathode Active Material)": {
#             "Stoichiometry information": {
#                 "NCM-622": {
#                     "Li ratio": "1",
#                     "Ni ratio": "0.6",
#                     "Co ratio": "0.2",
#                     "Mn ratio": "0.2",
#                     "O ratio": "2"
#                 },
#                 "ZrO2-NCM-622 (Z622)": {
#                     "Li ratio": "0.98",
#                     "Ni ratio": "0.6",
#                     "Co ratio": "0.2",
#                     "Mn ratio": "0.2",
#                     "O ratio": "2"
#                 }
#             },
#             "Whether or not commercial NCM was used for each sample (Stoichiometry information in order)": [
#                 "yes",
#                 "no"
#             ],
#             "Lithium source": "LiOH",
#             "Synthesis method": "co-precipitation",
#             "Describe the crystallization method, such as Hydrothermal, Sintering, or any other technique used during the process.": "Hydrothermal",
#             "What is the Crystallization final temperature in degrees Celsius used in the process? (e.g., calcination or sintering) mentioned for the crystallization stage.": "100",
#             "What is the time duration for the final crystallization process, including any calcination or sintering stages? Specify the hours.": "12",
#             "Doping": "Zr4+",
#             "Coating": "ZrO2",
#             "Additional treatment": "None"
#         }
#     }
# ]
# """

In [5]:
# System prompt for the researcher agent on question 1: extracting cathode
# active material (CAM) data from the PDF into the JSON layout shown at the end
# of the prompt. It is passed as `researcher_system_prompt` when q1_graph is built.
# The text is sent to the model verbatim, so any edit changes model behavior.
q1_researcher_system_prompt = """You are an expert assistant specializing in extracting information from research papers related to battery technology. Your role is to carefully analyze the provided PDF and extract key data in a structured JSON format. Follow these instructions strictly:

1. **Domain-Specific Focus**:
   - Focus exclusively on content related to battery technology (e.g., materials, synthesis methods, properties, performance metrics).
   - Ignore irrelevant sections or general references outside the battery-related content.

2. **Extraction Guidelines**:
   - Use the JSON structure provided as a template.
   - Replace placeholders with values found in the PDF.
   - If a field is not mentioned in the PDF, write "None" instead of removing it.

3. **Data Formatting Requirements**:
  - Numerical Data: Extract values (e.g., ratios, temperatures, durations) with maximum precision.
  - Descriptive Data: Summarize the relevant information concisely without adding interpretations.
  - Stoichiometry Information:
    - Write element ratios in float type with decimal points.
    - Convert fractional ratios into float type (e.g., 3/4 → 0.75).
  - Lithium Source:
    - If the lithium source is in hydrate form (e.g., LiOH · H2O), record only its base form (e.g., LiOH).
  - Crystallization Process:
    - For heat treatment temperatures or durations, if multiple values are provided, record only the final temperature and duration used.
  - Units:
    - Do not include units in the values.
  - Repetition:
    - Avoid repeating words like "doping" or "coating" that duplicate the key names.

4. **Special Instructions for Coin Cell Manufacturing Data**:
  - If multiple types of conductive additives (carbon) are mentioned, sum their ratios and record the total.
  - Use abbreviations for solvents (e.g., Ethylene Carbonate → EC).
  - For additives, format them as: "additive name, weight ratio [%]" (e.g., "FEC, 10%"). Convert full names into abbreviations wherever applicable.

5. **External Reference Exclusion**:
   - Only use information from the provided PDF.
   - Ignore any supplementary information or external references not contained in the PDF.

6. **Final Notes**:
  - Do not directly reproduce example values provided in the prompts.
  - Strictly adhere to the prescribed JSON schema and formatting.
  - Do not include titles or information from reference papers mentioned in the document.

Below are instructions for filling out items by referring to the examples.
[
  {
    "CAM (Cathode Active Material)": {
      "Stoichiometry information": {
        "NCM-622": {
          "Li ratio": 1.0,
          "Ni ratio": 0.6,
          "Co ratio": 0.2,
          "Mn ratio": 0.2,
          "O ratio": 2.0
        },
        "ZrO2-NCM-622 (Z622)": {
          "Li ratio": 0.98,
          "Ni ratio": 0.6,
          "Co ratio": 0.2,
          "Mn ratio": 0.2,
          "O ratio": 2.0
        }
      },
      "Commercial NCM used": {
        "NCM-622": "yes",
        "ZrO2-NCM-622 (Z622)": "no"
      },
      "Lithium source": "LiOH",
      "Synthesis method": "co-precipitation",
      "Crystallization method": "Hydrothermal",
      "Crystallization final temperature": 100,
      "Crystallization final duration (hours)": 12,
      "Doping": "Zr4+",
      "Coating": "ZrO2",
      "Additional treatment": "None"
    }
  }
]
"""


In [6]:
# System prompt for the researcher agent on question 2: extracting coin-cell
# electrode (half-cell) manufacturing data from the PDF into the JSON layout
# shown at the end of the prompt. It is passed as `researcher_system_prompt`
# when q2_graph is built. The text is sent to the model verbatim.
q2_researcher_system_prompt = """You are an expert assistant specializing in extracting information from research papers related to battery technology. Your role is to carefully analyze the provided PDF and extract key data in a structured JSON format. Follow these instructions strictly:

1. **Domain-Specific Focus**:
   - Focus exclusively on content related to battery technology (e.g., materials, synthesis methods, properties, performance metrics, or manufacturing of coin cells).
   - Ignore irrelevant sections or general references outside the battery-related content.

2. **Extraction Guidelines**:
   - Use the JSON structure provided as a template.
   - Replace placeholders with values found in the PDF.
   - If a field is not mentioned in the PDF, write "None" instead of removing it.

3. **Clarity and Precision**:
   - Extract numerical data (e.g., ratios, temperatures, durations) with maximum precision.
   - For descriptive fields, summarize the relevant information concisely without adding interpretations.

4. **Structure Adherence**:
   - Maintain the given JSON structure and formatting exactly.
   - Do not modify or rearrange the JSON schema.

5. **Specific Data Processing**:
   - Only extract information related to the manufacturing of coin cells.
   - If multiple types of conductive additives (carbon) are mentioned, sum their ratios and record the total.
   - When writing the name of a solvent, use its abbreviation instead of the full name (e.g., Ethylene Carbonate → EC).
   - For additives, record them in the format: "additive name, weight ratio [%]" (e.g., "FEC, 10%"). Similarly, convert the full name to its abbreviation.

6. **External Reference Exclusion**:
   - Only use information from the provided PDF.
   - Ignore any supplementary information or external references not contained in the PDF.

7. **Final Notes**:
   - Do not directly reproduce example values from the question prompts.
   - Do not include titles or information from reference papers mentioned in the document.
   - Ensure the final JSON output is complete, accurate, and adheres to all stated requirements.


Below are instructions for filling out items by referring to the examples.
[
   {
      "Electrode (half-cell)": {
         "Active material to Conductive additive to Binder ratio": "90:5:5",
         "Electrolyte": {
            "Salt": "LiPF6",
            "Concentration": "1M",
            "Solvent": "EC:EMC:DEC",
            "Solvent ratio": "1:1:1"
         },
         "Additive": "FEC, 10%",
         "Loading density (mass loading of NCM)": "5",
         "Additional treatment for electrode": "None"
      }
   }
]
"""

In [7]:
# System prompt for the verifier agent, which checks the researcher's JSON
# against the PDF. The same prompt is passed as `verifier_system_prompt` to both
# q1_graph and q2_graph. It asks the verifier to emit a `### Final Output`
# heading before the confirmed JSON. The text is sent to the model verbatim.
verifier_system_prompt = """You are a meticulous verifier agent specializing in the domain of battery technology.
Your primary task is to check the accuracy of information extracted from research papers on batteries, formatted into JSON by another agent. Your responsibilities include validating the following:

Accuracy:
Cross-check the extracted values against the provided PDF. Ensure every field matches the battery-related content in the PDF accurately.

Completeness:
Confirm that all fields in the JSON structure are either filled with accurate values from the battery-related sections of the PDF or marked as "None" if not mentioned in the document.

Consistency:
Verify that the JSON structure, format, and data types adhere strictly to the required schema for battery-related research data.

Corrections:
Identify and highlight any errors, including inaccurate values, missing data, or structural inconsistencies, providing clear and actionable feedback for correction.
For any issues found, specify:

The field in question.
The nature of the issue (e.g., incorrect value, missing data, formatting error).
Suggestions or corrections to resolve the issue.

Final Output:
If the JSON is entirely correct, confirm its validity and output the JSON structure exactly as provided.
After confirming, you should include the phrase `### Final Output` as a heading before printing the JSON. This ensures the output is clearly marked and easy to locate.

Focus exclusively on battery-related content extracted from the PDF.
Ignore any reference content or information outside the provided document."""

### response

In [8]:
# Question 1 (cathode active material) for file number 8.
# The JSON skeleton below is a fill-in template for the agents, not valid JSON;
# it is sent exactly as written.
q1_question = """Please fill out the following JSON structure by referring to the PDF. Verify accurate values for each field, replacing the placeholders. If the information is not mentioned in the PDF, write "None".

[
  {
    "CAM (Cathode Active Material)": {
      "Stoichiometry information": {
        "": {
        
        },
      },
      "Commercial NCM used": {
        "": "",
        "": ""
      },
      "Lithium source": ,
      "Synthesis method": ,
      "Crystallization method": ,
      "Crystallization final temperature": ,
      "Crystallization final duration (hours)": ,
      "Doping": ,
      "Coating": ,
      "Additional treatment": 
    }
  }
]"""

# Build the graph with the question-1 researcher prompt and the shared verifier prompt.
q1_graph = MultiAgentRAG(
    file_folder="../../data/input_data",
    file_number=8,
    model_name="gpt-4o",
    researcher_system_prompt=q1_researcher_system_prompt,
    verifier_system_prompt=verifier_system_prompt,
).graph

# The opening message carries the name "Researcher" (must be a valid name).
q1_initial_state = {
    "messages": [HumanMessage(content=q1_question, name="Researcher")],
}
q1_run_config = {"recursion_limit": 30}

q1_result = q1_graph.invoke(q1_initial_state, q1_run_config)

  functions = [format_tool_to_openai_function(t) for t in tools]
  return prompt | llm.bind_functions(functions)


In [9]:
# Question 2 (half-cell electrode / coin-cell manufacturing) for file number 8.
# The JSON skeleton below is the template the agents are asked to fill in; it is
# sent exactly as written.
q2_question = """Please fill out the following JSON structure by referring to the PDF. Verify accurate values for each field, replacing the placeholders. If the information is not mentioned in the PDF, write "None".

[
   {
      "Electrode (half-cell)": {
         "Active material to Conductive additive to Binder ratio": "",
         "Electrolyte": {
            "Salt": "",
            "Concentration": "",
            "Solvent": "",
            "Solvent ratio": ""
         },
         "Additive": "",
         "Loading density (mass loading of NCM)": "",
         "Additional treatment for electrode": ""
      }
   }
]"""

# Build the graph with the question-2 researcher prompt and the shared verifier prompt.
q2_graph = MultiAgentRAG(
    file_folder="../../data/input_data",
    file_number=8,
    model_name="gpt-4o",
    researcher_system_prompt=q2_researcher_system_prompt,
    verifier_system_prompt=verifier_system_prompt,
).graph

# The opening message carries the name "Researcher" (must be a valid name).
q2_initial_state = {
    "messages": [HumanMessage(content=q2_question, name="Researcher")],
}
q2_run_config = {"recursion_limit": 30}

q2_result = q2_graph.invoke(q2_initial_state, q2_run_config)

In [10]:
## Answer to question 1 for paper no. 8
import pprint

# Show the last entry of the result's message list, keeping key insertion order.
q1_final_message = q1_result["messages"][-1]
pprint.pprint(q1_final_message, sort_dicts=False)

{'CAM (Cathode Active Material)': {'Stoichiometry information': {'NCM111': {'Li ratio': 1.02,
                                                                            'Ni ratio': 0.33,
                                                                            'Co ratio': 0.33,
                                                                            'Mn ratio': 0.33,
                                                                            'O ratio': 2.0},
                                                                 'NCM523': {'Li ratio': 1.02,
                                                                            'Ni ratio': 0.5,
                                                                            'Co ratio': 0.2,
                                                                            'Mn ratio': 0.3,
                                                                            'O ratio': 2.0},
                                                                 

In [11]:
## Answer to question 2 for paper no. 8
import pprint

# Show the last entry of the result's message list, keeping key insertion order.
q2_final_message = q2_result["messages"][-1]
pprint.pprint(q2_final_message, sort_dicts=False)

{'Electrode (half-cell)': {'Active material to Conductive additive to Binder ratio': '93:4:3',
                           'Electrolyte': {'Salt': 'LiPF6',
                                           'Concentration': '1M',
                                           'Solvent': 'EC:DMC',
                                           'Solvent ratio': '1:1'},
                           'Additive': 'None',
                           'Loading density (mass loading of NCM)': '2.0',
                           'Additional treatment for electrode': 'None'}}


: 