# 1. Install AG2 + Materials Project dependencies

In [30]:
!pip install -U "ag2[openai]" autogen -q

import sys
!{sys.executable} -m pip install pymatgen mp-api numpy cython ipywidgets jupyterlab_widgets -q

print("✓ AG2/autogen + Pymatgen + MP-API installed.")

✓ AG2/autogen + Pymatgen + MP-API installed.


# 2. Imports + LLM Configuration


In [31]:
import os
import json
import random
from datetime import datetime, timedelta
from typing import Annotated, Any, Literal, Union, List, Optional, Tuple

from autogen import ConversableAgent, LLMConfig
from autogen.agentchat import ReplyResult
from autogen.agentchat.group import (
    ContextVariables,
    AgentTarget, AgentNameTarget, StayTarget,
    OnCondition, StringLLMCondition,
    OnContextCondition, ExpressionContextCondition, ContextExpression,
    RevertToUserTarget, TerminateTarget
)
from autogen.agentchat.groupchat import GroupChat, GroupChatManager
from autogen.tools import tool

from mp_api.client import MPRester

from autogen.coding.local_commandline_code_executor import LocalCommandLineCodeExecutor
from autogen.coding.base import CodeBlock
from pydantic import BaseModel, Field

from autogen.agentchat.group.patterns import DefaultPattern
from autogen.agentchat import initiate_group_chat

# LLM configuration
# One config entry for the "gpt-4o" model. The API key is read from the
# OPENAI_API_KEY environment variable; because os.environ is indexed directly
# (not .get), a missing variable raises KeyError right here.
llm_config = LLMConfig(
    config_list=[
        {
            "model": "gpt-4o",
            "api_key": os.environ["OPENAI_API_KEY"],
        }
    ],
)
print("✓ Imports loaded and LLM configured.")

✓ Imports loaded and LLM configured.


In [3]:
# Show which autogen version is installed in this kernel.
import autogen
print(autogen.__version__)

0.10.2


# 3. User query + Context Variables

In [32]:
from autogen.agentchat.group import ContextVariables

# Shared pipeline state. Every key starts out unset (False / None); the tool
# functions defined later in this notebook write into it as the pipeline advances.
context_variables = ContextVariables(
    data={
        "task_started": False,       # set to True by explain_query_tool
        "query": None,               # written by explain_query_tool
        "query_explanation": None,   # written by explain_query_tool
        "search_criteria": None,     # written by material_retiever
        "fields": None,              # written by material_retiever
        "sample_number": None,       # written by material_retiever
        "mp_results": None,          # written by material_retiever, read by final_conclusion_tool
        "next_agent": None,          # updated by the tools to name the agent they hand off to
    }
)

print("✓ ContextVariables correctly initialized for AG2 multi-agent pipeline.")

✓ ContextVariables correctly initialized for AG2 multi-agent pipeline.


# 4. TOOLS (A → B → C → D, plus E: handoff to human)

In [33]:
# Tool A — explain_query

def explain_query_tool(
    query: Annotated[str, "initial query AS provided by the user without modification."],
    query_explanation: Annotated[str, "An explanation of the query, including a breakdown of key and secondary terms, 2-3 sentences."],
    context_variables: ContextVariables,
) -> ReplyResult:
    """Explain the posed query. What does it ask? Include a breakdown of the key and secondary terms in the query."""
    # NOTE(review): the docstring and Annotated descriptions are presumably what
    # AG2 shows the LLM as tool metadata, so their wording is left untouched.
    #
    # Contract:
    #   - invalid input -> error message routed back to AgentA_Explainer,
    #                      context left as it was
    #   - valid input   -> query/explanation recorded in the context and the
    #                      conversation handed to AgentB_MaterialsRetriever

    # Guard clauses: pick up the first validation problem, if any.
    problem = None
    if not isinstance(query, str):
        problem = "The 'query' parameter must be a string."
    elif not query.strip():
        problem = "Query cannot be empty."
    elif not isinstance(query_explanation, str) or not query_explanation.strip():
        problem = "The 'query_explanation' parameter must be a non-empty string."

    if problem is not None:
        return ReplyResult(
            message=f"Error in explain_query_tool: {problem}",
            target=AgentNameTarget("AgentA_Explainer"),
            context_variables=context_variables,
        )

    successor = "AgentB_MaterialsRetriever"

    # Record what was understood; "explained_terms" mirrors the explanation.
    for key, value in (
        ("task_started", True),
        ("query", query),
        ("query_explanation", query_explanation),
        ("explained_terms", query_explanation),
        ("next_agent", successor),
    ):
        context_variables[key] = value

    summary = "\n\n".join(
        (
            f"Explanation of user query: '{query}'",
            f"- Query explanation: {query_explanation}",
        )
    )

    return ReplyResult(
        message=summary,
        target=AgentNameTarget(successor),
        context_variables=context_variables,
    )

# Tool B — material_retiever (retrieve materials and their properties from Materials Project)

def material_retiever(
    search_criteria: Annotated[dict, "Filtering conditions used to select candidate materials."],
    fields: Annotated[List[str], "List of metadata fields to retrieve for each material."],
    sample_number: Annotated[int, "Number of materials to randomly sample and download from the filtered set."],
    context_variables: ContextVariables,
) -> ReplyResult:
    """Retrieve materials from Materials Project using search_criteria, fields, and sample_number."""
    # NOTE(review): the docstring and Annotated descriptions are presumably what
    # AG2 shows the LLM as tool metadata, so their wording is left untouched.
    #
    # Flow:
    #   1. validate argument types and check keys/fields against the whitelists
    #   2. coerce numeric range filters to (min, max) tuples
    #   3. query Materials Project using the MP_API_KEY environment variable
    #   4. randomly sample up to `sample_number` hits and store them in
    #      context_variables["mp_results"]
    #
    # Bad input, a missing API key, an API error and an empty result set are all
    # reported as a ReplyResult routed back to AgentB_MaterialsRetriever (with
    # context_variables["next_agent"] set accordingly) so the agent can correct
    # its call. Success hands off to AgentC_Analyzer.

    retriever_name = "AgentB_MaterialsRetriever"

    def _retry(text):
        # Send a problem report back to the retriever agent.
        context_variables["next_agent"] = retriever_name
        return ReplyResult(
            message=text,
            target=AgentNameTarget(retriever_name),
            context_variables=context_variables,
        )

    def _is_number(value):
        # bool is a subclass of int but is never a meaningful bound or count here.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def _remember_request(results):
        # Persist the request parameters together with whatever was retrieved.
        context_variables["search_criteria"] = search_criteria
        context_variables["fields"] = fields
        context_variables["sample_number"] = sample_number
        context_variables["mp_results"] = results

    # Keys accepted in search_criteria. They are forwarded unchanged as keyword
    # arguments to MPRester.materials.summary.search, so they must be that
    # method's keyword names (e.g. `num_sites`), not document field names
    # (e.g. `nsites`).
    allowed_search_keys = {
        "band_gap",
        "energy_above_hull",
        "density",
        "num_sites",
        "k_voigt",
        "g_voigt",
        "elements",
        "chemsys",
    }
    # Search keys whose value is a numeric range.
    range_keys = {
        "band_gap",
        "energy_above_hull",
        "density",
        "num_sites",
        "k_voigt",
        "g_voigt",
    }
    allowed_fields = {
        "material_id",
        "formula_pretty",
        "band_gap",
        "energy_above_hull",
        "density",
        "volume",
        "symmetry",
        "nsites",
        "elements",
        "chemsys",
        "is_stable",
        "bulk_modulus",
        "shear_modulus",
    }

    # 0) Validate the arguments and build the keyword arguments for the search.
    try:
        if not isinstance(search_criteria, dict):
            raise ValueError("search_criteria must be a dictionary.")
        if not isinstance(fields, list) or not all(isinstance(f, str) for f in fields):
            raise ValueError("fields must be a list of strings.")
        if not _is_number(sample_number) or not isinstance(sample_number, int) or sample_number <= 0:
            raise ValueError("sample_number must be a positive integer.")

        bad_keys = [k for k in search_criteria if k not in allowed_search_keys]
        if bad_keys:
            raise ValueError(
                f"Unsupported search_criteria key(s): {bad_keys}. "
                f"Allowed keys: {sorted(allowed_search_keys)}"
            )

        bad_fields = [f for f in fields if f not in allowed_fields]
        if bad_fields:
            raise ValueError(
                f"Unsupported field(s): {bad_fields}. "
                f"Allowed fields: {sorted(allowed_fields)}"
            )

        # 1) Normalize numeric range filters to (min, max) tuples, the form the
        #    mp-api documentation shows for summary.search. Tool calls arrive as
        #    JSON, so a range is normally a two-element list; a bare number is
        #    treated as an exact match; None marks an open bound.
        query_kwargs = {}
        for key, value in search_criteria.items():
            if key in range_keys:
                if _is_number(value):
                    value = (value, value)
                elif (
                    isinstance(value, (list, tuple))
                    and len(value) == 2
                    and all(bound is None or _is_number(bound) for bound in value)
                ):
                    value = tuple(value)
                else:
                    raise ValueError(
                        f"Range filter '{key}' must be a number or a [min, max] pair "
                        f"(use null for an open bound); got {value!r}."
                    )
            query_kwargs[key] = value
    except ValueError as e:
        return _retry(f"Error in material_retiever: {e}")

    # 2) Query Materials Project
    api_key = os.getenv("MP_API_KEY")
    if not api_key:
        return _retry("Error in material_retiever: Missing MP_API_KEY environment variable.")

    try:
        with MPRester(api_key) as mpr:
            all_results = list(
                mpr.materials.summary.search(
                    fields=fields,
                    **query_kwargs,
                )
            )
    except Exception as e:
        # Deliberately broad: any client/network failure is reported to the
        # agent as text instead of aborting the group chat.
        return _retry(f"Error in material_retiever while querying Materials Project: {e}")

    # 3) Handle empty result case
    if not all_results:
        _remember_request([])
        return _retry(
            "No materials found for the given search_criteria. "
            "This may be due to a typo/inconsistency or because no materials satisfy the constraints. "
            "Please adapt the filters while keeping the main constraints in place."
        )

    # 4) Sampling: draw without replacement only when there are more hits than requested.
    if sample_number < len(all_results):
        results = random.sample(all_results, sample_number)
    else:
        results = all_results

    # 5) Store results in context_variables and hand off to the analyzer.
    _remember_request(results)
    context_variables["next_agent"] = "AgentC_Analyzer"

    message = (
        f"Retrieved {len(results)} materials from Materials Project "
        f"with search_criteria={search_criteria}. "
        "Results stored in context_variables['mp_results'] for further analysis."
    )

    return ReplyResult(
        message=message,
        target=AgentNameTarget("AgentC_Analyzer"),
        context_variables=context_variables,
    )


# Tool C — final_conclusion_tool (Analyzer)

def final_conclusion_tool(
    context_variables: ContextVariables,
) -> ReplyResult:
    """Build a data-grounded structured analysis and store it in context_variables['final_conclusion']."""
    # NOTE(review): the docstring is presumably what AG2 shows the LLM as the
    # tool description, so its wording is left untouched.
    #
    # Reads:   context_variables["mp_results"], context_variables["query_explanation"]
    # Writes:  context_variables["final_conclusion"], context_variables["next_agent"],
    #          and <PROJECT_FOLDER>/materials_data.json
    # Routing: no results -> back to AgentC_Analyzer with a JSON error string;
    #          otherwise  -> AgentD_Coder, with the analysis as pretty-printed JSON.

    results = context_variables.get("mp_results", None)

    if not results:
        return ReplyResult(
            message='{"error":"No materials available in context_variables[\\"mp_results\\"] to analyze."}',
            target=AgentNameTarget("AgentC_Analyzer"),
            context_variables=context_variables,
        )

    query_explanation = context_variables.get("query_explanation", "")

    def _to_float(value):
        # float(value), or None when the value cannot be converted.
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def _lookup(entry, *names):
        # First key/attribute among `names` whose value is not None. Entries may
        # be dicts or objects. `is not None` (not truthiness) is essential:
        # energy_above_hull == 0.0 is the value of a stable material and must
        # not be mistaken for "missing".
        for name in names:
            value = entry.get(name) if isinstance(entry, dict) else getattr(entry, name, None)
            if value is not None:
                return value
        return None

    def infer_application(band_gap):
        # Map a band gap value to a coarse application regime.
        if band_gap is None:
            return "Band gap not available; cannot infer electronic/optical application domain."
        bg = _to_float(band_gap)
        if bg is None:
            return "Band gap not numeric; cannot infer electronic/optical application domain."

        if bg < 0.1:
            return "Likely metallic or effectively gapless; candidate for conductive applications (verify is_metal if available)."
        if 0.1 <= bg < 1.0:
            return "Narrow-gap semiconductor; IR/thermal-sensitive electronics possible depending on stability and structure."
        if 1.0 <= bg < 3.0:
            return "Semiconductor regime; general electronics or optoelectronics possible depending on direct/indirect gap (not provided)."
        if 3.0 <= bg <= 6.0:
            return "Wide-bandgap regime; candidates for power electronics, high-field devices, and UV optoelectronics depending on other properties."
        return "Very wide band gap; likely insulating behavior and potential deep-UV/insulating applications."

    def assess_stability(e_above_hull):
        # Classify stability from the energy above hull value.
        if e_above_hull is None:
            return "Energy above hull not available; stability cannot be assessed from this dataset."
        eah = _to_float(e_above_hull)
        if eah is None:
            return "Energy above hull not numeric; stability cannot be assessed reliably."

        if eah <= 1e-6:
            return "Stable (energy_above_hull ~ 0); higher likelihood of equilibrium synthesizability."
        if 0.0 < eah <= 0.05:
            return "Near-stable (<= 0.05 eV/atom); potentially synthesizable with suitable conditions."
        if 0.05 < eah <= 0.2:
            return "Metastable (0.05–0.2 eV/atom); synthesis may be nontrivial and condition-dependent."
        return "Likely unstable under equilibrium (> 0.2 eV/atom); lower prioritization unless strong motivation."

    def density_note(density):
        # Qualitative remark based on the density value.
        if density is None:
            return "Density not available; cannot provide even qualitative structural/handling hints."
        d = _to_float(density)
        if d is None:
            return "Density not numeric; cannot provide qualitative structural/handling hints."

        if d < 2.5:
            return "Relatively low density; could be advantageous for weight-sensitive contexts (mechanical suitability unknown)."
        if 2.5 <= d <= 6.0:
            return "Moderate density; typical for many ceramics/oxides/intermetallics (mechanical suitability unknown)."
        return "High density; may correlate with heavy-element content and higher mass per volume (mechanical suitability unknown)."

    materials = []

    for entry in results:
        material_id = _lookup(entry, "material_id")
        formula_pretty = _lookup(entry, "formula_pretty") or _lookup(entry, "pretty_formula")

        band_gap = _lookup(entry, "band_gap")
        density = _lookup(entry, "density")
        volume = _lookup(entry, "volume")
        e_above_hull = _lookup(entry, "energy_above_hull", "e_above_hull")

        limitations = []
        if band_gap is None:
            limitations.append("band_gap missing -> application inference limited.")
        if e_above_hull is None:
            limitations.append("energy_above_hull missing -> stability assessment limited.")
        if density is None:
            limitations.append("density missing -> only limited physical/handling hints.")
        if volume is None:
            limitations.append("volume missing -> no volumetric comparison possible.")
        limitations.append("No direct/indirect gap, elastic moduli, thermal conductivity, or toxicity data in retrieved fields -> avoid over-claiming.")

        materials.append(
            {
                "material_id": str(material_id) if material_id is not None else None,
                "formula_pretty": str(formula_pretty) if formula_pretty is not None else None,
                "key_properties": {
                    "band_gap": band_gap,
                    "energy_above_hull": e_above_hull,
                    "density": density,
                    "volume": volume,
                },
                "stability_assessment": assess_stability(e_above_hull),
                "application_inference": infer_application(band_gap),
                "limitations_and_unknowns": limitations,
                "density_note": density_note(density),
            }
        )

    def score_candidate(m):
        # Higher is better: lower energy above hull dominates, with a small bonus
        # for each key property that is present. A missing or non-numeric energy
        # above hull is penalized with a very large value.
        props = m["key_properties"]
        eah = props.get("energy_above_hull")

        eah_v = _to_float(eah) if eah is not None else None
        if eah_v is None:
            eah_v = 1e9

        info_score = 0.0
        if props.get("band_gap") is not None:
            info_score += 0.2
        if eah is not None:
            info_score += 0.2
        if props.get("density") is not None:
            info_score += 0.1

        return -eah_v + info_score

    ranked = sorted(materials, key=score_candidate, reverse=True)
    best_candidates = [m["material_id"] for m in ranked[:2] if m.get("material_id") is not None]

    output = {
        "query_explanation": query_explanation,
        "materials": materials,
        "overall_recommendation": {
            "best_candidates": best_candidates,
            "rationale": [
                "Ranking prioritizes lower energy_above_hull (stability) and availability of key fields for data-grounded inference.",
                "Application inference is derived primarily from band_gap regimes; adjust retrieval fields if you need direct/indirect gap, is_metal, or transport properties.",
            ],
        },
    }

    context_variables["final_conclusion"] = output
    context_variables["next_agent"] = "AgentD_Coder"

    # Persist the analysis next to the generated scripts; default=str covers
    # property values that json cannot serialize natively.
    project_folder = os.environ.get("PROJECT_FOLDER", "ag2_project")
    os.makedirs(project_folder, exist_ok=True)

    with open(os.path.join(project_folder, "materials_data.json"), "w") as f:
        json.dump(output, f, indent=2, default=str)

    message = json.dumps(output, indent=2, default=str)

    return ReplyResult(
        message=message,
        target=AgentNameTarget("AgentD_Coder"),
        context_variables=context_variables,
    )


# Tool D — python_coder

# Resolve "ag2_project" to an absolute path (relative to the current working
# directory), create it if needed, and export it as PROJECT_FOLDER.
# final_conclusion_tool and the prelude that python_coder_tool prepends to each
# executed script both read PROJECT_FOLDER from the environment.
project_folder = os.path.abspath("ag2_project")
os.makedirs(project_folder, exist_ok=True)
os.environ["PROJECT_FOLDER"] = project_folder


class PythonCode(BaseModel):
    # Pydantic wrapper around the source text of one script. python_coder_tool
    # builds it from the full script (prelude + user code) and stores its
    # model_dump() in the JSON context file.
    code: str = Field(..., description="Full python code executed by the coder agent")


def _safe_filename(name: str) -> str:
    name = os.path.basename(name.strip())
    if len(name) == 0:
        return "script.py"
    if not name.endswith(".py"):
        name = name + ".py"
    return name


def python_coder_tool(
    code: Annotated[str, "A single block of Python code to execute."],
    file_name: Annotated[str, "Name of the code file to store the executed script, e.g., 'script.py'"],
    context_variables: ContextVariables,
) -> ReplyResult:
    """Execute Python code, save it to ag2_project/<file_name>, update a persistent JSON context, and return the execution output."""
    # NOTE(review): the docstring and Annotated descriptions are presumably what
    # AG2 shows the LLM as tool metadata, so their wording is left untouched.
    #
    # SECURITY: `code` comes from the LLM and is run by
    # LocalCommandLineCodeExecutor on this machine; nothing in this function
    # sandboxes it. Only use this tool in an environment where that is acceptable.
    #
    # Routing: invalid arguments or an executor exception -> back to AgentD_Coder;
    #          a completed run (whatever its exit code)    -> Human.

    # Validate the arguments; the first problem found is reported to the coder agent.
    problem = None
    if not isinstance(code, str) or not code.strip():
        problem = "The 'code' parameter must be a non-empty string."
    elif not isinstance(file_name, str) or not file_name.strip():
        problem = "The 'file_name' parameter must be a non-empty string."
    if problem is not None:
        return ReplyResult(
            message=f"Error in python_coder_tool: {problem}",
            target=AgentNameTarget("AgentD_Coder"),
            context_variables=context_variables,
        )

    safe_name = _safe_filename(file_name)

    # Prepended to every script so it runs with PROJECT_FOLDER as its working directory.
    runtime_prelude = (
        "import os\n"
        "PROJECT_FOLDER = os.environ.get('PROJECT_FOLDER', 'ag2_project')\n"
        "os.makedirs(PROJECT_FOLDER, exist_ok=True)\n"
        "os.chdir(PROJECT_FOLDER)\n"
    )

    full_code = "#!/usr/bin/env python3\n" + runtime_prelude + "\n" + code
    python_code_model = PythonCode(code=full_code)

    executor = LocalCommandLineCodeExecutor(timeout=600)
    code_block = CodeBlock(language="python", code=full_code)

    try:
        result = executor.execute_code_blocks([code_block])
    except Exception as e:
        # Deliberately broad: any executor failure is reported to the coder
        # agent as text instead of aborting the group chat.
        message = (
            "Code execution failed due to an internal executor error:\n"
            f"{e}\n"
        )
        return ReplyResult(
            message=message,
            target=AgentNameTarget("AgentD_Coder"),
            context_variables=context_variables,
        )

    exit_code = result.exit_code
    output = result.output

    # Load the persistent execution log, tolerating a missing, corrupt, or
    # partially populated file.
    context_path = os.path.join(project_folder, "context_variables_data.json")

    try:
        with open(context_path, "r") as f:
            context_file_data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        context_file_data = {}
    if not isinstance(context_file_data, dict):
        context_file_data = {}

    # Make sure every section exists with the expected container type before it
    # is indexed or appended to below.
    for section, container in (
        ("execution_results", dict),
        ("execution_history", list),
        ("execution_notes", list),
        ("code", list),
    ):
        if not isinstance(context_file_data.get(section), container):
            context_file_data[section] = container()
    context_file_data.setdefault("code_error", "")

    if exit_code != 0:
        context_file_data["code_error"] = python_code_model.model_dump()
    else:
        context_file_data["code"].append(python_code_model.model_dump())
        context_file_data["code_error"] = ""

    context_file_data["execution_results"][safe_name] = {
        "exit_code": exit_code,
        "output": output,
    }
    context_file_data["execution_history"].append(f"Executed {safe_name} with exit_code={exit_code}")
    context_file_data["execution_notes"].append("Executed code and stored output/exit_code.")

    # Keep a copy of the executed script under the project folder. The encoding
    # is explicit so scripts containing non-ASCII text are written the same way
    # on every platform.
    code_output_path = os.path.join(project_folder, safe_name)
    os.makedirs(os.path.dirname(code_output_path), exist_ok=True)
    with open(code_output_path, "w", encoding="utf-8") as f:
        f.write(full_code)

    # Best-effort removal of the file the executor wrote (if it reports one);
    # a failure to delete it is not worth failing the tool call.
    executor_file = getattr(result, "code_file", None)
    if executor_file:
        try:
            if os.path.exists(executor_file):
                os.remove(executor_file)
        except OSError:
            pass

    with open(context_path, "w") as f:
        json.dump(context_file_data, f, indent=2)

    context_variables["last_executed_code"] = full_code
    context_variables["last_execution_output"] = output
    context_variables["last_executed_file"] = code_output_path

    if exit_code == 0:
        message = f"Code executed successfully.\n{output}"
    else:
        message = f"Code execution failed.\n{output}"

    context_variables["next_agent"] = "Human"

    return ReplyResult(
        message=message,
        target=AgentNameTarget("Human"),
        context_variables=context_variables,
    )

# Tool E — handoff_to_human_tool
def handoff_to_human_tool(
    message: Annotated[str, "Short explanation of why the query cannot be handled by Materials Project."],
    context_variables: ContextVariables,
) -> ReplyResult:
    """Route the conversation back to the Human for query reformulation."""
    # Record the handoff in the shared context, then pass the caller's message
    # through unchanged with the Human as the target.
    human = "Human"
    context_variables["next_agent"] = human

    reply = ReplyResult(
        message=message,
        target=AgentNameTarget(human),
        context_variables=context_variables,
    )
    return reply


# 5. Manual tests for material_retiever


# Live smoke tests for material_retiever. Each test builds a fresh, empty
# ContextVariables, calls the tool, and prints the message of the returned
# ReplyResult. Calls whose inputs pass validation read MP_API_KEY and query
# Materials Project, so these are not offline tests.
print("Running manual tests for material_retiever...\n")

from autogen.agentchat.group import ContextVariables

# Test 1 — Simple valid query (two range filters, four fields, sample of 5)
try:
    search_criteria_1 = {
        "band_gap": (3, 5),
        "energy_above_hull": (0, 0.1),
    }
    fields_1 = ["material_id", "formula_pretty", "band_gap", "energy_above_hull"]
    sample_number_1 = 5

    ctx_1 = ContextVariables(data={})
    result_1 = material_retiever(
        search_criteria=search_criteria_1,
        fields=fields_1,
        sample_number=sample_number_1,
        context_variables=ctx_1,
    )
    print("Test 1 completed. Message:")
    print(result_1.message)
except Exception as e:
    print("Test 1 failed with unexpected exception:", e)


# Test 2 — basic test with simple filters (narrow band gap, near-hull energy)
try:
    search_criteria_2 = {
        "band_gap": (0, 1),
        "energy_above_hull": (0, 0.05),
    }
    fields_2 = ["material_id", "band_gap", "energy_above_hull"]
    sample_number_2 = 3

    ctx_2 = ContextVariables(data={})
    result_2 = material_retiever(
        search_criteria=search_criteria_2,
        fields=fields_2,
        sample_number=sample_number_2,
        context_variables=ctx_2,
    )
    print("\nTest 2 completed. Message:")
    print(result_2.message)
except Exception as e:
    print("Test 2 raised exception:", e)



# Test 3 — Empty fields
# An empty list passes the tool's own checks (it is a list, and it contains no
# unsupported names), so it is forwarded to the Materials Project search as-is.
try:
    search_criteria_3 = {
        "band_gap": (1, 3),
    }
    fields_3 = []
    sample_number_3 = 3

    ctx_3 = ContextVariables(data={})
    result_3 = material_retiever(
        search_criteria=search_criteria_3,
        fields=fields_3,
        sample_number=sample_number_3,
        context_variables=ctx_3,
    )
    print("\nTest 3 completed (empty fields). Message:")
    print(result_3.message)
except Exception as e:
    print("Test 3 raised exception (likely due to empty fields):", e)


# Test 4 — invalid sample_number
# material_retiever reports validation problems through the message of the
# ReplyResult it returns rather than by raising, so the "completed" branch is
# the one expected to print the validation error here.
try:
    search_criteria_4 = {
        "band_gap": (0, 10),
    }
    fields_4 = ["material_id"]
    sample_number_4 = 0

    ctx_4 = ContextVariables(data={})
    result_4 = material_retiever(
        search_criteria=search_criteria_4,
        fields=fields_4,
        sample_number=sample_number_4,
        context_variables=ctx_4,
    )
    print("\nTest 4 completed (sample_number <= 0). Message:")
    print(result_4.message)
except Exception as e:
    print("Test 4 raised exception (expected due to validation):", e)


# Test 5 — Invalid key
# "invalid_property" is not in the tool's search-key whitelist; as in Test 4,
# the rejection comes back as a returned message, not as an exception.
try:
    search_criteria_5 = {
        "invalid_property": (0, 1),
    }
    fields_5 = ["material_id"]
    sample_number_5 = 3

    ctx_5 = ContextVariables(data={})
    result_5 = material_retiever(
        search_criteria=search_criteria_5,
        fields=fields_5,
        sample_number=sample_number_5,
        context_variables=ctx_5,
    )
    print("\nTest 5 completed (invalid filter key). Message:")
    print(result_5.message)
except Exception as e:
    print("Test 5 raised exception (this will guide future validation):", e)


print("\nManual tool tests completed.")


# 6. AGENTS (A → B → C → D + HUMAN)

In [34]:
# Agent A: Explainer

# System prompt for Agent A. It instructs the model to interpret the user's
# question and to always answer through explain_query_tool.
explainer_message = """
You are an explainer AI agent.

ROLE
- Interpret the user's materials-science question.
- Produce a short, structured scientific explanation.

TASKS
- Read the query as provided in the latest user message.
- Identify key scientific terms (e.g., band gap, symmetry, density, conductivity).
- Explain what the user is asking in clear, technically correct language.

TOOL USAGE
- You must always call explain_query_tool.
- Do not answer directly without using the tool.
"""

# Agent A never asks for human input. explain_query_tool is supplied twice:
# in `functions` and, keyed by its name, in `function_map`.
# NOTE(review): passing both may be redundant — confirm against the AG2 docs.
AgentA_Explainer = ConversableAgent(
    name="AgentA_Explainer",
    llm_config=llm_config,
    system_message=explainer_message,
    human_input_mode="NEVER",
    functions=[explain_query_tool],
    function_map={"explain_query_tool": explain_query_tool},
)


# Agent B: Materials Retriever

#two tasks: material retrieving, and explaining. you must follow this workflow:
retriever_message = """ 
You are a material retriever AI agent.

You transform the explained query into a valid Materials Project request,
retrieve candidate materials, and store results for the analyzer agent.

### YOUR TASKS
- retrieve candidate materials from Materials Project, and
- store the retrieved results in context_variables["mp_results"] for downstream analysis (do not produce a structured analysis yourself).

When retrieving materials from materials project, you must carefully consider the following instructions. 
Otherwise, errors will be raised

---
Use material_retiever tool which takes the following parameters:
a. `search_criteria`: A dictionary of filtering conditions used to select candidate materials.
b. `fields`: A list of metadata fields to retrieve for each material.
c. `sample_number`: The number of materials to randomly sample and download from the filtered set.

### IMPORTANT (read before calling the tool)
1) Read the explanation from context_variables["query_explanation"] and convert it into valid filters.
2) You must ONLY use valid keys in search_criteria and valid fields in fields (see whitelists below).
3) Do NOT invent semantic filters or unsupported kwargs.

### search_criteria KEYS (WHITELIST)
You may ONLY use these keys:
- band_gap
- energy_above_hull
- density
- num_sites
- k_voigt
- g_voigt
- elements
- chemsys

Do NOT use any other keys (e.g., excluded_elements is NOT allowed).

### fields (WHITELIST)
You may ONLY request fields from this list:
- material_id
- formula_pretty
- band_gap
- energy_above_hull
- density
- volume
- symmetry
- nsites
- elements
- chemsys
- is_stable

For example, filters can be assigned as follows:

Numerical range filters examples:
- "band_gap": (3, 5) for band gaps between 3 and 5
- "energy_above_hull": (0, 0.1) for energy above hull between 0 and 0.1
- "k_voigt": (150, None) for voigt bulk modulus above 150 GPa
- "g_voigt": (100, None) for voigt shear modulus above 100 GPa
- "num_sites": (1, 20) for number of atomic sites between 1 and 20

Elemental composition filters examples:
- "chemsys": ["Li-O", "Na-Cl"] -> chemical systems of either Li-O or Na-Cl
- "elements": ["Nb", "V"] -> structures containing (at least) both Nb and V

FORMAT RULES:
- Numeric filters MUST be tuples: (min_value, max_value) using None for open-ended bounds.
- List filters MUST be plain lists of strings.
- Do NOT use Mongo-style operators like {"$gt": ...}, {"$gte": ...}, {"$in": ...}.

NOTE: The following aliases are automatically mapped to canonical internal fields:
- "num_sites" → "nsites" 
- "k_voigt" → "bulk_modulus" 
- "g_voigt" → "shear_modulus"

If no material was found:
- If no material was found based on the provided filters it may stem from two reasons; 
(a) there is a typo or inconsistency in the queries. Double check and try again. 
(b) no material is indeed found, which normally happens for complex multi-element systems and/or when specified properties are provided. 
- In case NO material was found, retry calling material_retiever with an adapted strategy ensuring that the posed query constraints are still in place.

If the user query is outside Materials Project scope, do NOT call material_retiever.
Ask the Human to rephrase the query in Materials Project terms (composition/properties).
Request a handoff to the Human (the pipeline must route to Human for rephrasing).
"""

# Agent B runs without human input and is given the Materials Project
# retrieval tool, registered under its (misspelled) name "material_retiever".
AgentB_MaterialsRetriever = ConversableAgent(
    name="AgentB_MaterialsRetriever",
    system_message=retriever_message,
    llm_config=llm_config,
    functions=[material_retiever],
    function_map=dict(material_retiever=material_retiever),
    human_input_mode="NEVER",
)


# Agent C: Analyzer
# System prompt for AgentC_Analyzer (passed as `system_message` below).
# It tells the model to read mp_results and query_explanation from the context
# variables, gives heuristic band_gap / energy_above_hull / density
# interpretations, fixes the output JSON schema, and requires that the result
# is delivered via final_conclusion_tool.

analyzer_message = """
You are an analyzer AI agent.

ROLE
- Analyze and organize Materials Project retrieval results into a scientific, data-grounded conclusion.

INPUTS (from context_variables)
- context_variables["mp_results"]: raw list/dicts returned by the Materials Project retrieval tool.
- context_variables.get("query_explanation", ""): short explanation of the user intent.

YOUR TASKS
1) Read mp_results and extract only the available fields (do not assume missing fields exist).
2) Produce a REAL analysis derived from the data, not placeholders.
   - band_gap -> infer likely application domain:
       * ~0 eV: metallic / conductor (if present), otherwise narrow-gap semiconductor
       * 0–1 eV: IR / narrow-gap semiconductor
       * 1–3 eV: general semiconductor (electronics / PV depending on other context)
       * 3–6 eV: wide-bandgap (power electronics / UV optoelectronics / insulating behavior)
       * >6 eV: likely insulator / deep-UV potential
   - energy_above_hull -> stability:
       * == 0 (or very close): stable / synthesizability higher
       * 0–0.05 eV/atom: near-stable / potentially synthesizable
       * 0.05–0.2 eV/atom: metastable / synthesis may be harder
       * >0.2 eV/atom: likely unstable under equilibrium
   - density -> structural / thermal hints (qualitative only; do not claim mechanical strength without moduli)
3) Keep the output STRICTLY as a JSON object (no extra prose outside JSON).
4) Store the JSON in context_variables["final_conclusion"].

OUTPUT JSON SCHEMA (must follow exactly)
{
  "query_explanation": "...",
  "materials": [
    {
      "material_id": "...",
      "formula_pretty": "...",
      "key_properties": {
        "band_gap": ...,
        "energy_above_hull": ...,
        "density": ...,
        "volume": ...
      },
      "stability_assessment": "...",
      "application_inference": "...",
      "limitations_and_unknowns": ["..."]
    }
  ],
  "overall_recommendation": {
    "best_candidates": ["material_id1", "material_id2"],
    "rationale": ["..."]
  }
}

TOOL USAGE
- You must always call final_conclusion_tool.
- Do not answer directly without using the tool.
"""

# Agent C runs without human input; its only tool is final_conclusion_tool.
AgentC_Analyzer = ConversableAgent(
    name="AgentC_Analyzer",
    system_message=analyzer_message,
    llm_config=llm_config,
    functions=[final_conclusion_tool],
    function_map=dict(final_conclusion_tool=final_conclusion_tool),
    human_input_mode="NEVER",
)

# Agent D: Coding Agent
# System prompt for AgentD_Coder (passed as `system_message` below).
# It asks the model to produce at least one artifact (CSV summary and/or
# band-gap histogram) from materials_data.json using relative paths, and to run
# all code exclusively through python_coder_tool.
# NOTE(review): the prompt assumes python_coder_tool changes the working
# directory to ag2_project and that the Analyzer writes materials_data.json;
# neither is visible in this section — confirm against the tool definitions.

coder_message = """
You are a coder AI agent.

ROLE
- Generate and execute Python code when needed by the pipeline.

TASKS
- When you are invoked by the pipeline and context_variables contains mp_results, you must generate at least ONE computational artifact automatically:
  - either export a CSV summary table (e.g., material_id, formula_pretty, band_gap, energy_above_hull, density), and/or
  - plot a histogram of band_gap distribution.
- The Analyzer persists the structured results to materials_data.json in the project folder. Use that file as the single source of truth for plots/tables.
- Assume python_coder_tool sets the working directory to PROJECT_FOLDER (ag2_project). Therefore, use relative paths only:
  - read "materials_data.json" (do not prefix with "ag2_project/")
  - write outputs like "summary_table.csv" and "band_gap_histogram.png" (no "ag2_project/" prefix)
- Prefer generating both CSV and histogram in a single python_coder_tool call unless there is a strong reason to split.
- Execute all code only via python_coder_tool.
- Save executed code under ag2_project/<file_name>.
- Use the tool output to decide the next step.

TOOL USAGE (STRICT)
- You must always call python_coder_tool for any code execution.
- Do not execute code directly.
- Do not answer with analysis only; the response must come from the tool output.
"""

# Agent D runs without human input; its only tool is python_coder_tool.
AgentD_Coder = ConversableAgent(
    name="AgentD_Coder",
    system_message=coder_message,
    llm_config=llm_config,
    functions=[python_coder_tool],
    function_map=dict(python_coder_tool=python_coder_tool),
    human_input_mode="NEVER",
)



# Human Agent
# Created with llm_config=None and human_input_mode="ALWAYS"; it is later
# handed to the pattern as the user agent.
# NOTE(review): presumably every reply from this agent is typed by the person
# at the console — confirm against the ConversableAgent documentation.

Human = ConversableAgent(
    name="Human",
    human_input_mode="ALWAYS",
    llm_config=None,
)
print("✓ Agents created.")

✓ Agents created.


# 7. Pattern definition 


In [35]:
# Set up the conversation pattern
# The explainer (Agent A) speaks first, Human acts as the user agent, and the
# shared context_variables object defined earlier is attached to the pattern.

pattern = DefaultPattern(
    agents=[AgentA_Explainer, AgentB_MaterialsRetriever, AgentC_Analyzer, AgentD_Coder],
    initial_agent=AgentA_Explainer,
    user_agent=Human,
    context_variables=context_variables,
)


# 8. Group Chat

In [None]:
# Ask the user for the query that seeds the group chat.
print("Hi! Type your query and press Enter.\n")
human_query = input("Query: ").strip()

# Re-prompt on blank input: an empty user message would start the pipeline
# (and spend LLM calls) with nothing for the explainer agent to interpret.
while not human_query:
    human_query = input("Query (must not be empty): ").strip()

# Run the multi-agent conversation. `context` holds the context variables after
# the chat and is read by later cells (e.g. context["mp_results"]); the third
# return value is not used in this notebook.
result, context, _ = initiate_group_chat(
    pattern=pattern,
    messages=[{"role": "user", "content": human_query}],
    max_rounds=20,  # upper bound on the number of conversation rounds
)



Hi! Type your query and press Enter.



Query:  Find wide-bandgap stable oxides (band_gap 3–6 eV, energy_above_hull ≤ 0.05) and summarize best candidates.


[33mHuman[0m (to chat_manager):

Find wide-bandgap stable oxides (band_gap 3–6 eV, energy_above_hull ≤ 0.05) and summarize best candidates.

--------------------------------------------------------------------------------
[32m
Next speaker: AgentA_Explainer
[0m
[33mAgentA_Explainer[0m (to chat_manager):

[32m***** Suggested tool call (call_jrUbSsy7jYeD8st4Kw59X33v): explain_query_tool *****[0m
Arguments: 
{"query":"Find wide-bandgap stable oxides (band_gap 3–6 eV, energy_above_hull ≤ 0.05) and summarize best candidates.","query_explanation":"The user is asking for a list of oxide materials that have specific properties making them potentially useful for applications like electronics or photonics. The key term 'band_gap' refers to the energy difference between the highest occupied electronic band and the lowest unoccupied electronic band, with a range of 3–6 eV indicating wide bandgap materials suitable for high-power and high-frequency applications. The 'energy_above_hull' is a

Retrieving SummaryDoc documents:   0%|          | 0/11669 [00:00<?, ?it/s]

[35m
>>>>>>>> EXECUTED FUNCTION material_retiever...
Call ID: call_vHbfGaKKwwkhanvZSTz5otuo
Input arguments: {'fields': ['material_id', 'formula_pretty', 'band_gap', 'energy_above_hull', 'density', 'volume', 'symmetry', 'nsites', 'elements', 'chemsys', 'is_stable'], 'sample_number': 10, 'search_criteria': {'band_gap': [3, 6], 'energy_above_hull': [0, 0.05], 'elements': ['O']}}
Output:
Retrieved 10 materials from Materials Project with search_criteria={'band_gap': [3, 6], 'energy_above_hull': [0, 0.05], 'elements': ['O']}. Results stored in context_variables['mp_results'] for further analysis.[0m
[34m***** ReplyResult transition (AgentB_MaterialsRetriever): AgentC_Analyzer *****[0m
[33m_Group_Tool_Executor[0m (to chat_manager):

[32m***** Response from calling tool (call_vHbfGaKKwwkhanvZSTz5otuo) *****[0m
Retrieved 10 materials from Materials Project with search_criteria={'band_gap': [3, 6], 'energy_above_hull': [0, 0.05], 'elements': ['O']}. Results stored in context_variables[

In [22]:
# --- Agent D: export CSV from mp_results ---
# Manual export of the retrieved materials to ag2_project/materials_summary.csv.
# Requires `context` from the group-chat cell, so that cell must have been run first.

# Cell-local imports come first so a missing dependency surfaces before any work is done.
from pathlib import Path

import pandas as pd

mp_results = context.get("mp_results", [])
if not mp_results:
    # Nothing to export: fail loudly instead of writing an empty CSV.
    raise RuntimeError("No materials found in context_variables['mp_results'].")

# One row per retrieved material; columns follow whatever keys the records carry.
df = pd.DataFrame(mp_results)

out_dir = Path("ag2_project")
# parents=True keeps the cell working if the output folder is ever nested.
out_dir.mkdir(parents=True, exist_ok=True)

csv_path = out_dir / "materials_summary.csv"
df.to_csv(csv_path, index=False)

print(f"Saved CSV to {csv_path.resolve()}")


Saved CSV to /Users/evareysanchez/WildfiresAI/ag2_project/materials_summary.csv
