In [1]:
import os
import json
import time
from openai import OpenAI
import together
import random
from dotenv import load_dotenv
import google.generativeai as genai
import anthropic
from concurrent.futures import ThreadPoolExecutor


In [2]:
# === Load Keys ===
# Populate the process environment from a local .env file; this has to run
# before the os.getenv() lookups below so the keys are visible.
load_dotenv()
together.api_key = os.getenv("TOGETHER_API_KEY")
client = together.Together()  # Together client used by call_model_tog
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))  # used by call_model_openai
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))  # module-level config for call_model_gemini
claude_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))  # used by call_model_claude


# Model identifiers passed to the call_model_* helpers.
# NOTE(review): MODEL_mx, MODEL_cld and MODEL_cld4 are not referenced in the
# visible part of this notebook — confirm whether they are still needed.
# MODEL_GENERATOR = "gemini-2.5-flash"
MODEL_mx = "mistralai/Mixtral-8x7B-Instruct-v0.1"
MODEL_gpt = "gpt-4.1-2025-04-14"
MODEL_o3 = "o3"
MODEL_cld = "claude-3-5-sonnet-20241022"
MODEL_cld4 = "claude-sonnet-4-20250514"
MODEL_gemini = "gemini-2.5-pro"

In [3]:
# Upper bound on the number of refinement cycles a single pipeline_loop() call runs.
MAX_CYCLES = 4

# Pool of scenario descriptions for seed questions. generate_chemistry_seed_question
# draws a random subset (without replacement) and interpolates it into the prompt.
CHEMISTRY_BASE_TOPICS = [
    "a reaction kinetics experiment analyzing the rate law of a multi-step catalyzed reaction",
    "an electrochemical cell under variable temperature with concentration polarization effects",
    "a phase equilibrium setup for a binary mixture undergoing fractional distillation",
    "a titration curve with mixed weak acids and buffer capacity analysis",
    "an adsorption isotherm on a porous catalyst surface (e.g., Langmuir or BET model)",
    "a gas-phase reaction inside a high-pressure reactor with temperature and pressure gradients",
    "an enzyme-catalyzed reaction with substrate inhibition at extreme pH",
    "an equilibrium mixture involving acid-base buffers under non-ideal ionic strength",
    "a complexometric titration involving metal-ligand stability constants",
    "a photochemical reaction driven by polychromatic light with quantum yield and quenching effects",
    "an electroplating process under non-uniform current density with overpotential effects",
    "a galvanic cell constructed with non-standard conditions and Nernst equation applications",
    "thermodynamic cycle of an ideal gas reaction analyzed under variable heat capacity",
    "a combustion process with incomplete combustion and analysis of enthalpy, entropy, and Gibbs energy",
    "an acid-base neutralization reaction under varying temperature and volume, analyzing heat release",
    "a radioactive decay series with branching paths and equilibrium calculations",
    "isotope separation using centrifugal or diffusive methods under real-world constraints",
    "spectroscopic analysis of molecular energy levels and bond vibrations in IR or UV-Vis",
    "an electrokinetic setup analyzing migration of charged species under an electric field and buffer gradient",
    "a polymerization reaction with chain transfer and propagation kinetics leading to molecular weight distribution",
    "a calorimetric experiment with phase change under controlled heating rates",
    "equilibrium of a multi-component redox titration with electrode potentials and activity coefficients",
    "determination of solubility product and common-ion effects in mixed-solvent systems",
    "a chromatography setup analyzing retention times and partition coefficients under a non-linear gradient",
    "thermodynamics of adsorption and desorption on nanostructured surfaces at low temperature",
    "analysis of photophysical processes like fluorescence quenching and Förster resonance energy transfer",
    "isotopic exchange reaction rates under variable temperature and pressure, including kinetic isotope effects",
    "electrolyte conductivity variation under extreme dilution and calculation of limiting molar conductivity",
    "modeling pH changes during multi-step buffer additions and iterative titrations",
    "thermochemical cycle of a bioenergetic process and calculation of overall enthalpy and entropy changes",
    "solvation and hydration energy analysis of ions in mixed-solvent systems",
    "kinetics of a competitive reversible reaction under non-ideal mixing conditions",
    "determining reaction order and activation energy from time-dependent concentration profiles",
    "diffusion-controlled rate of a bimolecular reaction in viscous solvents",
    "photodissociation and recombination kinetics under pulsed laser excitation",
    "electrochemical impedance of an RC-equivalent model of a reaction interface",
    "thermodynamic analysis of a Carnot cycle using a working fluid with Van der Waals behavior",
    "pressure-composition diagram and azeotrope analysis in vapor-liquid equilibrium",
    "analyzing non-ideal behavior of gases under Joule-Thomson throttling at extreme pressures",
    "catalytic hydrogenation kinetics under variable partial pressure and catalyst surface poisoning",
    "energy level quantization and tunneling effects in hydrogen-bonded molecular dimers",
    "quenching effects on a triplet-state photoreaction with collisional energy transfer",
    "analysis of half-life and decay heat in a mixed radionuclide source",
    "electrokinetic phenomena in charged colloids under an applied electric field gradient",
    "osmotic pressure effects in a semipermeable membrane separating solutions of differing molality",
    "solvent extraction equilibrium with multiple species and complex partition coefficients",
    "nucleation and growth rate of crystals under super-saturation and variable temperature profile",
    "analysis of enthalpic vs entropic contributions in a reversible protein-ligand binding process"
]

# Pool of difficulty instructions for seed questions. generate_chemistry_seed_question
# draws a random subset (without replacement) and interpolates it into the prompt.
CHEMISTRY_COMPLEXITY_TEMPLATES = [
    "Integrate concepts across two or more domains, such as electrochemistry with thermodynamics or kinetics with surface chemistry.",
    "Design a problem with multiple correct quantitative approaches, ensuring only one is valid under specified experimental conditions.",
    "Include variable dependencies like concentration-dependent rate constants, temperature-dependent solubility, or pressure-driven equilibrium shifts.",
    "Construct the problem with at least three tightly linked steps such as rate law derivation → equilibrium expression → numeric concentration calculation → final yield estimate.",
    "Require graphical reasoning, e.g. extracting rates from a concentration-time plot or interpreting slopes in Arrhenius or Van’t Hoff plots.",
    "Introduce hidden or implicit quantities (e.g. buffer capacity, fugacity, activity coefficients) that must be derived before reaching the final numeric answer.",
    "Use edge-case analysis or extreme conditions such as very high pressure, very low temperature, dilute solutions approaching ideality, or highly non-ideal activity behavior.",
    "Force the setup and reasoning to occur under real-world experimental constraints (e.g. Nernst potential under non-standard conditions, reaction in a flow reactor with continuous sampling).",
    "Ensure all variables and assumptions (e.g. ideal vs non-ideal behavior, constant vs variable heat capacity) are explicitly stated and numerically defined."
]


In [4]:
# === Utility: Call LLM ===
# ---openai---
def call_model_openai(prompt: str, model: str) -> str:
    """Send one user message to an OpenAI chat model and return its reply text.

    Args:
        prompt: Text sent as the single ``user`` message.
        model: Model identifier forwarded to the chat-completions endpoint.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = openai_client.chat.completions.create(
        model=model,
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

#---gemini---

def call_model_gemini(prompt: str, model: str) -> str:
    """Generate a reply for ``prompt`` with a Gemini model and return its text.

    Args:
        prompt: Text passed to ``generate_content``.
        model: Model name used to construct the ``GenerativeModel``.

    Returns:
        The response's ``text`` attribute, stripped of surrounding whitespace.
    """
    generative_model = genai.GenerativeModel(model_name=model)
    reply = generative_model.generate_content(prompt)
    return reply.text.strip()

# ---together---

def call_model_tog(prompt: str, model: str) -> str:
    """Send one user message to a Together-hosted chat model and return its reply.

    Args:
        prompt: Text sent as the single ``user`` message.
        model: Model identifier forwarded to the Together client.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace. The request is made with ``temperature=1.0``.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model=model,
        messages=chat_messages,
        temperature=1.0,
    )
    reply_text = completion.choices[0].message.content
    return reply_text.strip()

# ---claude---

def call_model_claude(prompt: str, model: str) -> str:
    """Send one user message to a Claude model and return its reply text.

    This helper is best-effort: any exception raised while making the request
    or reading the reply is printed and replaced by the sentinel ``"[Error]"``.

    Args:
        prompt: Text sent as the single ``user`` message.
        model: Model identifier forwarded to the Anthropic client.

    Returns:
        The text of the first content block, stripped of surrounding
        whitespace, or ``"[Error]"`` if anything went wrong.
    """
    request_kwargs = {
        "model": model,
        "max_tokens": 1024,
        "temperature": 0.7,
        "messages": [{"role": "user", "content": prompt}],
    }
    try:
        reply = claude_client.messages.create(**request_kwargs)
        return reply.content[0].text.strip()
    except Exception as err:
        print("[Claude Error]", err)
    return "[Error]"

In [5]:
# === Step 1: Generate Initial Chemistry Question ===
def generate_chemistry_seed_question() -> str:
    """Ask Gemini for a fresh seed chemistry problem built from random topics.

    Returns:
        The model's reply text as returned by call_model_gemini (whitespace
        stripped). The prompt requests a JSON object with "question" and
        "topic_tags", but the reply is returned as-is and is not parsed or
        validated here.
    """
    def generate_initial_prompt() -> str:
        """Build the generation prompt from randomly sampled topics and complexity rules."""
        # Draw 2-4 topics and 4-6 complexity templates; the min() caps keep the
        # sample sizes valid if either pool is ever shortened.
        topic_k = random.randint(2, min(4, len(CHEMISTRY_BASE_TOPICS)))
        complexity_k = random.randint(4, min(6, len(CHEMISTRY_COMPLEXITY_TEMPLATES)))

        # random.sample picks without replacement, so no entry is repeated.
        topic = random.sample(CHEMISTRY_BASE_TOPICS, k=topic_k)
        complexity = random.sample(CHEMISTRY_COMPLEXITY_TEMPLATES, k=complexity_k)

        # NOTE: `topic` and `complexity` are lists, so the f-string embeds their
        # Python list representation (brackets and quotes included) in the prompt.
        return f"""
You are a domain-expert AI in chemistry problem generation. Your task is to generate a **rigorous, self-contained graduate-level chemistry problem** that:

🔹 Combines multiple areas of chemistry  
🔹 Involves real-world physical and chemical dependencies  
🔹 Requires multi-step, chained symbolic and quantitative reasoning  
🔹 Is **fully defined, reproducible**, and yields **one unique final numeric answer**  
🔹 **Must use LaTeX format** for all chemical notation, equations, and math symbols.

---

###  Problem Requirements:

1. **Interdisciplinary Concept Integration**  
   Integrate at least **two major chemistry domains**, such as physical chemistry + organic, thermodynamics + electrochemistry, quantum chemistry + kinetics, etc.

2. **Structured Symbolic and Quantitative Reasoning**  
   The problem must require **minimum four to five symbolic/quantitative steps**, including derivations, chemical laws, or equations.

3. **Complete and Unambiguous Specifications**  
   - Clearly define system setup, conditions, and assumptions.  
   - Explicitly state all given data: concentrations, masses, temperature, pressure, constants, etc.  
   - Units must be clearly specified and must follow SI conventions.

4. **Question Construction Constraints**  
   - The final question must demand **one boxed numeric answer with correct SI or standard chemical units** (e.g., mol/L, J/mol·K).  
   - Avoid asking for intermediate steps — include only **one final explicitly asked question**.  
   - Ensure all constants (e.g., $R$, $F$, $k_B$) are numerically provided with units.

5. **Strict Formatting Guidelines**  
   - Use **LaTeX** for chemical reactions, thermodynamic equations, quantum terms, etc.  
   - Do not include qualitative prompts or ambiguous wording like “assume reasonable values.”  
   - Present the final answer as **boxed numeric value with correct units**.

6. **Reproducibility and Uniqueness**  
   - Ensure that the given data leads to **one unique, well-defined final result**.  
   - Avoid edge cases or scenarios requiring undefined assumptions.

---

###  Use These Concepts (Topics):  
{topic}

###  Follow These Complexity Requirements:  
{complexity}

---

###  Output Format (Strict JSON Only):

Return **only** the following JSON object (no markdown, no comments — strictly this format):

{{
  "question": "<Complete and rigorous LaTeX-formatted chemistry problem, with all chemical assumptions, equations, constants, given values, units, and one final numeric answer requested>",
  "topic_tags": ["<tag1>", "<tag2>", ...]
}}

---

### Important Guidelines:
- No placeholders or generic references — all values and constants must be specified.
- The problem must involve real-world chemical phenomena, ideally with graduate-level depth.
- Avoid trivial questions. Focus on reasoning, derivation, and synthesis of multiple concepts.
- Final question must be solvable with provided data only — do not require external lookups.

Produce **only** the JSON format shown — no markdown, extra formatting, or narrative.
""".strip()

    prompt = generate_initial_prompt()
    return call_model_gemini(prompt, MODEL_gemini)


In [6]:
# === Step 2: Extract Parameters, Assumptions, Ambiguities for Chemistry ===
def extract_parts_from_chemistry_question(seed_json: str) -> str:
    """Ask the o3 model to list the parameters, assumptions and concepts of a question.

    Args:
        seed_json: Question text (expected to be the JSON produced by the
            generation step); it is embedded verbatim at the end of the prompt.

    Returns:
        The model's reply text as returned by call_model_openai (whitespace
        stripped). The prompt requests a JSON object with "parameters",
        "assumptions" and "concepts", but the reply is not parsed here.
    """
    # Unlike some sibling prompts, this one is sent without .strip(), so it
    # starts and ends with a newline.
    prompt = f"""
You are a chemistry reasoning assistant.

Your task is to extract **all** key chemical components from the provided chemistry problem JSON. Be precise, comprehensive, and follow these guidelines:

1. **Parameters**  
   - List all chemical symbols, variables, and constants involved in the question.  
   - For each, provide:
     - `symbol` (e.g., "n", "ΔH", "Kₐ")
     - `meaning` (e.g., "number of moles", "enthalpy change", "acid dissociation constant")
     - `unit` (e.g., "mol", "kJ/mol", "mol/L", or "dimensionless" if applicable)
   - Include all numerical constants or standard values (e.g., R = 8.314 J/mol·K, Avogadro’s number, pH = 7).

2. **Assumptions**  
   - Extract explicitly stated assumptions (e.g., "ideal gas behavior", "solution is dilute", "reaction is at equilibrium").
   - Also infer common implicit assumptions critical to solving the question (e.g., "room temperature = 298K", "STP conditions", "complete dissociation").

3. **Concepts Used**  
   - Identify **chemical principles, laws, models, or concepts** needed to solve the question.
   - For each concept, briefly explain its relevance in this context.
   - Include both fundamental (e.g., mole concept, stoichiometry, Le Chatelier’s principle) and advanced ideas (e.g., Gibbs free energy, reaction kinetics, molecular orbital theory) as applicable.

### Output JSON Format (strict):
{{
  "parameters": [
    {{"symbol": "...", "meaning": "...", "unit": "..."}},
    ...
  ],
  "assumptions": ["...", "..."],
  "concepts": ["...", "..."]
}}

### Input JSON:
{seed_json}
"""
    return call_model_openai(prompt, MODEL_o3)


In [7]:
def rewrite_parts_to_make_tougher_chemistry(extracted_json: str) -> str:
    """Ask Gemini to turn extracted problem components into a harder question.

    Args:
        extracted_json: Text produced by the extraction step; it is embedded
            verbatim in the prompt as the "Original Extracted JSON".

    Returns:
        The model's reply text as returned by call_model_gemini (whitespace
        stripped). The prompt requests a JSON object with "question" and
        "topic_tags", but the reply is not parsed or validated here.
    """
    # The prompt is sent without .strip(), so it starts and ends with a newline.
    prompt = f"""
You are an expert chemistry reasoning engine.

Your task is to:
1. Take the extracted components of a chemistry problem.
2. Rewrite and enhance them to make the question **1000 times more challenging** — at an advanced **graduate-level**, requiring **multi-domain conceptual and quantitative reasoning**.
3. Ensure the final question is:
   - Fully presented in **LaTeX math notation** where applicable (e.g., equations, expressions, units).
   - Requires **no intermediate calculations** — ask only **one final, precise, numeric result**.
   - Demands **deep understanding of multiple chemistry subfields** (e.g., thermodynamics + kinetics, quantum chemistry + spectroscopy).
   - Includes **realistic, explicitly defined numerical parameters** — no vague assumptions.
   - Results in **one unique numeric final answer with correct SI units**, clearly expressed.
   - Involves **symbolic formulation**, **algebraic manipulation**, and **numerical calculation**.
   - Entirely self-contained — all constants, assumptions, and conditions must be specified.

---

### OUTPUT FORMAT (JSON only):

Return a fully rewritten **toughened problem** in the following strict JSON format:

{{
  "question": "<Complete, rigorous chemistry problem in LaTeX notation>",
  "topic_tags": ["<relevant chemistry domains>"]
}}

---

### INPUT (Original Extracted JSON):
{extracted_json}

---
Only return the final JSON — no explanation, no commentary.
"""
    return call_model_gemini(prompt, MODEL_gemini)


In [8]:
# === Step 4: Get Feedback and Improve Chemistry Question ===
def get_chemistry_feedback(question_json: str) -> str:
    """Ask the o3 model for a critical review of a chemistry question.

    Args:
        question_json: The question to review. The pipeline passes the raw text
            returned by the rewriting model; a dict or list is also accepted
            and is pretty-printed as JSON.

    Returns:
        The reviewer's feedback text as returned by call_model_openai
        (whitespace stripped).
    """
    # A str must be embedded verbatim. Running json.dumps on it would wrap the
    # whole question in quotes and escape every quote, newline, backslash and
    # non-ASCII character, so the reviewer would see a double-escaped blob
    # (with LaTeX backslashes doubled) instead of the actual question.
    if isinstance(question_json, str):
        question_block = question_json
    else:
        question_block = json.dumps(question_json, indent=2, ensure_ascii=False)

    prompt = f"""
You are a critical chemistry evaluator AI trained to review complex chemistry problems and solutions with a rigorous expert lens.

Analyze the following JSON which contains a chemistry question.

### Your tasks:
1. **Identify and point out ambiguities** in the question:
   - Are any compounds, concentrations, temperatures, pressures, phases, or reaction conditions undefined or unclear?
   - Are chemical terms or species like "the solution," "the compound," or "the product" ill-defined?

2. **Check for consistency**:
   - Are all symbols, notations, and abbreviations clearly defined (e.g., [H+], K_eq, ΔH)?
   - Are all reactions balanced and chemically valid?
   - Are there unnecessary or unresolved terms in the question?

3. **Assess logical flow and chemical reasoning**:
   - Does the question logically follow accepted chemical principles (stoichiometry, thermodynamics, kinetics, equilibrium, etc.)?
   - Are there any reasoning steps skipped, oversimplified, or chemically incorrect?
   - Are all reactions feasible and physically possible under the given conditions?

4. **Check assumptions and constraints**:
   - Are all assumptions (e.g., ideal gas, dilute solution, complete dissociation, constant temperature) stated explicitly?
   - Are any assumptions unrealistic or irrelevant?
   - Is there any hidden constraint (e.g., standard state conditions, temperature of 25°C) not explicitly declared?

5. **Ensure No Intermediate Sub-Questions**:
   - The question should not request sub-results or multi-step calculations explicitly.
   - It must lead clearly to a **single final numeric answer** with correct SI units (e.g., mol/L, atm, kJ/mol).
   - Avoid phrasings like:
     - “First calculate the number of moles, then...”
     - “Find the enthalpy change and then...”
     - “Derive the rate law and then compute...”
   - All intermediate work must be implicit. Only the **final output** should be asked.

### Input JSON:
{question_block}

### Output:
Return a **well-structured, technical paragraph of feedback**, identifying flaws, gaps, or improvements as per the tasks above. Be extremely specific, avoid general praise, and give examples when possible. Focus on clarity, chemical correctness, and question design quality.
""".strip()

    return call_model_openai(prompt, MODEL_o3)


In [9]:
def improve_chemistry_question_based_on_feedback(feedback: str, original_json: str) -> str:
    """Ask the GPT model to regenerate a question so it addresses reviewer feedback.

    Args:
        feedback: Reviewer feedback text; embedded in the prompt between
            triple double-quotes.
        original_json: Text of the question being revised; embedded verbatim.

    Returns:
        The model's reply text as returned by call_model_openai (whitespace
        stripped). The prompt requests a JSON object with "question" and
        "topic_tags", but the reply is not parsed or validated here.
    """
    prompt = f"""
You previously generated a high-level chemistry problem, but it has been reviewed and received the following detailed feedback:
### Original question:
{original_json}

### Feedback:
\"\"\"{feedback}\"\"\"

Your new task:
- Revise and improve the original chemistry problem so that it fully addresses **every point** in the feedback.
- The resulting question must:
  1. Be **fully self-contained** with all reagents, compounds, conditions, constants, and SI units explicitly specified.
  2. Be **chemically rigorous**, integrating at least two subdomains (e.g., physical chemistry + organic chemistry, or thermodynamics + kinetics, etc.).
  3. Be **challenging and solvable**, requiring **multi-step quantitative and conceptual reasoning**.
  4. Include **chemical equations and mathematical expressions** in proper **LaTeX notation**.
  5. Require **only one** explicit **final numerical answer** at the end, with correct **SI units**.
  6. Do **not** ask for intermediate results — just one clear, final outcome.
  7. Be clearly answerable from the information given, such that the answer is **unique** and **deterministic**.

### Important:
- Do NOT include any commentary or markdown formatting.
- Output must be strictly JSON and fully parsable.
- The regenerated problem must **fully reflect and satisfy the feedback** above.

Strictly return ONLY a valid, properly formatted JSON object with the following format:
{{
  "question": "<Improved and fully self-contained chemistry problem in strict LaTeX notation>",
  "topic_tags": ["<tag1>", "<tag2>", ...]
}}
Produce ONLY the JSON as specified.
""".strip()

    return call_model_openai(prompt, MODEL_gpt)


In [10]:
# === Step 5: Get Final Answer from 2 Models for Chemistry ===
def get_final_chemistry_answer(question_json: str) -> dict:
    """Have the o3 and Gemini solvers answer the same question concurrently.

    Args:
        question_json: Question text; embedded verbatim in the solver prompt.

    Returns:
        A dict with keys "gpt_o3" and "gemini2.5pro" holding each model's raw
        reply text. An exception raised by either call propagates to the caller.
    """
    prompt = f"""
You are a high-precision chemistry solver.

Solve the following chemistry problem and return **only the final boxed numeric result with appropriate SI or conventional units**.

---

### STRICT OUTPUT RULES:
- Do NOT show any working, steps, or explanations.
- Do NOT return anything other than the final answer.
- Use boxed format: Example → `"C = 0.250 mol/L"` or `"ΔH = -285.8 kJ/mol"`.
- Include proper units and symbols (e.g., `mol/L`, `kPa`, `g/mol`, `kJ`, `°C`, `atm`, `M`, `mol`, `L`).
- If constants (e.g., `R = 8.314`, `ln(2)`, `π`) are involved, compute numerically to **at least 3 significant digits**.
- Round values sensibly according to significant figures in the question.

---

### CHEMISTRY PROBLEM INPUT:
{question_json}

---

### FINAL OUTPUT:
**Strictly** return just a single string with the *boxed numeric result*. *No markdown*, *no JSON*, *no equations*.
"""
    # Submit both solver calls before collecting either result so they overlap;
    # results are read in submission order (o3 first, then Gemini).
    with ThreadPoolExecutor() as pool:
        pending = {
            "gpt_o3": pool.submit(call_model_openai, prompt, MODEL_o3),
            "gemini2.5pro": pool.submit(call_model_gemini, prompt, MODEL_gemini),
        }
        answers = {label: task.result() for label, task in pending.items()}

    return answers


In [11]:
def compare_chemistry_answers(ans1: str, ans2: str) -> dict:
    """Ask the GPT model whether two final answers express the same result.

    Args:
        ans1: First model's final answer text.
        ans2: Second model's final answer text.

    Returns:
        The parsed JSON verdict. The prompt requests an object with
        "similarity_score", "decision" ("similar" or "different") and
        "comment"; the parsed value is returned without schema validation.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON and no parsable
            ``{...}`` span can be recovered from it.
    """
    prompt = f"""
You are a chemistry answer verification engine.

Your task is to compare two final answers from chemistry models and determine whether they represent the **same chemical result**. Use chemical knowledge to evaluate unit compatibility, stoichiometric correctness, numeric equivalence, and chemical/molecular representation.

---

### Evaluation Criteria and Scoring

Each of the following aspects contributes to a **similarity score** between 0.0 and 1.0. The total score is calculated by adding weighted partial scores:

| Criterion                      | Weight | Description |
|-------------------------------|--------|-------------|
| 1. Unit Compatibility         | 0.20   | Full score if units are identical or chemically compatible (e.g., mol vs mmol, atm vs Pa). Zero if unrelated (e.g., mol vs g/L). |
| 2. Numerical Closeness        | 0.25   | Compare using relative error. Full score if < 1%, partial for 1–2%, zero if >2%. |
| 3. Formula/Molecule Match     | 0.20   | Score full if chemical formulas or names refer to the same substance (e.g., H₂O vs water). |
| 4. Stoichiometric Consistency | 0.20   | Based on molar ratios, balanced equations, or concentration-based correctness. |
| 5. Rounding/Notation Format   | 0.15   | Full score if formats differ but the numeric or symbolic values are effectively same (e.g., 3.14 vs 3.1416). |

---

### Final Decision Rule

- If total score **≥ 0.80**, return: `"decision": "similar"`
- If total score **< 0.80**, return: `"decision": "different"`

---

### Example:

**Answer 1:** `0.5 mol of H₂SO₄`  
**Answer 2:** `500 mmol sulfuric acid`

Evaluation:

- Units: mol vs mmol → compatible → 0.20  
- Numeric: 0.5 mol = 500 mmol → 0.25  
- Formula Match: H₂SO₄ = sulfuric acid → 0.20  
- Stoichiometry: valid conversion → 0.20  
- Rounding/Format: naming difference, no error → 0.15

**Total score = 1.00 → "similar"**

---

### Your Task

Compare the following two answers and return a **valid JSON object** in this format:

{{
  "similarity_score": <float between 0.0 and 1.0>,
  "decision": "similar" or "different",
  "comment": "<brief technical explanation>"
}}

Only return the JSON object. Do NOT include Markdown, LaTeX formatting, or extra commentary.

### Input:
Answer 1: {ans1}  
Answer 2: {ans2}
"""
    response = call_model_openai(prompt, MODEL_gpt)
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        # Models sometimes wrap the object in Markdown code fences or add a
        # sentence around it despite the instructions. Retry on the outermost
        # {...} span before giving up, so one stray fence does not abort the run.
        start = response.find("{")
        end = response.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(response[start:end + 1])


In [12]:
# === Main Pipeline Loop ===
def pipeline_loop():
    """Run up to MAX_CYCLES generate/toughen/review/solve/compare cycles.

    Each cycle extracts the components of the current seed question, rewrites
    them into a harder question, collects reviewer feedback, regenerates the
    question from that feedback, and has two solver models answer it. The loop
    returns as soon as the comparison step labels the answers "different";
    otherwise the improved question becomes the seed of the next cycle.

    Returns:
        tuple: ``(improved_question, cycle_logs)``. ``improved_question`` is the
        raw text of the last regenerated question (``None`` if no cycle ran) and
        ``cycle_logs`` is a list with one dict of intermediate outputs per
        executed cycle.
    """
    seed_json = None
    improved_question = None
    cycle_logs = []

    for cycle in range(MAX_CYCLES):
        print(f"\n [Cycle {cycle + 1}]")

        # Step 1: a fresh seed is generated on the first cycle only. Later
        # cycles reuse the previous improved question (a disagreement returns
        # from the function instead of regenerating).
        if seed_json is None:
            seed_json = generate_chemistry_seed_question()

        # Step 2
        extracted = extract_parts_from_chemistry_question(seed_json)

        # Step 3
        tougher_question = rewrite_parts_to_make_tougher_chemistry(extracted)

        # Step 4
        feedback = get_chemistry_feedback(tougher_question)
        improved_question = improve_chemistry_question_based_on_feedback(feedback, tougher_question)

        # Step 5
        answer_dict = get_final_chemistry_answer(improved_question)
        ans_o3 = answer_dict["gpt_o3"]
        ans_gemini = answer_dict["gemini2.5pro"]

        # Step 6
        similarity_result = compare_chemistry_answers(ans_gemini, ans_o3)

        # The comparison normally yields a dict; tolerate any other JSON value.
        if isinstance(similarity_result, dict):
            decision = similarity_result.get("decision", "different")
            score = similarity_result.get("similarity_score", 0.0)
        else:
            decision = similarity_result
            score = None

        # The decision is model-generated text, so compare it case- and
        # whitespace-insensitively; "Different" must still stop the loop.
        models_disagree = str(decision).strip().lower() == "different"

        print(f"\n Gemini Answer: {ans_gemini}\n o3 Answer: {ans_o3}\n Similarity Decision: {decision}  (Score: {score})")

        # Save this cycle's outputs
        cycle_logs.append({
            "cycle": cycle + 1,
            "seed_json": seed_json,
            "extracted": extracted,
            "tougher_question": tougher_question,
            "feedback": feedback,
            "improved_question": improved_question,
            "ans_gemini": ans_gemini,
            # Legacy key: it has always held the Gemini answer despite its
            # name. Kept so existing readers of the log files keep working.
            "ans_gpt": ans_gemini,
            "ans_o3": ans_o3,
            "similarity_result": similarity_result,
        })

        if models_disagree:
            print("\n Final Refined Question (as models disagree):")
            print(improved_question)
            return improved_question, cycle_logs

        print(" Answers similar — reusing same question as new seed...\n")
        # Reuse the improved question as next seed
        seed_json = improved_question
        # Pause between cycles only; there is nothing to wait for after the last one.
        if cycle + 1 < MAX_CYCLES:
            time.sleep(2)

    print("\nReturning last question.")
    print(improved_question)
    return improved_question, cycle_logs

In [14]:
# === Run ===
if __name__ == "__main__":
    # Run the pipeline several times and persist both the final questions and
    # the per-cycle logs as JSON files in the working directory.
    all_results = []
    # Named num_iterations rather than `iter`: at notebook top level, `iter`
    # would shadow the builtin iter() for the rest of the kernel session.
    num_iterations = 10
    all_cycle_logs = []

    for i in range(num_iterations):
        print(f"\n=== Running pipeline_loop #{i+1}/{num_iterations} ===")
        try:
            final_question, cycle_logs = pipeline_loop()
            all_results.append(final_question)
            all_cycle_logs.append({
                "iteration": i + 1,
                "logs": cycle_logs
            })
        except Exception as e:
            # Best-effort batch: record the failure and continue with the next
            # iteration instead of losing the results gathered so far.
            print(f"[ERROR] Iteration {i+1} failed: {e}")
            all_results.append({"error": str(e)})
            all_cycle_logs.append({
                "iteration": i + 1,
                "logs": [],
                "error": str(e)
            })

    # Save the final question (or error record) of every iteration
    all_results_path = "Chem_all_results1.json"
    with open(all_results_path, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2)

    # Save all cycle logs
    all_cycle_logs_path = "Chem_cycle_logs_1.json"
    with open(all_cycle_logs_path, "w", encoding="utf-8") as f:
        json.dump(all_cycle_logs, f, indent=2)

    # Report the files actually written (the old message named a file that is never created).
    print(f"\n Completed all iterations and saved results to {all_results_path} and {all_cycle_logs_path}")


=== Running pipeline_loop #1/10 ===

 [Cycle 1]

 Gemini Answer: ```json
"t_detect = 3.83 × 10⁶ s"
```
 o3 Answer: t_detect = 4.47 × 10^6 s
 Similarity Decision: different  (Score: 0.65)

 Final Refined Question (as models disagree):
{
  "question": "A non-aqueous catalytic system is designed for the low-temperature decomposition of molecular formic acid ($\\mathrm{HCOOH}$) in liquid ammonia at $T = 195.00 \\, \\mathrm{K}$ (well above ammonia's freezing point, and ensuring all reactants remain soluble). A homogeneous Ru-based molecular catalyst is used. The stoichiometry is:\n\n\\[\n\\mathrm{HCOOH~(solv)} \\xrightarrow{\\text{Ru~cat.}} \\mathrm{CO}_2\\mathrm{~(g)} + \\mathrm{H}_2\\mathrm{~(g)}\n\\]\n\nAt this temperature, formic acid remains undissociated in ammonia, so only $\\mathrm{HCOOH}$ molecules are reactive; no formate anion is present. The pseudo-first-order rate constant, $k_{\\text{obs}}(T)$, for molecular $\\mathrm{HCOOH}$ is described by an Arrhenius relation corrected fo