In [1]:
# -*- coding: utf-8 -*-
# =========================================================================================
# ValidationMoE.py — Run Herschian Router v2 on ValidationHard.jsonl
# =========================================================================================

import json
from pathlib import Path
from typing import Dict, Any, List

import router_v2 as HR


# ------------------------------ Paths ------------------------------

# Anchor every path to the directory holding this script. ``__file__`` is not
# defined in an interactive / notebook session, so fall back to the current
# working directory there.
try:
    ROOT = Path(__file__).resolve().parent
except NameError:
    ROOT = Path.cwd()


# Validation input and the two report files written by main().
PATH_VAL = ROOT.joinpath("ValidationHard.jsonl")
LOG_TXT = ROOT.joinpath("validationHard.MoE_results.txt")
LOG_MD = ROOT.joinpath("validationHard.MoE_results.md")

# Directories handed to the router as the "M", "Q" and "C" expert paths.
MATH_DIR = ROOT.joinpath("Math")
CHAT_DIR = ROOT.joinpath("Qwen2.5-0.5B-Instruct")
CODER_DIR = ROOT.joinpath("Qwen2.5-Coder-0.5B-Instruct")


# ------------------------------ Utilities ------------------------------

def _read_jsonl(path: Path) -> List[Dict[str, Any]]:
    """Load a JSON Lines file into a list of parsed records.

    Blank or whitespace-only lines are skipped. The file is decoded as
    ``utf-8-sig`` so that a leading byte-order mark (often added by Windows
    editors) is dropped instead of making ``json.loads`` fail on the first
    line; UTF-8 files without a BOM decode exactly as with plain ``utf-8``.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        One parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message names the file and the 1-based line number.
    """
    records: List[Dict[str, Any]] = []
    with path.open("r", encoding="utf-8-sig") as f:
        for lineno, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Re-raise the same exception type so existing handlers keep
                # working, but say which line of which file is broken.
                raise json.JSONDecodeError(
                    f"{path}, line {lineno}: {err.msg}", err.doc, err.pos
                ) from err
    return records


def _format_txt_block(idx: int, item: Dict[str, Any], res: Dict[str, Any]) -> str:
    """Render one question and its routing result as a plain-text report block.

    Args:
        idx: 1-based position of the question in the run; also used as the
            ID when ``item`` has no ``"id"`` key.
        item: Dataset record; the keys ``id``, ``topic``, ``diff`` and
            ``query`` are read, each with a fallback when absent.
        res: Router result; the keys ``outputs``, ``stage1_losses``,
            ``per_task_losses``, ``assignments`` and ``tasks`` are read, each
            with an empty fallback when absent.

    Returns:
        The block as a single string ending in a newline. The ``[ANSWER]``,
        ``[EXPLANATION]`` and ``[CODE]`` sections appear only when the
        corresponding entry of ``res["outputs"]`` is truthy.
    """

    def _rounded(losses: Dict[str, Any]) -> str:
        # Losses are shown to four decimals as a dict repr.
        return str({name: round(float(val), 4) for name, val in losses.items()})

    question_id = item.get("id", idx)
    subject = item.get("topic", "")
    level = item.get("diff", "")
    question = item.get("query", "")

    outputs = res.get("outputs", {})
    stage1 = res.get("stage1_losses", {})
    task_losses = res.get("per_task_losses", {})
    assignments = res.get("assignments", {})
    tasks = res.get("tasks", [])

    report = [
        "=" * 100,
        f"TEST #{idx} | ID = {question_id}",
        f"Topic: {subject} | Diff: {level}",
        "-" * 100,
        "QUESTION:",
        question,
        "",
        "Stage-1 losses: " + _rounded(stage1),
        "Tasks: " + str(tasks),
    ]
    report.extend(
        f"{task.title()} losses: " + _rounded(scores)
        for task, scores in task_losses.items()
    )
    report += ["Assignments: " + str(assignments), ""]

    # Optional sections, in fixed order; each is followed by a blank line.
    for key, banner in (
        ("answer", "[ANSWER]"),
        ("explanation", "[EXPLANATION]"),
        ("code", "[CODE]"),
    ):
        body = outputs.get(key)
        if body:
            report += [banner, body, ""]

    return "\n".join(report) + "\n"


def _fenced(text: str, info: str = "text") -> List[str]:
    """Return the lines of a Markdown fenced code block wrapping ``text``.

    The fence is three backticks unless ``text`` itself contains a run of
    three or more backticks; then it is one backtick longer than the longest
    run, so nothing inside ``text`` can close the block early.

    Args:
        text: Content placed verbatim between the fences.
        info: Info string written after the opening fence.

    Returns:
        ``[opening_fence + info, text, closing_fence]``.
    """
    longest = run = 0
    for ch in text:
        run = run + 1 if ch == "`" else 0
        if run > longest:
            longest = run
    fence = "`" * max(3, longest + 1)
    return [f"{fence}{info}", text, fence]


def _format_md_block(idx: int, item: Dict[str, Any], res: Dict[str, Any]) -> str:
    """Render one question and its routing result as a Markdown report block.

    Every free-text field (question, diagnostics, answer, explanation, code)
    goes inside a fenced code block whose fence is sized by ``_fenced``, so
    content that already contains backtick fences does not break the report.
    For content without such fences the output uses plain three-backtick
    fences.

    Args:
        idx: 1-based position of the question in the run; also used as the
            ID when ``item`` has no ``"id"`` key.
        item: Dataset record; the keys ``id``, ``topic``, ``diff`` and
            ``query`` are read, each with a fallback when absent.
        res: Router result; the keys ``outputs``, ``stage1_losses``,
            ``per_task_losses``, ``assignments`` and ``tasks`` are read, each
            with an empty fallback when absent.

    Returns:
        The block as a single string ending in a newline. The Answer,
        Explanation and Code sections appear only when the corresponding
        entry of ``res["outputs"]`` is truthy.
    """
    qid = item.get("id", idx)
    topic = item.get("topic", "")
    diff = item.get("diff", "")
    q = item.get("query", "")

    outs = res.get("outputs", {})
    s1 = res.get("stage1_losses", {})
    per_task = res.get("per_task_losses", {})
    assignments = res.get("assignments", {})
    tasks = res.get("tasks", [])

    # Routing diagnostics are assembled first so they can be fenced as one
    # piece of text.
    diagnostics = [
        "Stage-1 losses: " + str({k: round(float(v), 4) for k, v in s1.items()}),
        "Tasks: " + str(tasks),
    ]
    for t, mp in per_task.items():
        diagnostics.append(
            f"{t.title()} losses: " + str({k: round(float(v), 4) for k, v in mp.items()})
        )
    diagnostics.append("Assignments: " + str(assignments))

    lines = [
        f"## TEST #{idx} — ID = {qid}",
        "",
        f"- **Topic:** {topic}",
        f"- **Difficulty:** {diff}",
        "",
        "### Question",
        "",
    ]
    lines.extend(_fenced(q))
    lines.extend(["", "### Routing diagnostics", ""])
    lines.extend(_fenced("\n".join(diagnostics)))
    lines.append("")

    # Optional sections: (outputs key, heading, fence info string).
    for key, heading, info in (
        ("answer", "Answer", "text"),
        ("explanation", "Explanation", "text"),
        ("code", "Code", "python"),
    ):
        if outs.get(key):
            lines.extend([f"### {heading}", ""])
            lines.extend(_fenced(outs[key], info))
            lines.append("")

    return "\n".join(lines) + "\n"


# ------------------------------ Main ------------------------------

def main() -> None:
    """Run every validation question through the router and write both reports.

    Steps, in order: check that ``PATH_VAL`` exists, print the configured
    paths, build the three experts via ``HR.build_experts``, create an
    ``HR.RouterConfig``, load the dataset, call ``HR.route_and_execute`` once
    per question while printing a short summary, and finally write the
    collected text and Markdown blocks to ``LOG_TXT`` and ``LOG_MD``.

    Raises:
        FileNotFoundError: If ``PATH_VAL`` does not exist.
    """
    if not PATH_VAL.exists():
        raise FileNotFoundError(f"Validation file not found: {PATH_VAL}")

    for banner in (
        f"[PATH] ROOT     = {ROOT}",
        f"[PATH] VAL      = {PATH_VAL}",
        f"[PATH] LOG TXT  = {LOG_TXT}",
        f"[PATH] LOG MD   = {LOG_MD}",
        f"[PATH] MATH DIR = {MATH_DIR}",
        f"[PATH] CHAT DIR = {CHAT_DIR}",
        f"[PATH] CODERDIR = {CODER_DIR}",
        "",
    ):
        print(banner)

    # One config entry per expert: short name plus model directory.
    experts = HR.build_experts(
        [
            {"name": tag, "path": str(folder)}
            for tag, folder in (("M", MATH_DIR), ("Q", CHAT_DIR), ("C", CODER_DIR))
        ]
    )

    router_settings = {
        "tau": 0.5,
        "exclusive_roles": False,
        "exclusive_allow_all_if_best_all": True,
        "native_solve": "M",
        "native_explain": "Q",
        "native_code": "C",
        "min_relative_gain": 0.20,
        "solve_instruction": (
            "You are the MATH specialist. Solve the problem rigorously. "
            "If something is missing, state assumptions."
        ),
        "explain_instruction": (
            "You are the EXPLAIN specialist. Given the answer, explain the reasoning "
            "clearly and step by step."
        ),
        "code_instruction": (
            "You are the CODE specialist. Given the math problem and explanation, "
            "write clean, well-structured Python code that solves it."
        ),
        "max_new_solve": 1024,
        "max_new_explain": 1024,
        "max_new_code": 1024,
        "do_sample": False,
    }
    cfg = HR.RouterConfig(**router_settings)

    instruction = (
        "Router uses M for solving, Q for explaining, C for coding. Follow the split "
        "strictly: M -> Q -> C (sequential)."
    )

    dataset = _read_jsonl(PATH_VAL)
    print(f"[INFO] Loaded {len(dataset)} questions.")
    print("")

    txt_blocks: List[str] = []
    md_blocks: List[str] = []

    for idx, item in enumerate(dataset, start=1):
        # Same fallbacks the formatters use, so console and reports agree.
        print(
            f"[RUN] {idx} | {item.get('id', idx)} | "
            f"{item.get('topic', '')} | {item.get('diff', '')}"
        )

        res = HR.route_and_execute(
            experts=experts,
            full_question=item.get("query", ""),
            global_instruction=instruction,
            cfg=cfg,
        )

        print(f"  Active experts: {res.get('active_experts', [])}")
        print(f"  Tasks: {res.get('tasks', [])}")
        print(f"  Assignments: {res.get('assignments', {})}")
        print("-" * 80)

        txt_blocks.append(_format_txt_block(idx, item, res))
        md_blocks.append(_format_md_block(idx, item, res))

    # Both reports are written once, after every question has been processed.
    for target, blocks in ((LOG_TXT, txt_blocks), (LOG_MD, md_blocks)):
        target.write_text("\n".join(blocks), encoding="utf-8")

    print("")
    print("Validation Complete.")
    print(f"TXT: {LOG_TXT}")
    print(f"MD : {LOG_MD}")


# Run the validation only when this file is executed as a script (or as a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


`torch_dtype` is deprecated! Use `dtype` instead!


[PATH] ROOT     = c:\Users\super\Desktop\MoE_LLM\Pretrained
[PATH] VAL      = c:\Users\super\Desktop\MoE_LLM\Pretrained\ValidationHard.jsonl
[PATH] LOG TXT  = c:\Users\super\Desktop\MoE_LLM\Pretrained\validationHard.MoE_results.txt
[PATH] LOG MD   = c:\Users\super\Desktop\MoE_LLM\Pretrained\validationHard.MoE_results.md
[PATH] MATH DIR = c:\Users\super\Desktop\MoE_LLM\Pretrained\Math
[PATH] CHAT DIR = c:\Users\super\Desktop\MoE_LLM\Pretrained\Qwen2.5-0.5B-Instruct
[PATH] CODERDIR = c:\Users\super\Desktop\MoE_LLM\Pretrained\Qwen2.5-Coder-0.5B-Instruct

[INFO] Loaded 50 questions.

[RUN] 1 | 1 | Real Analysis | Engineering hard
[S1] Prompt preview: 'Let (f_n) be a sequence of C^1 functions on [0,1] such that (i) |f_n(x)| ≤ 1 for all x and n, (ii) (f_n') is equicontinuous and uniformly bounded on [0,1], and '
[S1] Losses: {'M': 2.1628, 'Q': 1.9876, 'C': 2.4791}
[LOSS/solve] E=M | loss=1.7272 | text='Problem: Let (f_n) be a sequence of C^1 functions on [0,1] such that (i) |f_n(x)| ≤ 1 for 

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  Active experts: ['Q', 'M', 'C']
  Tasks: ['solve', 'explain', 'code']
  Assignments: {'solve': 'M', 'explain': 'Q', 'code': 'C'}
--------------------------------------------------------------------------------
[RUN] 2 | 2 | Measure & Integration | Engineering hard
[S1] Prompt preview: 'Construct explicitly a sequence of measurable functions (f_n) on [0,1] such that f_n(x) → 0 almost everywhere, the sequence is dominated by an integrable envelo'
[S1] Losses: {'M': 2.0318, 'Q': 2.1142, 'C': 2.414}
[LOSS/solve] E=M | loss=2.1358 | text='Problem: Construct explicitly a sequence of measurable functions (f_n) on [0,1] such that f_n(x) → 0 almost everywhere, the sequence is dominated by an integrable envelope |f_n(x)| ≤ g(x) with g ∈ L^1[0,1], but the convergence of ∫_0^1 |f_n(x)| dx → 0 is very slow and highly non-uniform in n. then design a second sequence (g_n) for which monotone convergence holds but dominated convergence fails due to lack of a finite integrable majorant. \n  \n Prove r