In [None]:
from phaseedge.storage.store import get_jobstore
from phaseedge.jobs.store_ce_model import lookup_ce_by_key
from phaseedge.storage.cetrainref_dataset import Dataset

def delete_phaseedge_entries(ce_key: str, dry_run: bool = True) -> None:
    """
    Delete the MongoDB entries associated with a ce_key: its CEModelDoc, the
    CETrainRef_dataset it references, and the Atomate2 relaxation tasks of that
    dataset's training references.  When dry_run=True, the function reports how
    many documents would be removed without deleting them.

    Parameters
    ----------
    ce_key : str
        Cluster-expansion key whose documents should be removed.  The
        dataset_key is not an argument; it is read from the stored CEModelDoc.
    dry_run : bool
        If True (default), only report counts.  If False, perform deletions.

    Raises
    ------
    ValueError
        If no CEModelDoc is found for ``ce_key``.
    """
    js = get_jobstore()

    # Look up the CEModelDoc to obtain the dataset_key it was trained on.
    ce_doc = lookup_ce_by_key(ce_key)
    if ce_doc is None:
        raise ValueError(f"No CEModelDoc found for ce_key={ce_key!r}.")

    dataset_key = ce_doc["dataset_key"]
    dataset = Dataset.from_key(dataset_key)

    # Build deletion criteria: CEModelDoc and dataset document come first so
    # the preview lists them at the top.
    criteria_info: list[tuple[str, dict]] = [
        ("CEModelDoc", {"output.kind": "CEModelDoc", "output.ce_key": ce_key}),
        ("CETrainRef_dataset", {"output.kind": "CETrainRef_dataset", "output.dataset_key": dataset_key}),
    ]

    # For each training reference, build criteria for its relaxation tasks.
    # Several references can share the same (occ_key, calc settings) tuple, so
    # de-duplicate to avoid counting/deleting with the same query twice.
    # NOTE(review): the criteria do not filter on metadata.max_force_eV_per_A;
    # confirm that matching on these five fields only is intended.
    seen = set()
    for ref in dataset.train_refs:
        key = (
            ref.occ_key,
            ref.calc_spec.calculator,
            ref.calc_spec.relax_type.value,
            ref.calc_spec.spin_type.value,
            ref.calc_spec.frozen_sublattices,
        )
        if key in seen:
            continue
        seen.add(key)
        criteria_info.append((
            "relax_task",
            {
                "metadata.occ_key": ref.occ_key,
                "metadata.calculator": ref.calc_spec.calculator,
                "metadata.relax_type": ref.calc_spec.relax_type.value,
                "metadata.spin_type": ref.calc_spec.spin_type.value,
                "metadata.frozen_sublattices": ref.calc_spec.frozen_sublattices,
            }
        ))

    # Preview counts for all criteria
    for label, criteria in criteria_info:
        count = js.count(criteria)
        print(f"{label} -> {count} document(s) match criteria {criteria}")

    if dry_run:
        print("Dry run only; no deletions performed.")
        return

    # Perform deletions in reverse order: relax tasks first, then the dataset
    # document, and the CEModelDoc last.  The CEModelDoc is what this function
    # uses to find everything else, so removing it first would make an
    # interrupted run impossible to repeat (the lookup above would raise).
    # NOTE(review): documents are removed from js.docs_store only; confirm
    # whether any additional stores hold data that also needs cleanup.
    for label, criteria in reversed(criteria_info):
        pre_count = js.count(criteria)
        if pre_count:
            js.docs_store.remove_docs(criteria)
            post_count = js.count(criteria)
            print(f"{label}: deleted {pre_count - post_count}, remaining {post_count}")
        else:
            print(f"{label}: nothing to delete.")

    print("Deletion complete.")

# Preview only: dry_run=True reports the matching document counts and deletes nothing.
delete_phaseedge_entries(
    ce_key="cc4deb3c2ff88cfef40a9a8bb7954c1239dfb5310d4a00c0cdc9b5c556c8527d",
    dry_run=True,
)

ValueError: No CEModelDoc found for ce_key='cc4deb3c2ff88cfef40a9a8bb7954c1239dfb5310d4a00c0cdc9b5c556c8527d'.

In [1]:
from typing import Any
from phaseedge.storage.store import get_jobstore
from phaseedge.schemas.calc_spec import CalcSpec


def debug_energy_lookup(
    *,
    occ_key: str,
    calc_spec: CalcSpec,
    limit: int = 5,
) -> None:
    """
    Inspect why lookup_total_energy_eV is (not) finding an existing relaxation.

    Prints document counts for three criteria of increasing strictness
    (occ_key only -> everything except spin_type -> full criteria) and then
    shows the metadata of the first ``limit`` docs that match on occ_key alone.

    Parameters
    ----------
    occ_key : str
        Occupation key stored under ``metadata.occ_key``.
    calc_spec : CalcSpec
        Calculation settings whose fields are matched against ``metadata.*``.
    limit : int
        Maximum number of matching documents to print (default 5).
    """
    js = get_jobstore()

    def _enum_value(field: Any) -> Any:
        # Handle Enum[str] vs plain str
        return getattr(field, "value", field)

    # Full criteria (what lookup_total_energy_eV is using now)
    full_criteria: dict[str, Any] = {
        "metadata.occ_key": occ_key,
        "metadata.calculator": calc_spec.calculator,
        "metadata.relax_type": _enum_value(calc_spec.relax_type),
        "metadata.spin_type": _enum_value(calc_spec.spin_type),
        "metadata.max_force_eV_per_A": calc_spec.max_force_eV_per_A,
        "metadata.frozen_sublattices": calc_spec.frozen_sublattices,
        "output.output.energy": {"$exists": True},
    }

    # Same as the full criteria, minus spin_type
    no_spin_criteria = dict(full_criteria)
    no_spin_criteria.pop("metadata.spin_type", None)

    # Loosest query: match on occ_key alone, so docs from every calculator /
    # relax setting (and docs without an energy) are included.
    core_criteria: dict[str, Any] = {
        "metadata.occ_key": occ_key,
    }

    print("Counts:")
    print(f"  core (occ_key only):          {js.count(core_criteria)}")
    print(f"  no_spin (with max_force):     {js.count(no_spin_criteria)}")
    print(f"  full (with spin + max_force): {js.count(full_criteria)}")

    # f-string so the actual limit is shown instead of the literal "{limit}"
    print(f"\nDocs matching core criteria (up to {limit}):")
    for i, doc in enumerate(
        js.docs_store.query(
            criteria=core_criteria,
            properties={"_id": 1, "metadata": 1},
        )
    ):
        if i >= limit:
            break
        md = doc.get("metadata", {})
        print(
            f"  _id={doc.get('_id')} | "
            f"occ_key={str(md.get('occ_key'))[:12]}... | "
            f"calculator={md.get('calculator')} | "
            f"relax_type={md.get('relax_type')} | "
            f"spin_type={md.get('spin_type', '<missing>')} | "
            f"maxF={md.get('max_force_eV_per_A', '<missing>')} | "
            f"frozen_sublattices={md.get('frozen_sublattices')!r}"
        )


from phaseedge.schemas.calc_spec import RelaxType, SpinType

# Calculation settings of the relaxation we expect to already exist in the store.
calc_spec = CalcSpec(
    calculator="vasp-mp-24",
    relax_type=RelaxType.FULL,
    spin_type=SpinType.NONMAGNETIC,
    max_force_eV_per_A=0.02,
    frozen_sublattices="",
)

# Show how many stored docs match this occupation at each level of strictness.
debug_energy_lookup(
    calc_spec=calc_spec,
    occ_key="28cd7f89f6fdfbcf382a6d01c6b11582c1f22f334c194049f3c8d02830f3df31",
)

  from .autonotebook import tqdm as notebook_tqdm


Counts:
  core (no spin, no max_force): 4
  no_spin (with max_force):     1
  full (with spin + max_force): 1

Docs matching core criteria (up to {limit}):
  _id=690bc9e053e6a4f48f9d2595 | occ_key=28cd7f89f6fd... | calculator=MACE-MPA-0 | relax_type=full | spin_type=nonmagnetic | maxF=0.02 | frozen_sublattices=''
  _id=690bc9e253e6a4f48f9d2a22 | occ_key=28cd7f89f6fd... | calculator=MACE-MPA-0 | relax_type=full | spin_type=nonmagnetic | maxF=0.02 | frozen_sublattices=''
  _id=690bf92653e6a4f48fe79f44 | occ_key=28cd7f89f6fd... | calculator=vasp-mp-gga | relax_type=full | spin_type=nonmagnetic | maxF=0.02 | frozen_sublattices=''
  _id=690c2f5c53e6a4f48f43530a | occ_key=28cd7f89f6fd... | calculator=vasp-mp-24 | relax_type=full | spin_type=nonmagnetic | maxF=0.02 | frozen_sublattices=''
