Assuming the OD sample data has already been pulled using the `pull_od_sample_data.ipynb` notebook,
this notebook collects and visualizes statistics for it.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pyarrow as pa
import pyarrow.compute as pc
import quivr as qv
from pathlib import Path

from adam_assist import ASSISTPropagator
from adam_core.orbits.orbits import Orbits
from adam_core.orbit_determination import evaluate_orbits, mpc_to_od_observations
from mpcq import MPCObservations
from mpcq.orbits import MPCOrbits

In [None]:
# Directory holding the parquet files this notebook reads (per the notebook
# header, they are produced by `pull_od_sample_data.ipynb`).
data_dir = Path("../data/orbit_fit_eval")

# Observations and MPC orbits are loaded through the mpcq table types.
mpc_observations = MPCObservations.from_parquet(data_dir.joinpath("mpc_observations.parquet"))
mpc_orbits = MPCOrbits.from_parquet(data_dir.joinpath("mpc_orbits.parquet"))

# SBDB and NEOCC orbits are loaded as adam_core Orbits tables.
sbdb_orbits = Orbits.from_parquet(data_dir.joinpath("sbdb_orbits.parquet"))
neocc_orbits = Orbits.from_parquet(data_dir.joinpath("neocc_orbits.parquet"))

In [None]:
class ObservationStatistics(qv.Table):
    """Per-object observation and orbit-fit statistics, one row per object.

    Rows are built by :meth:`from_observations_and_orbits`; every column other
    than ``object_id`` is nullable.
    """

    object_id = qv.LargeStringColumn() # requested_provid
    observation_count = qv.Int64Column(nullable=True)
    with_uncertainty_count = qv.Int64Column(nullable=True)
    arc_length = qv.Float64Column(nullable=True)
    # STN names and counts are sorted in descending counts order
    stn_names = qv.ListColumn(pa.large_string(), nullable=True)
    stn_counts = qv.ListColumn(pa.int64(), nullable=True)
    dynamical_class = qv.StringColumn(nullable=True)
    mpc_rchi2 = qv.Float64Column(nullable=True)
    sbdb_rchi2 = qv.Float64Column(nullable=True)
    sbdb_name = qv.LargeStringColumn(nullable=True)
    neocc_rchi2 = qv.Float64Column(nullable=True)
    neocc_name = qv.LargeStringColumn(nullable=True)

    @classmethod
    def from_observations_and_orbits(cls, obs: MPCObservations, mpc_orbits: Orbits, sbdb_orbits: Orbits, neocc_orbits: Orbits):
        """Build the statistics table from observations and three orbit catalogs.

        For every distinct ``requested_provid`` in ``obs`` this records the
        number of observations, how many have both ``rmsra`` and ``rmsdec``
        that are not NaN, the spread ``max - min`` of ``obstime.mjd()``, and
        the per-station observation counts. For each catalog with a matching
        orbit, the first reduced chi2 returned by ``evaluate_orbits`` is stored.

        Matching rules:
          * MPC: ``object_id`` equals the provid (null ids are dropped first).
          * SBDB: ``object_id`` contains the provid as a substring.
          * NEOCC: ``object_id`` equals the provid with spaces removed.

        Orbit evaluation is skipped for an object when ``np.all`` over its
        ``stn`` values is falsy; a message is printed in that case.

        Args:
            obs: observations for all objects.
            mpc_orbits: MPC orbits.
            sbdb_orbits: SBDB orbits.
            neocc_orbits: NEOCC orbits.

        Returns:
            A new table with one row per distinct ``requested_provid``.
        """
        object_id = obs.requested_provid.unique()
        propagator = ASSISTPropagator()

        # Remove any nulls
        mpc_orbits = mpc_orbits.apply_mask(pc.is_valid(mpc_orbits.object_id))

        def reduced_chi2_for(candidates, od_observations):
            # First reduced chi2 reported by evaluate_orbits for these orbits.
            fitted_orbit, _ = evaluate_orbits(candidates, od_observations, propagator)
            return fitted_orbit.reduced_chi2[0]

        records = []
        for provid_scalar in object_id:
            provid = provid_scalar.as_py()
            object_obs = obs.apply_mask(pc.equal(obs.requested_provid, provid))

            ra_known = ~np.isnan(object_obs.rmsra)
            dec_known = ~np.isnan(object_obs.rmsdec)
            mjd = object_obs.obstime.mjd().to_numpy()

            # Fit results default to None; they are overwritten below only
            # when a matching orbit exists and the station codes are usable.
            record = {
                "observation_count": len(object_obs),
                "with_uncertainty_count": np.sum(ra_known & dec_known),
                "arc_length": mjd.max() - mjd.min(),
                "stn": pc.value_counts(object_obs.stn).sort(order='descending', by='counts'),
                "mpc_rchi2": None,
                "sbdb_rchi2": None,
                "sbdb_name": None,
                "neocc_rchi2": None,
                "neocc_name": None,
            }

            stn_usable = bool(np.all(object_obs.stn))
            if not stn_usable:
                print(f"Null in STN for object {provid}")

            od_observations = mpc_to_od_observations(object_obs)
            # The first catalog that yields a fit decides the dynamical class.
            dclass = None

            candidates = mpc_orbits.apply_mask(pc.equal(mpc_orbits.object_id, provid))
            if stn_usable and candidates:
                record["mpc_rchi2"] = reduced_chi2_for(candidates, od_observations)
                dclass = dclass or candidates.dynamical_class()[0]

            candidates = sbdb_orbits.apply_mask(pc.match_substring(sbdb_orbits.object_id, provid))
            if stn_usable and candidates:
                record["sbdb_rchi2"] = reduced_chi2_for(candidates, od_observations)
                record["sbdb_name"] = candidates.object_id[0]
                dclass = dclass or candidates.dynamical_class()[0]

            candidates = neocc_orbits.apply_mask(pc.equal(neocc_orbits.object_id, provid.replace(" ", "")))
            if stn_usable and candidates:
                record["neocc_rchi2"] = reduced_chi2_for(candidates, od_observations)
                record["neocc_name"] = candidates.object_id[0]
                dclass = dclass or candidates.dynamical_class()[0]

            record["dynamical_class"] = dclass
            records.append(record)

        def column(key):
            # Pull one field out of every record, in object order.
            return [record[key] for record in records]

        return cls.from_kwargs(
            object_id=object_id,
            observation_count=column("observation_count"),
            with_uncertainty_count=column("with_uncertainty_count"),
            arc_length=column("arc_length"),
            stn_names=[stn.field("values") for stn in column("stn")],
            stn_counts=[stn.field("counts") for stn in column("stn")],
            dynamical_class=column("dynamical_class"),
            mpc_rchi2=column("mpc_rchi2"),
            sbdb_rchi2=column("sbdb_rchi2"),
            sbdb_name=column("sbdb_name"),
            neocc_rchi2=column("neocc_rchi2"),
            neocc_name=column("neocc_name"),
        )

In [None]:
def summary_report(stats: "ObservationStatistics") -> None:
    """Print a text summary of ``stats``: one header line plus details per object.

    For each row this prints the observation counts, the share of observations
    with uncertainty, the number of stations and the smallest/largest
    per-station count, the arc length and the dynamical class, followed by one
    line per orbit catalog (MPC, SBDB, NEOCC) saying whether a reduced chi2 is
    available.

    Args:
        stats: table to report on. Iterating it must yield rows whose columns
            are indexed with ``[0]`` (i.e. each row is treated as a one-row
            table).
    """
    print(f"Number of objects: {len(stats)}")
    for row in stats:
        count_all = row.observation_count[0].as_py()
        count_unc = row.with_uncertainty_count[0].as_py()
        # Station counts are stored in descending order, so the last entry is
        # the smallest count and the first one the largest.
        print(f"{row.object_id[0].as_py()}: {count_all} observations, "
              f"with uncertainty {count_unc} ({np.round(count_unc * 100.0 / count_all, 2)}%), "
              f"{len(row.stn_names[0])} stations, "
              f"count per station {row.stn_counts[0][-1]} to {row.stn_counts[0][0]}, "
              f"arc length {np.round(row.arc_length[0].as_py(), 2)}, "
              f"dynamical class {row.dynamical_class[0].as_py()}"
              )

        # (label, reduced chi2 column, catalog name column or None)
        orbit_sources = (
            ("MPC", row.mpc_rchi2, None),
            ("SBDB", row.sbdb_rchi2, row.sbdb_name),
            ("NEOCC", row.neocc_rchi2, row.neocc_name),
        )
        for label, rchi2_column, name_column in orbit_sources:
            # Decide availability from the Python value rather than from the
            # truthiness of a pyarrow boolean scalar: scalar objects are not
            # guaranteed to be falsy when they hold False (pyarrow releases
            # without ``__bool__`` on scalars treat them as always true).
            rchi2 = rchi2_column[0].as_py()
            if rchi2 is None:
                print(f"   No {label} orbit available")
                continue
            name_part = "" if name_column is None else f" [{name_column[0]}]"
            print(f"   {label} orbit available{name_part}, rchi2={rchi2}")

def detailed_report(object_id: str, obs: MPCObservations, mpc_orbits: Orbits, sbdb_orbits: Orbits, neocc_orbits: Orbits) -> None:
    """Print and plot detailed observation and orbit-fit statistics for one object.

    Prints the observation counts (total and with both ``rmsra`` and
    ``rmsdec`` not NaN), station counts and the spread of ``obstime.mjd()``.
    For each catalog (MPC, SBDB, NEOCC) that has a matching orbit, the orbit is
    evaluated against the observations and its reduced chi2 and per-observation
    chi2 range are printed. Finally a 2x2 figure is shown: stacked per-station
    counts, plus one chi2 panel per catalog (a panel stays empty when that
    catalog has no matching orbit).

    Args:
        object_id: value of ``requested_provid`` to report on.
        obs: observations for all objects.
        mpc_orbits: MPC orbits; matched on ``object_id`` equality after
            dropping null ids.
        sbdb_orbits: SBDB orbits; matched when ``object_id`` contains the
            requested id as a substring.
        neocc_orbits: NEOCC orbits; matched on ``object_id`` equal to the
            requested id with spaces removed.

    Raises:
        ValueError: if ``obs`` holds no observations for ``object_id``
            (previously this surfaced as a ``ZeroDivisionError`` while
            formatting the percentage).
    """
    # Subset all for our object_id
    obs = obs.apply_mask(pc.equal(obs.requested_provid, object_id))
    if len(obs) == 0:
        raise ValueError(f"No observations found for object {object_id!r}")

    mpc_orbits = mpc_orbits.apply_mask(pc.is_valid(mpc_orbits.object_id))
    mpc_orbits = mpc_orbits.apply_mask(pc.equal(mpc_orbits.object_id, object_id))
    sbdb_orbits = sbdb_orbits.apply_mask(pc.match_substring(sbdb_orbits.object_id, object_id))
    neocc_orbits = neocc_orbits.apply_mask(pc.equal(neocc_orbits.object_id, object_id.replace(" ", "")))

    # Compute counts per STN, with and without uncertainty info
    stn_all_count = pc.value_counts(obs.stn).sort(order='descending', by='counts').to_pylist()
    stn_all_count = {p['values']: p['counts'] for p in stn_all_count}
    obs_with_uncertainty = obs.apply_mask((~np.isnan(obs.rmsra)) & (~np.isnan(obs.rmsdec)))
    stn_unc_count = pc.value_counts(obs_with_uncertainty.stn).to_pylist()
    stn_unc_count = {p['values']: p['counts'] for p in stn_unc_count}
    # Both lists follow the station order of stn_all_count (descending counts).
    stn_with_unc = [stn_unc_count.get(stn, 0) for stn in stn_all_count]
    stn_no_unc = [total - with_unc for total, with_unc in zip(stn_all_count.values(), stn_with_unc)]

    count_with_uncertainty = len(obs_with_uncertainty)
    times = obs.obstime.mjd().to_numpy()
    print(f"{object_id}: {len(obs)} observations, "
          f"with uncertainty {count_with_uncertainty} ({np.round(count_with_uncertainty * 100.0 / len(obs), 2)}%), "
          f"{len(stn_all_count)} stations, out of which {len(stn_unc_count)} have some uncertainty data, "
          f"arc length {np.round(times.max() - times.min(), 2)}"
    )

    # Various orbits
    # NOTE(review): ObservationStatistics.from_observations_and_orbits skips
    # orbit evaluation when the station codes contain a null/empty value; this
    # function does not — TODO confirm whether the same guard is needed here.
    observations = mpc_to_od_observations(obs)
    propagator = ASSISTPropagator()

    # (label, matching orbits, subplot position of the chi2 panel)
    sources = (
        ("MPC", mpc_orbits, (0, 1)),
        ("SBDB", sbdb_orbits, (1, 0)),
        ("NEOCC", neocc_orbits, (1, 1)),
    )
    chi2_by_source = {}
    for label, orbits, _position in sources:
        if not orbits:
            continue
        fitted_orbit, fitted_members = evaluate_orbits(orbits, observations, propagator)
        chi2 = fitted_members.residuals.chi2.to_numpy()
        chi2_by_source[label] = chi2
        # The MPC line has no catalog object name; SBDB and NEOCC print theirs.
        if label == "MPC":
            heading = f"MPC orbit dynamical class {orbits.dynamical_class()[0]}"
        else:
            heading = f"{label} object {orbits.object_id[0]}, dynamical class {orbits.dynamical_class()[0]}"
        print(f"{heading}, rchi2 = {fitted_orbit.reduced_chi2[0]}, "
              f"range of chi2 {chi2.min()} - {chi2.max()}")

    # Plot it all
    _fig, axs = plt.subplots(2, 2, figsize=(12, 8))
    stations = list(stn_all_count)
    # Stacked bars: observations without uncertainty at the bottom, those with
    # uncertainty on top.
    axs[0, 0].bar(stations, stn_no_unc, label="No uncertainty")
    axs[0, 0].bar(stations, stn_with_unc, label="With uncertainty", bottom=stn_no_unc)
    axs[0, 0].legend(loc="upper right")
    axs[0, 0].set_title("Counts per station")

    for label, _orbits, position in sources:
        if label not in chi2_by_source:
            continue
        panel = axs[position]
        panel.plot(chi2_by_source[label])
        panel.set_title(f"{label} orbit chi2")
        panel.set_xlabel("Observation index")

    plt.show()

In [None]:
# Compute the per-object statistics table from the loaded data, then print
# the text summary for every object.
object_statistics = ObservationStatistics.from_observations_and_orbits(
    obs=mpc_observations,
    mpc_orbits=mpc_orbits.orbits(),
    sbdb_orbits=sbdb_orbits,
    neocc_orbits=neocc_orbits,
)
summary_report(stats=object_statistics)

In [None]:
# Detailed report (text + plots) for the object of the first observation row.
detailed_report(
    object_id=mpc_observations.requested_provid[0].as_py(),
    obs=mpc_observations,
    mpc_orbits=mpc_orbits.orbits(),
    sbdb_orbits=sbdb_orbits,
    neocc_orbits=neocc_orbits,
)