In [1]:
import numpy as np
import pandas as pd

# --- 1) SPARC table2 URL (CDS) ---
PATH_TABLE2 = "https://cdsarc.cds.unistra.fr/ftp/J/AJ/152/157/table2.dat"

# --- 2) DATA LOADING (SPARC) ---
def load_sparc_data():
    print("Downloading SPARC catalog table2.dat ...")
    cols = [(0, 11), (19, 25), (26, 32), (33, 38), (39, 45), (46, 52), (53, 59)]
    names = ['Name', 'Rad', 'Vobs', 'e_Vobs', 'Vgas', 'Vdisk', 'Vbul']
    df = pd.read_fwf(PATH_TABLE2, colspecs=cols, header=None, names=names, comment='#')

    # to numeric
    for c in names[1:]:
        df[c] = pd.to_numeric(df[c], errors='coerce')

    # clean
    df = df.dropna(subset=['Name', 'Rad', 'Vobs', 'e_Vobs', 'Vgas', 'Vdisk'])
    df = df[df['Rad'] > 0]
    # If Vbul is missing, treat as 0
    df['Vbul'] = df['Vbul'].fillna(0.0)

    # Ensure nonzero baryon signal
    df = df[(df['Vgas']**2 + df['Vdisk']**2 + df['Vbul']**2) > 0]
    df = df.reset_index(drop=True)
    print(f"Loaded {len(df)} rows from table2.dat across {df['Name'].nunique()} galaxies.")
    return df

# --- 3) MOND acceleration a(a_N) for standard choices of mu(x) ---
def mond_acc_from_aN(aN, a0, mu_form="simple"):
    """
    Inputs:
      aN: Newtonian acceleration from baryons [m/s^2], ndarray
      a0: MOND acceleration scale [m/s^2]
      mu_form: 'simple' or 'standard'
    Returns:
      a: MOND true acceleration [m/s^2]
    """
    aN = np.asarray(aN, dtype=float)
    aN = np.maximum(aN, 0.0)

    if mu_form.lower() == "simple":
        # mu(x) = x/(1+x)
        # Solve: (a/(a+a0)) * a = aN  -> a^2 - aN a - aN a0 = 0
        return 0.5 * (aN + np.sqrt(aN*aN + 4.0*aN*a0))

    if mu_form.lower() == "standard":
        # mu(x) = x/sqrt(1+x^2)
        # Solve: (a/sqrt(a^2+a0^2))*a = aN
        # Let z=a^2: z^2 = aN^2 (z + a0^2) -> z^2 - aN^2 z - aN^2 a0^2=0
        z = 0.5 * (aN*aN + np.sqrt(aN**4 + 4.0*(aN*aN)*(a0*a0)))
        return np.sqrt(np.maximum(z, 0.0))

    raise ValueError("mu_form must be 'simple' or 'standard'")

# --- 4) Compute MOND predicted V(r) from SPARC baryon components with fixed params ---
def compute_mond_predictions(df, a0=1.2e-10, mu_form="simple",
                             ups_disk=0.7, ups_bul=0.5):
    """
    df columns: Rad [kpc], Vgas/Vdisk/Vbul [km/s]
    Returns a copy df_out with Vb, aN, aMOND, Vpred
    """
    df_out = df.copy()

    # baryonic circular speed squared (km/s)^2
    Vb2 = (df_out['Vgas'].to_numpy()**2
           + ups_disk * df_out['Vdisk'].to_numpy()**2
           + ups_bul  * df_out['Vbul'].to_numpy()**2)

    Vb = np.sqrt(np.maximum(Vb2, 0.0))  # km/s
    df_out['Vb'] = Vb

    # Units: convert r[kpc] -> m, V[km/s] -> m/s
    kpc_to_m = 3.0856775814913673e19
    r_m = df_out['Rad'].to_numpy() * kpc_to_m
    Vb_ms = Vb * 1e3

    # Newtonian acceleration from baryons: aN = Vb^2 / r
    aN = (Vb_ms**2) / r_m
    df_out['aN'] = aN

    # MOND acceleration
    a = mond_acc_from_aN(aN, a0=a0, mu_form=mu_form)
    df_out['aMOND'] = a

    # Predicted speed: V^2 = a r
    Vpred_ms = np.sqrt(np.maximum(a * r_m, 0.0))
    df_out['Vpred'] = Vpred_ms / 1e3  # km/s

    return df_out

# --- 5) Metrics: weighted RMSE and chi2 (global + per-galaxy) ---
def evaluate_fit(df_pred, sigma0=5.0):
    """
    df_pred must include Vobs, e_Vobs, Vpred, Name
    sigma0: error floor in km/s
    Returns dict + per-galaxy table
    """
    Vobs = df_pred['Vobs'].to_numpy(dtype=float)
    Verr = df_pred['e_Vobs'].to_numpy(dtype=float)
    Vpred = df_pred['Vpred'].to_numpy(dtype=float)

    sigma = np.sqrt(np.maximum(Verr, 0.0)**2 + sigma0**2)
    w = 1.0 / (sigma**2)

    resid = Vobs - Vpred

    chi2 = np.sum((resid**2) * w)
    npts = len(df_pred)
    chi2_nu = chi2 / npts  # no fitted params in this locked protocol

    rmse_w = np.sqrt(np.sum(w * resid**2) / np.sum(w))
    rmse = np.sqrt(np.mean(resid**2))

    # per-galaxy metrics (equal-galaxy weighting option)
    rows = []
    for name, g in df_pred.groupby('Name'):
        Vobs_g = g['Vobs'].to_numpy(float)
        Verr_g = g['e_Vobs'].to_numpy(float)
        Vpred_g = g['Vpred'].to_numpy(float)

        sigma_g = np.sqrt(np.maximum(Verr_g, 0.0)**2 + sigma0**2)
        w_g = 1.0 / (sigma_g**2)
        resid_g = Vobs_g - Vpred_g

        chi2_g = np.sum(resid_g**2 * w_g)
        n_g = len(g)
        rmse_w_g = np.sqrt(np.sum(w_g * resid_g**2) / np.sum(w_g))
        rmse_g = np.sqrt(np.mean(resid_g**2))

        rows.append((name, n_g, rmse_w_g, rmse_g, chi2_g / n_g))

    per_gal = pd.DataFrame(rows, columns=['Name', 'Npts', 'RMSE_w', 'RMSE', 'chi2_nu'])
    per_gal = per_gal.sort_values('RMSE_w').reset_index(drop=True)

    summary = {
        "N_rows": npts,
        "N_galaxies": int(df_pred['Name'].nunique()),
        "RMSE_unweighted_kms": float(rmse),
        "RMSE_weighted_kms": float(rmse_w),
        "chi2": float(chi2),
        "chi2_nu": float(chi2_nu),
        "per_gal_median_RMSEw": float(per_gal['RMSE_w'].median()),
        "per_gal_median_chi2nu": float(per_gal['chi2_nu'].median()),
    }
    return summary, per_gal

# --- 6) Run ---
if __name__ == "__main__":
    # Your locked protocol knobs
    sigma0 = 5.0
    ups_disk = 0.7
    ups_bul = 0.5

    # MOND knobs (to be fixed globally)
    a0 = 1.2e-10          # m/s^2 (common canonical value)
    mu_form = "simple"    # "simple" or "standard"

    df = load_sparc_data()

    df_pred = compute_mond_predictions(
        df,
        a0=a0,
        mu_form=mu_form,
        ups_disk=ups_disk,
        ups_bul=ups_bul
    )

    summary, per_gal = evaluate_fit(df_pred, sigma0=sigma0)

    print("\n=== GLOBAL (all points) ===")
    for k, v in summary.items():
        print(f"{k:>24s}: {v}")

    print("\n=== PER-GALAXY (first 10 best by RMSE_w) ===")
    print(per_gal.head(10).to_string(index=False))

    print("\n=== PER-GALAXY (first 10 worst by RMSE_w) ===")
    print(per_gal.tail(10).to_string(index=False))

    # Optional: save detailed outputs
    df_pred.to_csv("sparc_table2_mond_predictions.csv", index=False)
    per_gal.to_csv("sparc_table2_mond_per_gal_metrics.csv", index=False)
    print("\nWrote: sparc_table2_mond_predictions.csv, sparc_table2_mond_per_gal_metrics.csv")

Downloading SPARC catalog table2.dat ...
Loaded 3391 rows from table2.dat across 175 galaxies.

=== GLOBAL (all points) ===
                  N_rows: 3391
              N_galaxies: 175
     RMSE_unweighted_kms: 26.253607079900476
       RMSE_weighted_kms: 24.055991498812105
                    chi2: 41688.09017638727
                 chi2_nu: 12.293745259919572
    per_gal_median_RMSEw: 15.464743752395258
   per_gal_median_chi2nu: 4.300165104023484

=== PER-GALAXY (first 10 best by RMSE_w) ===
    Name  Npts   RMSE_w     RMSE  chi2_nu
UGC08550    11 2.430690 2.418872 0.200135
 NGC3741    21 3.173503 3.182352 0.332298
UGC06399     9 3.664253 4.022950 0.274045
 UGCA442     8 4.117181 4.063377 0.579467
UGC00634     4 4.391146 5.018651 0.632678
  DDO064    14 4.547319 4.849299 0.416579
 NGC3109    25 4.628394 4.521665 0.655033
 UGCA281     7 4.792608 4.773706 0.811704
 NGC0300    25 4.885219 4.480684 0.451042
UGC07603    12 5.124706 5.124706 0.787435

=== PER-GALAXY (first 10 worst by RMSE

In [2]:
import numpy as np
import pandas as pd

# --- 1) SPARC table2 URL (CDS) ---
PATH_TABLE2 = "https://cdsarc.cds.unistra.fr/ftp/J/AJ/152/157/table2.dat"

# --- 2) DATA LOADING (SPARC) ---
def load_sparc_data():
    print("Downloading SPARC catalog table2.dat ...")
    cols = [(0, 11), (19, 25), (26, 32), (33, 38), (39, 45), (46, 52), (53, 59)]
    names = ['Name', 'Rad', 'Vobs', 'e_Vobs', 'Vgas', 'Vdisk', 'Vbul']
    df = pd.read_fwf(PATH_TABLE2, colspecs=cols, header=None, names=names, comment='#')

    for c in names[1:]:
        df[c] = pd.to_numeric(df[c], errors='coerce')

    df = df.dropna(subset=['Name', 'Rad', 'Vobs', 'e_Vobs', 'Vgas', 'Vdisk'])
    df = df[df['Rad'] > 0]
    df['Vbul'] = df['Vbul'].fillna(0.0)
    df = df[(df['Vgas']**2 + df['Vdisk']**2 + df['Vbul']**2) > 0]
    df = df.reset_index(drop=True)

    print(f"Loaded {len(df)} rows from table2.dat across {df['Name'].nunique()} galaxies.")
    return df

# --- 3) MOND acceleration a(a_N) for standard choices of mu(x) ---
def mond_acc_from_aN(aN, a0, mu_form="simple"):
    aN = np.asarray(aN, dtype=float)
    aN = np.maximum(aN, 0.0)

    if mu_form.lower() == "simple":
        # mu(x)=x/(1+x): a^2 - aN a - aN a0 = 0
        return 0.5 * (aN + np.sqrt(aN*aN + 4.0*aN*a0))

    if mu_form.lower() == "standard":
        # mu(x)=x/sqrt(1+x^2): solve for z=a^2: z^2 - aN^2 z - aN^2 a0^2=0
        z = 0.5 * (aN*aN + np.sqrt(aN**4 + 4.0*(aN*aN)*(a0*a0)))
        return np.sqrt(np.maximum(z, 0.0))

    raise ValueError("mu_form must be 'simple' or 'standard'")

# --- 4) Shared: compute baryonic circular speed Vb from SPARC components ---
def compute_Vb(df, ups_disk=0.7, ups_bul=0.5):
    Vb2 = (df['Vgas'].to_numpy()**2
           + ups_disk * df['Vdisk'].to_numpy()**2
           + ups_bul  * df['Vbul'].to_numpy()**2)
    Vb = np.sqrt(np.maximum(Vb2, 0.0))  # km/s
    return Vb

# --- 5) MOND predicted V(r) ---
def compute_mond_predictions(df, a0=1.2e-10, mu_form="simple",
                             ups_disk=0.7, ups_bul=0.5):
    df_out = df.copy()
    Vb = compute_Vb(df_out, ups_disk=ups_disk, ups_bul=ups_bul)
    df_out['Vb'] = Vb

    kpc_to_m = 3.0856775814913673e19
    r_m = df_out['Rad'].to_numpy() * kpc_to_m
    Vb_ms = Vb * 1e3

    aN = (Vb_ms**2) / r_m
    df_out['aN'] = aN

    a = mond_acc_from_aN(aN, a0=a0, mu_form=mu_form)
    df_out['aMOND'] = a

    Vpred_ms = np.sqrt(np.maximum(a * r_m, 0.0))
    df_out['Vpred_MOND'] = Vpred_ms / 1e3  # km/s
    return df_out

# --- 6) YOUR model: compute a_Mach from your chain of definitions ---
def compute_a_mach(
    T0=2.7255,                 # K
    alpha=7.29735e-3,          # dimensionless
    G=6.674e-11,               # m^3 kg^-1 s^-2
    c=2.99792e8,               # m/s
    sigma_SB=5.67037e-8,       # W m^-2 K^-4
    Omega_pot=2.0/3.0          # dimensionless
):
    """
    Implements exactly your definitions (SI units throughout):

      rho_gamma = 4 sigma_SB T0^4 / c^3
      rho_max   = rho_gamma / alpha^2
      rho_L     = Omega_pot * rho_max  (with Omega_pot = 2/3)
      H0        = sqrt(4 pi G rho_L)
      a_Mach    = c H0 / (3 pi)

    Returns:
      a_Mach [m/s^2], plus a dict of intermediate values for debugging.
    """
    rho_gamma = (4.0 * sigma_SB * (T0**4)) / (c**3)        # kg/m^3
    rho_max   = rho_gamma / (alpha**2)                     # kg/m^3
    rho_L     = Omega_pot * rho_max                        # kg/m^3
    H0        = np.sqrt(4.0 * np.pi * G * rho_L)           # 1/s
    a_Mach    = (c * H0) / (3.0 * np.pi)                   # m/s^2

    dbg = {
        "rho_gamma_kg_m3": rho_gamma,
        "rho_max_kg_m3": rho_max,
        "rho_Lambda_kg_m3": rho_L,
        "H0_s^-1": H0,
        "a_Mach_m_s2": a_Mach
    }
    return a_Mach, dbg

# --- 7) YOUR model: V_Q = sqrt(vbar^2 + sqrt(vbar^2 * a_Mach * r)) ---
def compute_Q_predictions(df, ups_disk=0.7, ups_bul=0.5,
                          sigma0=5.0,
                          T0=2.7255, alpha=7.29735e-3, G=6.674e-11,
                          c=2.99792e8, sigma_SB=5.67037e-8,
                          Omega_pot=2.0/3.0):
    df_out = df.copy()
    Vb = compute_Vb(df_out, ups_disk=ups_disk, ups_bul=ups_bul)
    df_out["Vb"] = Vb

    a_Mach, dbg = compute_a_mach(T0=T0, alpha=alpha, G=G, c=c,
                                 sigma_SB=sigma_SB, Omega_pot=Omega_pot)

    kpc_to_m = 3.0856775814913673e19
    r_m = df_out["Rad"].to_numpy() * kpc_to_m
    vbar_ms = Vb * 1e3

    # V_Q = sqrt(vbar^2 + sqrt(vbar^2 * a_Mach * r))
    term = np.sqrt(np.maximum((vbar_ms**2) * a_Mach * r_m, 0.0))  # m^2/s^2
    VQ_ms = np.sqrt(np.maximum(vbar_ms**2 + term, 0.0))           # m/s
    df_out["Vpred_Q"] = VQ_ms / 1e3                               # km/s

    return df_out, dbg

# --- 8) Metrics (generic: choose which prediction column to evaluate) ---
def evaluate_fit(df_pred, pred_col, sigma0=5.0):
    Vobs = df_pred['Vobs'].to_numpy(dtype=float)
    Verr = df_pred['e_Vobs'].to_numpy(dtype=float)
    Vpred = df_pred[pred_col].to_numpy(dtype=float)

    sigma = np.sqrt(np.maximum(Verr, 0.0)**2 + sigma0**2)
    w = 1.0 / (sigma**2)

    resid = Vobs - Vpred

    chi2 = np.sum((resid**2) * w)
    npts = len(df_pred)
    chi2_nu = chi2 / npts  # locked protocol: no fitted params

    rmse_w = np.sqrt(np.sum(w * resid**2) / np.sum(w))
    rmse = np.sqrt(np.mean(resid**2))

    # per-galaxy metrics
    rows = []
    for name, g in df_pred.groupby('Name'):
        Vobs_g = g['Vobs'].to_numpy(float)
        Verr_g = g['e_Vobs'].to_numpy(float)
        Vpred_g = g[pred_col].to_numpy(float)

        sigma_g = np.sqrt(np.maximum(Verr_g, 0.0)**2 + sigma0**2)
        w_g = 1.0 / (sigma_g**2)
        resid_g = Vobs_g - Vpred_g

        chi2_g = np.sum(resid_g**2 * w_g)
        n_g = len(g)
        rmse_w_g = np.sqrt(np.sum(w_g * resid_g**2) / np.sum(w_g))
        rmse_g = np.sqrt(np.mean(resid_g**2))

        rows.append((name, n_g, rmse_w_g, rmse_g, chi2_g / n_g))

    per_gal = pd.DataFrame(rows, columns=['Name', 'Npts', 'RMSE_w', 'RMSE', 'chi2_nu'])
    per_gal = per_gal.sort_values('RMSE_w').reset_index(drop=True)

    summary = {
        "pred_col": pred_col,
        "N_rows": npts,
        "N_galaxies": int(df_pred['Name'].nunique()),
        "RMSE_unweighted_kms": float(rmse),
        "RMSE_weighted_kms": float(rmse_w),
        "chi2": float(chi2),
        "chi2_nu": float(chi2_nu),
        "per_gal_median_RMSEw": float(per_gal['RMSE_w'].median()),
        "per_gal_median_chi2nu": float(per_gal['chi2_nu'].median()),
    }
    return summary, per_gal

# --- 9) Run both models under the same locked protocol ---
if __name__ == "__main__":
    # Locked protocol knobs (yours)
    sigma0 = 5.0
    ups_disk = 0.7
    ups_bul = 0.5

    # MOND global knobs (fixed; choose once)
    a0 = 1.2e-10
    mu_form = "simple"  # or "standard"

    df = load_sparc_data()

    # MOND predictions
    df_m = compute_mond_predictions(df, a0=a0, mu_form=mu_form,
                                    ups_disk=ups_disk, ups_bul=ups_bul)
    summary_m, pergal_m = evaluate_fit(df_m, pred_col="Vpred_MOND", sigma0=sigma0)

    # Your predictions (Q)
    df_q, dbg = compute_Q_predictions(df, ups_disk=ups_disk, ups_bul=ups_bul,
                                      sigma0=sigma0)

    summary_q, pergal_q = evaluate_fit(df_q, pred_col="Vpred_Q", sigma0=sigma0)

    print("\n=== Constants chain for your a_Mach (SI) ===")
    for k, v in dbg.items():
        print(f"{k:>18s}: {v:.6e}")

    print("\n=== MOND (locked protocol) ===")
    for k, v in summary_m.items():
        print(f"{k:>24s}: {v}")

    print("\n=== YOUR MODEL (locked protocol) ===")
    for k, v in summary_q.items():
        print(f"{k:>24s}: {v}")

    print("\n=== PER-GALAXY (MOND best 10 by RMSE_w) ===")
    print(pergal_m.head(10).to_string(index=False))

    print("\n=== PER-GALAXY (MOND worst 10 by RMSE_w) ===")
    print(pergal_m.tail(10).to_string(index=False))

    print("\n=== PER-GALAXY (YOUR MODEL best 10 by RMSE_w) ===")
    print(pergal_q.head(10).to_string(index=False))

    print("\n=== PER-GALAXY (YOUR MODEL worst 10 by RMSE_w) ===")
    print(pergal_q.tail(10).to_string(index=False))

    # Save outputs
    df_out = df_m.copy()
    df_out["Vpred_Q"] = df_q["Vpred_Q"].to_numpy()
    df_out.to_csv("sparc_table2_predictions_MOND_vs_Q.csv", index=False)
    pergal_m.to_csv("sparc_pergal_MOND_metrics.csv", index=False)
    pergal_q.to_csv("sparc_pergal_Q_metrics.csv", index=False)

    print("\nWrote:")
    print("  sparc_table2_predictions_MOND_vs_Q.csv")
    print("  sparc_pergal_MOND_metrics.csv")
    print("  sparc_pergal_Q_metrics.csv")

Downloading SPARC catalog table2.dat ...
Loaded 3391 rows from table2.dat across 175 galaxies.

=== Constants chain for your a_Mach (SI) ===
   rho_gamma_kg_m3: 4.645110e-31
     rho_max_kg_m3: 8.722996e-27
  rho_Lambda_kg_m3: 5.815331e-27
           H0_s^-1: 2.208438e-18
       a_Mach_m_s2: 7.024803e-11

=== MOND (locked protocol) ===
                pred_col: Vpred_MOND
                  N_rows: 3391
              N_galaxies: 175
     RMSE_unweighted_kms: 26.253607079900476
       RMSE_weighted_kms: 24.055991498812105
                    chi2: 41688.09017638727
                 chi2_nu: 12.293745259919572
    per_gal_median_RMSEw: 15.464743752395258
   per_gal_median_chi2nu: 4.300165104023484

=== YOUR MODEL (locked protocol) ===
                pred_col: Vpred_Q
                  N_rows: 3391
              N_galaxies: 175
     RMSE_unweighted_kms: 28.042623524056996
       RMSE_weighted_kms: 25.634662567724778
                    chi2: 47339.17517629798
                 chi2_nu: 13.