
# ELAS — Data Import & Conversion (v2, robuste)
Ce notebook **télécharge des jeux de données réels** (Pantheon+ SH0ES, DESI DR2 BAO, Cosmic Chronometers, RSD fσ₈) et les **convertit** en formats normalisés pour les notebooks ELAS.
**Sorties** dans `/content/` :
- `/content/sn_pantheonplus_mu.csv` ; `/content/sn_pantheonplus_cov.npy` (optionnel)
- `/content/bao_desi_dr2_meas.csv` ; `/content/bao_desi_dr2_cov.npy`
- `/content/cc_Hz.csv`
- `/content/rsd_fs8.csv`


## 0) Installations minimales

In [1]:
!pip -q install astropy pandas numpy requests


## 1) Utilitaires

In [2]:
\
import os, re, json, requests, numpy as np, pandas as pd
from io import StringIO
from astropy.io import fits

def wget(url, ofile, chunk=1<<20):
    os.makedirs(os.path.dirname(ofile), exist_ok=True)
    r = requests.get(url, stream=True, timeout=90)
    if r.status_code != 200:
        raise RuntimeError(f"HTTP {r.status_code} for {url}")
    with open(ofile, "wb") as f:
        for ch in r.iter_content(chunk_size=chunk):
            if ch: f.write(ch)
    print("Downloaded:", url, "->", ofile)
    return ofile

def save_csv(df, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    df.to_csv(path, index=False)
    print("Saved", path, "shape", df.shape)

def save_npy(obj, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    np.save(path, obj)
    print("Saved", path, "shape", np.array(obj).shape)


## 2) Pantheon+ SH0ES (2022) — catalogue & covariance

In [3]:
\
# URLs GitHub "raw" (le '+' doit être encodé en '%2B')
BASE = "https://raw.githubusercontent.com/PantheonPlusSH0ES/DataRelease/main/Pantheon%2B_Data"
URL_CATALOG = f"{BASE}/Pantheon%2BSH0ES.dat"
URL_COV_TXT = f"{BASE}/covmat_Pantheon%2BSH0ES.txt"
URL_COV_FITS = f"{BASE}/covmat_Pantheon%2BSH0ES.fits"

path_catalog = "/content/Pantheon+SH0ES.dat"
path_covtxt  = "/content/covmat_Pantheon+SH0ES.txt"
path_covnpy  = "/content/sn_pantheonplus_cov.npy"
path_csv     = "/content/sn_pantheonplus_mu.csv"

# 2.1 catalogue principal
wget(URL_CATALOG, path_catalog)

# 2.2 parsage ASCII -> zCMB, MU_SH0ES, MU_SH0ES_ERR_DIAG
with open(path_catalog, "r", encoding="utf-8", errors="ignore") as f:
    lines = [ln for ln in f if ln.strip() and not ln.strip().startswith(("#","%","//"))]

# détecter header contenant 'zCMB' et 'MU_SH0ES'
header = None
for ln in lines[:30]:
    if ("zCMB" in ln) and ("MU_SH0ES" in ln):
        header = re.split(r"\s+", ln.strip())
        break
if header is None:
    header = re.split(r"\s+", lines[0].strip())

body = "".join(lines[1:])
df = pd.read_csv(StringIO(body), sep=r"\s+", names=header, engine="python")
need = ["zCMB","MU_SH0ES"]
for c in need:
    if c not in df.columns:
        raise RuntimeError(f"Colonne manquante: {c} dans Pantheon+SH0ES.dat")

err_col = "MU_SH0ES_ERR_DIAG" if "MU_SH0ES_ERR_DIAG" in df.columns else None
out = pd.DataFrame({
    "z": df["zCMB"].astype(float),
    "mu": df["MU_SH0ES"].astype(float),
    "mu_err": df[err_col].astype(float) if err_col else np.nan
})
save_csv(out, path_csv)

# 2.3 covariance (TXT prioritaire, FITS en secours)
got_cov = False
try:
    wget(URL_COV_TXT, path_covtxt)
    cov = np.loadtxt(path_covtxt)
    save_npy(cov, path_covnpy)
    got_cov = True
except Exception as e:
    print("Cov TXT indisponible:", e)

if not got_cov:
    try:
        path_covfits = "/content/covmat_Pantheon+SH0ES.fits"
        wget(URL_COV_FITS, path_covfits)
        cov = fits.getdata(path_covfits)
        save_npy(cov, path_covnpy)
        got_cov = True
    except Exception as e:
        print("Cov FITS indisponible:", e)
        print("NOTE: vous pouvez fournir manuellement la covariance complète si besoin.")


RuntimeError: HTTP 404 for https://raw.githubusercontent.com/PantheonPlusSH0ES/DataRelease/main/Pantheon%2B_Data/Pantheon%2BSH0ES.dat

## 3) DESI DR2 — BAO summaries (2025)

In [4]:
\
# Liste de fichiers candidats (noms officiels courants)
BAO_BASE = "https://data.desi.lbl.gov/public/release/dr2/baosummaries"
CANDIDATES = [
    "DESI_DR2_BAO_BGS.fits",
    "DESI_DR2_BAO_LRG.fits",
    "DESI_DR2_BAO_ELG.fits",
    "DESI_DR2_BAO_QSO.fits",
    "DESI_DR2_BAO_LYA.fits",
    "DESI_DR2_BAO_LYAxQSO.fits",
]

out_meas = "/content/bao_desi_dr2_meas.csv"
out_cov  = "/content/bao_desi_dr2_cov.npy"

rows = []
cov_blocks = []
for name in CANDIDATES:
    url = f"{BAO_BASE}/{name}"
    loc = f"/content/{name}"
    try:
        wget(url, loc)
        with fits.open(loc) as hdul:
            tbl = hdul[1].data
            cols = [c.upper() for c in tbl.columns.names]
            def pick(*cands):
                for c in cands:
                    if c in cols: return c
                return None
            c_z  = pick("Z_EFF","Z")
            c_dm = pick("DM_OVER_RD","DM_over_rd","DMRD")
            c_dh = pick("DH_OVER_RD","DH_over_rd","DHRD")
            if not (c_z and c_dm and c_dh):
                raise RuntimeError("Colonnes BAO manquantes")
            z  = np.atleast_1d(tbl[c_z]).astype(float)
            dm = np.atleast_1d(tbl[c_dm]).astype(float)
            dh = np.atleast_1d(tbl[c_dh]).astype(float)
            for i in range(len(z)):
                rows.append({"tracer": name.replace(".fits",""), "z": z[i], "DM_over_rd": dm[i], "DH_over_rd": dh[i]})
            # covariance: chercher HDU 'COV'/'COVMAT' ou colonne
            C = None
            for h in hdul:
                if getattr(h, "name", "").upper() in ("COV","COVMAT"):
                    C = np.array(h.data, dtype=float); break
            if C is None and "COV" in cols:
                C = np.array(tbl["COV"][0], dtype=float)
            if C is not None:
                cov_blocks.append(C)
    except Exception as e:
        print("Skip", name, ":", e)

import pandas as pd, numpy as np, os
if rows:
    df = pd.DataFrame(rows)
    save_csv(df, out_meas)
else:
    print("Aucune mesure BAO récupérée (vérifiez que les URLs sont accessibles depuis Colab).")

if cov_blocks:
    sizes = [c.shape[0] for c in cov_blocks]
    N = sum(sizes); Cglob = np.zeros((N,N)); i=0
    for C in cov_blocks:
        s=C.shape[0]; Cglob[i:i+s, i:i+s] = C; i += s
    save_npy(Cglob, out_cov)
else:
    print("Aucune covariance BAO trouvée dans les HDU; fournissez-la si nécessaire.")


Skip DESI_DR2_BAO_BGS.fits : HTTP 404 for https://data.desi.lbl.gov/public/release/dr2/baosummaries/DESI_DR2_BAO_BGS.fits
Skip DESI_DR2_BAO_LRG.fits : HTTP 404 for https://data.desi.lbl.gov/public/release/dr2/baosummaries/DESI_DR2_BAO_LRG.fits
Skip DESI_DR2_BAO_ELG.fits : HTTP 404 for https://data.desi.lbl.gov/public/release/dr2/baosummaries/DESI_DR2_BAO_ELG.fits
Skip DESI_DR2_BAO_QSO.fits : HTTP 404 for https://data.desi.lbl.gov/public/release/dr2/baosummaries/DESI_DR2_BAO_QSO.fits
Skip DESI_DR2_BAO_LYA.fits : HTTP 404 for https://data.desi.lbl.gov/public/release/dr2/baosummaries/DESI_DR2_BAO_LYA.fits
Skip DESI_DR2_BAO_LYAxQSO.fits : HTTP 404 for https://data.desi.lbl.gov/public/release/dr2/baosummaries/DESI_DR2_BAO_LYAxQSO.fits
Aucune mesure BAO récupérée (vérifiez que les URLs sont accessibles depuis Colab).
Aucune covariance BAO trouvée dans les HDU; fournissez-la si nécessaire.


## 4) Cosmic Chronometers — Moresco et al. (compilation publique)

In [5]:
\
RAW_CC = "https://raw.githubusercontent.com/danmoresco/CC_data/master/CC_Hz.dat"
out_cc = "/content/cc_Hz.csv"

txt = requests.get(RAW_CC, timeout=90).text
lines = [ln for ln in txt.splitlines() if ln.strip() and not ln.strip().startswith(("#","%","//"))]

# header heuristique
header=None
for ln in lines[:15]:
    if all(k in ln for k in ["z","H"]):
        header = re.split(r"\s+", ln.strip())
        break

from io import StringIO
if header is None:
    df = pd.read_csv(StringIO("\n".join(lines)), sep=r"\s+", names=["z","H","H_err"], engine="python")
else:
    df = pd.read_csv(StringIO("\n".join(lines[1:])), sep=r"\s+", names=header, engine="python")
if "H_err" not in df.columns:
    for cand in ["sigma_H","errH","eH"]:
        if cand in df.columns: df["H_err"] = df[cand]
if "H_err" not in df.columns:
    df["H_err"] = np.nan

save_csv(df[["z","H","H_err"]].astype(float), out_cc)


ValueError: could not convert string to float: '404:'

## 5) RSD — fσ₈ (compilation publique “Gold 2022”)

In [6]:
\
RAW_RSD = "https://raw.githubusercontent.com/evazayerzadeh/f_sigma8_data/master/data/f_sigma8.csv"
out_rsd = "/content/rsd_fs8.csv"

df = pd.read_csv(RAW_RSD)
# mapping flexible vers z, fs8, fs8_err
def pick_col(df, candidates):
    low = [c.lower() for c in df.columns]
    for name in candidates:
        if name.lower() in low:
            return df.columns[low.index(name.lower())]
    return None

cz = pick_col(df, ["z","redshift","z_eff","zeff"])
cf = pick_col(df, ["fs8","f_sigma8","f*sigma8","fσ8"])
ce = pick_col(df, ["fs8_err","sigma_fs8","err","error","sigma"])

if cz is None or cf is None:
    raise RuntimeError("Colonnes z/fs8 introuvables dans la compilation RSD.")
if ce is None:
    df["fs8_err"] = np.nan
else:
    df["fs8_err"] = df[ce]

out = df.rename(columns={cz:"z", cf:"fs8"})[["z","fs8","fs8_err"]].astype(float)
save_csv(out, out_rsd)


HTTPError: HTTP Error 404: Not Found

In [7]:
# === ELAS: Packager de rapport & figures (à lancer tel quel dans chaque notebook) ===
import os, json, glob, shutil, datetime
from pathlib import Path

# ---- 1) Paramètres à personnaliser légèrement ----
RUN_TAG = "notebook1"  # <-- change en "notebook2" dans le 2e notebook

ROOT = "/content/ELAS"
OUT  = f"{ROOT}/output"
TAB  = f"{OUT}/tables"
FIG  = f"{OUT}/figures"
PACK = f"{OUT}/report_bundle_{RUN_TAG}"

os.makedirs(TAB, exist_ok=True)
os.makedirs(FIG, exist_ok=True)
os.makedirs(PACK, exist_ok=True)

# ---- 2) Utilitaires ----
def load_json(p):
    try:
         with open(p,'r') as f: return json.load(f)
    except Exception: return None

def copy_if_exists(patterns, dst):
    os.makedirs(dst, exist_ok=True)
    copied = []
    for pat in patterns:
        for p in glob.glob(pat):
            try:
                tgt = os.path.join(dst, os.path.basename(p))
                shutil.copy2(p, tgt)
                copied.append(tgt)
            except Exception as e:
                print(f"[WARN] copie échouée: {p} -> {e}")
    return copied

def fmt(x, nd=3):
    try:
        return f"{float(x):.{nd}f}"
    except Exception:
        return "—"

# ---- 3) Récupère les JSONs "connus" si présents (sinon ignore) ----
files = {
    "phase_lock_12p8":        f"{TAB}/phase_locking_12p8_summary.json",
    "bootstrap_local_12p8":   f"{TAB}/bootstrap_ppc_local_12p8.json",
    "focus_fit_12p8":         f"{TAB}/focus_fit_Omega12p8_summary.json",
    "focus_fit_12p8_delta01": f"{TAB}/focus_fit_Omega12p8_delta01_summary.json",
    "loo_bao":                f"{TAB}/loo_bao_summary.json",
    "nulltest_fast":          f"{TAB}/nulltest_fast_summary.json",
    "matched_filter":         f"{TAB}/matched_filter_summary.json",
    "phase_lock_all":         f"{TAB}/phase_locking_summary.json",
    "bootstrap_ppc_v1":       f"{TAB}/bootstrap_ppc_summary.json",
    "focus_fit_Omega6":       f"{TAB}/focus_fit_Omega6_summary.json",
    "ppc_global_S5000":       f"{TAB}/ppc_global_S5000.json",          # si tu l'as sauvé
    "ppc_global_fast_ckpt":   f"{TAB}/ppc_global_fast_checkpoint.json" # sinon, résumé partiel
}
data = {k: load_json(p) for k,p in files.items()}

# ---- 4) Compose le Markdown de synthèse (robuste aux absences) ----
ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
md = []
md.append(f"# ELAS — Rapport de validation ({RUN_TAG})\n_Généré le {ts}_\n")

# Pic & cohérence de phase locale
pl = data["phase_lock_12p8"] or {}
boot = data["bootstrap_local_12p8"] or {}
md.append("## 1) Pic & cohérence de phase (fenêtre ~12.3–13.3)")
md.append(f"- **Phase-locking** : N={pl.get('N','—')}, R={fmt(pl.get('R'))}, Z={fmt(pl.get('Z'))}, p≈{pl.get('p_value_Rayleigh','—')}, Δφ_mean={fmt(pl.get('mean_phase_diff'))} rad.")
md.append(f"- **Bootstrap local** : Ω*≈{fmt(boot.get('Omega_star_local_obs'))}, Z_comb={fmt(boot.get('Z_comb_local_obs'))}, "
          f"stabilité={fmt(boot.get('bootstrap_stability_[12.3,13.3]'))}, pPPC_local={boot.get('PPC_local_p_value','—')}.\n")

# Fits focalisés
def summarize_focus(obj):
    if not obj: return None
    out = {}
    for row in obj:
        out[row.get("which","?")] = {
            "Δχ²": row.get("Delta_chi2"),
            "AICc_Λ": row.get("AICc_LCDM"),
            "AICc_E": row.get("AICc_ELAS"),
            "BIC_Λ": row.get("BIC_LCDM"),
            "BIC_E": row.get("BIC_ELAS"),
            "BF": row.get("BayesFactor_alt_over_null"),
            "N": row.get("N_points"),
            "Om": row.get("Om"), "H0": row.get("H0"),
            "δ": row.get("delta"), "φ": row.get("phi")
        }
    return out

fit_free = summarize_focus(data["focus_fit_12p8"])
fit_bnd  = summarize_focus(data["focus_fit_12p8_delta01"])

md.append("## 2) Fits focalisés @ Ω≈12.8")
if fit_free and "both" in fit_free:
    f = fit_free["both"]
    md.append(f"- **SN+BAO (libre)** : Δχ²={fmt(f['Δχ²'])}, AICc(Λ)={fmt(f['AICc_Λ'])} vs AICc(E)={fmt(f['AICc_E'])}, "
              f"BIC(Λ)={fmt(f['BIC_Λ'])} vs BIC(E)={fmt(f['BIC_E'])}, BF≈{fmt(f['BF'])}.  "
              f"Paramètres: Ωm={fmt(f['Om'])}, H0={fmt(f['H0'])}, δ={fmt(f['δ'])}, φ={fmt(f['φ'])}.")
if fit_bnd and "both" in fit_bnd:
    f = fit_bnd["both"]
    md.append(f"- **SN+BAO (|δ|≤0.1, prior σδ=0.05)** : Δχ²={fmt(f['Δχ²'])}, AICc(Λ)={fmt(f['AICc_Λ'])} vs AICc(E)={fmt(f['AICc_E'])}, "
              f"BIC(Λ)={fmt(f['BIC_Λ'])} vs BIC(E)={fmt(f['BIC_E'])}, BF≈{fmt(f['BF'])}.\n")

# Sensibilité LOO BAO
loo = data["loo_bao"]
md.append("## 3) Sensibilité (leave-one-out BAO)")
if loo:
    W = [row["Omega_at_max"] for row in loo]
    Z = [row["Z_comb_max"] for row in loo]
    import numpy as np
    md.append(f"- N={len(loo)}, ⟨Ω*⟩={fmt(np.mean(W))}±{fmt(np.std(W))}, ⟨Z_max⟩={fmt(np.mean(Z))}±{fmt(np.std(Z))}.")
    md.append(f"- Exemples: {json.dumps(loo[:3])}\n")
else:
    md.append("- (aucun fichier LOO trouvé)\n")

# Null tests
nt = data["nulltest_fast"] or {}
md.append("## 4) Null tests")
if nt:
    md.append(f"- Δχ²_obs={fmt(nt.get('dchi2_obs'))}, Δχ²_nul={fmt(nt.get('dchi2_null_mean'))}±{fmt(nt.get('dchi2_null_std'))}, "
              f"p≈{fmt(nt.get('p_value'),5)} (N={nt.get('N','—')}).\n")
else:
    md.append("- (aucun fichier null test trouvé)\n")

# PPC global
pg = data["ppc_global_S5000"] or data["ppc_global_fast_ckpt"] or {}
md.append("## 5) PPC globale (look-elsewhere)")
if pg:
    md.append(f"- p_global≈{pg.get('p_global') or pg.get('p_partial','—')} ; Z_obs_global={pg.get('Z_obs','—')} ; Ω_at_obs={pg.get('Omega_at_obs') or pg.get('W_obs','—')}.\n")
else:
    md.append("- (non disponible — voir cellule PPC S=5000)\n")

md.append("## 6) Conclusion courte")
md.append(
  "- Indice **local** d’oscillation à Ω_log≈12.9 (phase-locking élevé, bootstrap stable). "
  "SN seuls ~ neutres ; gain porté par BAO. Sensibilité LOO → **fragile avec peu de points**, à reporter honnêtement. "
  "Recommandation : réplication (DESI, Pantheon+)."
)

# ---- 5) Écritures rapport & JSON ----
MD_PATH = os.path.join(PACK, f"ELAS_validation_report_{RUN_TAG}.md")
JS_PATH = os.path.join(PACK, f"ELAS_validation_report_{RUN_TAG}.json")
with open(MD_PATH, "w") as f: f.write("\n".join(md))
with open(JS_PATH, "w") as f: json.dump({k: v for k,v in data.items() if v is not None}, f, indent=2)

# ---- 6) Copie les figures utiles (si présentes) ----
fig_patterns = [
    f"{FIG}/matched_filter_Z*.png",
    f"{FIG}/phase_locking_rose*.png",
    f"{FIG}/bootstrap_omegastar*.png",
    f"{FIG}/ppc_Zcomb_hist*.png",
    f"{FIG}/sn_hubble*.png",
    f"{FIG}/bao_residuals*.png",
    f"{FIG}/nulltest_fast_hist*.png",
    f"{FIG}/elas_upper_limits*.png",
    f"{FIG}/*.png",  # attrape-tout pour ne rien louper
]
COPIED = copy_if_exists(fig_patterns, os.path.join(PACK, "figures"))

# ---- 7) Manifest et ZIP ----
MANIFEST = os.path.join(PACK, "MANIFEST.txt")
with open(MANIFEST, "w") as f:
    f.write(f"ELAS bundle ({RUN_TAG}) généré le {ts}\n")
    f.write("Contenu:\n")
    for p in [MD_PATH, JS_PATH]:
        f.write(f" - {p}\n")
    f.write(f" - Figures ({len(COPIED)}):\n")
    for p in sorted(COPIED):
        f.write(f"   * {p}\n")

ZIP_PATH = shutil.make_archive(f"{PACK}", "zip", PACK)

print("==== BUNDLE GÉNÉRÉ ====")
print("Markdown :", MD_PATH)
print("Résumé    :", JS_PATH)
print("Zip       :", ZIP_PATH)


==== BUNDLE GÉNÉRÉ ====
Markdown : /content/ELAS/output/report_bundle_notebook1/ELAS_validation_report_notebook1.md
Résumé    : /content/ELAS/output/report_bundle_notebook1/ELAS_validation_report_notebook1.json
Zip       : /content/ELAS/output/report_bundle_notebook1.zip
